F: include/linux/altera_jtaguart.h
AMAZON ETHERNET DRIVERS
-M: Netanel Belgazal <netanel@annapurnalabs.com>
-R: Saeed Bishara <saeed@annapurnalabs.com>
-R: Zorik Machulsky <zorik@annapurnalabs.com>
+M: Netanel Belgazal <netanel@amazon.com>
+R: Saeed Bishara <saeedb@amazon.com>
+R: Zorik Machulsky <zorik@amazon.com>
S: Supported
F: Documentation/networking/ena.txt
S: Supported
F: arch/x86/net/bpf_jit*
F: Documentation/networking/filter.txt
+F: Documentation/bpf/
F: include/linux/bpf*
F: include/linux/filter.h
F: include/uapi/linux/bpf*
F: net/sched/act_bpf.c
F: net/sched/cls_bpf.c
F: samples/bpf/
-F: tools/net/bpf*
+F: tools/bpf/
F: tools/testing/selftests/bpf/
BROADCOM B44 10/100 ETHERNET DRIVER
F: drivers/phy/broadcom/phy-brcm-usb*
BROADCOM GENET ETHERNET DRIVER
S: Supported
CA8210 IEEE-802.15.4 RADIO DRIVER
W: https://github.com/Cascoda/ca8210-linux.git
S: Maintained
F: drivers/auxdisplay/cfag12864bfb.c
F: include/linux/cfag12864b.h
-CFG80211 and NL80211
+802.11 (including CFG80211/NL80211)
W: http://wireless.kernel.org/
T: git git://git.kernel.org/pub/scm/linux/kernel/git/jberg/mac80211.git
T: git git://git.kernel.org/pub/scm/linux/kernel/git/jberg/mac80211-next.git
S: Maintained
+F: net/wireless/
F: include/uapi/linux/nl80211.h
+F: include/linux/ieee80211.h
+F: include/net/wext.h
F: include/net/cfg80211.h
-F: net/wireless/*
-X: net/wireless/wext*
+F: include/net/iw_handler.h
+F: include/net/ieee80211_radiotap.h
+F: Documentation/driver-api/80211/cfg80211.rst
+F: Documentation/networking/regulatory.txt
CHAR and MISC DRIVERS
CISCO VIC ETHERNET NIC DRIVER
-M: Neel Patel <neepatel@cisco.com>
+M: Parvi Kaustubhi <pkaustub@cisco.com>
S: Supported
F: drivers/net/ethernet/cisco/enic/
S: Maintained
F: Documentation/cgroup-v1/cpusets.txt
F: include/linux/cpuset.h
-F: kernel/cpuset.c
+F: kernel/cgroup/cpuset.c
CONTROL GROUP - MEMORY RESOURCE CONTROLLER (MEMCG)
INFINIBAND SUBSYSTEM
W: http://www.openfabrics.org/
Q: http://patchwork.kernel.org/project/linux-rdma/list/
F: include/linux/kgdb.h
F: kernel/debug/
-KMEMCHECK
-S: Maintained
-F: Documentation/dev-tools/kmemcheck.rst
-F: arch/x86/include/asm/kmemcheck.h
-F: arch/x86/mm/kmemcheck/
-F: include/linux/kmemcheck.h
-F: mm/kmemcheck.c
-
KMEMLEAK
S: Maintained
F: include/net/mac80211.h
F: net/mac80211/
F: drivers/net/wireless/mac80211_hwsim.[ch]
+F: Documentation/networking/mac80211_hwsim/README
MAILBOX API
S: Maintained
F: net/dsa/
F: include/net/dsa.h
+F: include/linux/dsa/
F: drivers/net/dsa/
NETWORKING [GENERAL]
F: include/uapi/linux/net.h
F: include/uapi/linux/netdevice.h
F: include/uapi/linux/net_namespace.h
-F: tools/net/
F: tools/testing/selftests/net/
+F: lib/net_utils.c
F: lib/random32.c
NETWORKING [IPSEC]
F: Documentation/devicetree/bindings/pci/pcie-kirin.txt
F: drivers/pci/dwc/pcie-kirin.c
+PCIE DRIVER FOR HISILICON STB
+S: Maintained
+F: Documentation/devicetree/bindings/pci/hisilicon-histb-pcie.txt
+F: drivers/pci/dwc/pcie-histb.c
+
PCIE DRIVER FOR MEDIATEK
F: Documentation/devicetree/bindings/pci/rockchip-pcie.txt
F: drivers/pci/host/pcie-rockchip.c
+PCI DRIVER FOR V3 SEMICONDUCTOR V360EPC
+S: Maintained
+F: Documentation/devicetree/bindings/pci/v3-v360epc-pci.txt
+F: drivers/pci/host/pci-v3-semi.c
+
PCIE DRIVER FOR ST SPEAR13XX
QLOGIC QL4xxx RDMA DRIVER
S: Supported
T: git git://git.kernel.org/pub/scm/linux/kernel/git/jberg/mac80211-next.git
S: Maintained
F: Documentation/rfkill.txt
+F: Documentation/ABI/stable/sysfs-class-rfkill
F: net/rfkill/
RHASHTABLE
S: Maintained
F: drivers/thunderbolt/
+F: include/linux/thunderbolt.h
+
+THUNDERBOLT NETWORK DRIVER
+S: Maintained
+F: drivers/net/thunderbolt.c
THUNDERX GPIO DRIVER
F: include/linux/virtio_vsock.h
F: include/uapi/linux/virtio_vsock.h
F: include/uapi/linux/vsockmon.h
+F: include/uapi/linux/vm_sockets_diag.h
+F: net/vmw_vsock/diag.c
F: net/vmw_vsock/af_vsock_tap.c
F: net/vmw_vsock/virtio_transport_common.c
F: net/vmw_vsock/virtio_transport.c
F: drivers/net/vsockmon.c
F: drivers/vhost/vsock.c
F: drivers/vhost/vsock.h
+F: tools/testing/vsock/
VIRTIO CONSOLE DRIVER
S: Supported
W: http://wireless.kernel.org/en/users/Drivers/wil6210
F: drivers/net/wireless/ath/wil6210/
-F: include/uapi/linux/wil6210_uapi.h
WIMAX STACK
module_param_named(cc_table_size, qib_cc_table_size, uint, S_IRUGO);
MODULE_PARM_DESC(cc_table_size, "Congestion control table entries 0 (CCA disabled - default), min = 128, max = 1984");
-static void verify_interrupt(unsigned long);
+static void verify_interrupt(struct timer_list *);
static struct idr qib_unit_table;
u32 qib_cpulist_count;
spin_lock_init(&ppd->cc_shadow_lock);
init_waitqueue_head(&ppd->state_wait);
- setup_timer(&ppd->symerr_clear_timer, qib_clear_symerror_on_linkup,
- (unsigned long)ppd);
+ timer_setup(&ppd->symerr_clear_timer, qib_clear_symerror_on_linkup, 0);
ppd->qib_wq = NULL;
ppd->ibport_data.pmastats =
qib_get_eeprom_info(dd);
/* set up timer (don't start yet) to verify we got interrupt */
- setup_timer(&dd->intrchk_timer, verify_interrupt,
- (unsigned long)dd);
+ timer_setup(&dd->intrchk_timer, verify_interrupt, 0);
done:
return ret;
}
}
}
-static void verify_interrupt(unsigned long opaque)
+static void verify_interrupt(struct timer_list *t)
{
- struct qib_devdata *dd = (struct qib_devdata *) opaque;
+ struct qib_devdata *dd = from_timer(dd, t, intrchk_timer);
u64 int_counter;
if (!dd)
continue;
if (dd->flags & QIB_HAS_SEND_DMA)
ret = qib_setup_sdma(ppd);
- setup_timer(&ppd->hol_timer, qib_hol_event,
- (unsigned long)ppd);
+ timer_setup(&ppd->hol_timer, qib_hol_event, 0);
ppd->hol_state = QIB_HOL_UP;
}
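These qib hunks follow the tree-wide timer API conversion: setup_timer(timer, fn, (unsigned long)ptr) becomes timer_setup(timer, fn, flags), and the callback now takes a struct timer_list * and recovers its container with from_timer() instead of casting an opaque unsigned long. A minimal sketch of the pattern, using hypothetical struct and field names:

#include <linux/timer.h>

struct foo {
	struct timer_list my_timer;	/* timer embedded in its owner */
	int ticks;
};

static void foo_timer_fn(struct timer_list *t)
{
	/* from_timer() is container_of() keyed on the timer field */
	struct foo *f = from_timer(f, t, my_timer);

	f->ticks++;
}

static void foo_start(struct foo *f)
{
	timer_setup(&f->my_timer, foo_timer_fn, 0);
	mod_timer(&f->my_timer, jiffies + HZ);
}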
struct qib_pportdata *ppd;
int pidx;
- if (dd->stats_timer.data) {
+ if (dd->stats_timer.function)
del_timer_sync(&dd->stats_timer);
- dd->stats_timer.data = 0;
- }
- if (dd->intrchk_timer.data) {
+ if (dd->intrchk_timer.function)
del_timer_sync(&dd->intrchk_timer);
- dd->intrchk_timer.data = 0;
- }
for (pidx = 0; pidx < dd->num_pports; ++pidx) {
ppd = dd->pport + pidx;
- if (ppd->hol_timer.data)
+ if (ppd->hol_timer.function)
del_timer_sync(&ppd->hol_timer);
- if (ppd->led_override_timer.data) {
+ if (ppd->led_override_timer.function) {
del_timer_sync(&ppd->led_override_timer);
atomic_set(&ppd->led_override_timer_active, 0);
}
- if (ppd->symerr_clear_timer.data)
+ if (ppd->symerr_clear_timer.function)
del_timer_sync(&ppd->symerr_clear_timer);
}
}
}
if (!rcd->rcvegrbuf_phys) {
rcd->rcvegrbuf_phys =
- kmalloc_node(chunk * sizeof(rcd->rcvegrbuf_phys[0]),
- GFP_KERNEL, rcd->node_id);
+ kmalloc_array_node(chunk,
+ sizeof(rcd->rcvegrbuf_phys[0]),
+ GFP_KERNEL, rcd->node_id);
if (!rcd->rcvegrbuf_phys)
goto bail_rcvegrbuf;
}
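kmalloc_array_node() is the NUMA-aware counterpart of kmalloc_array(): unlike the open-coded kmalloc_node(n * size, ...) it replaces here, it returns NULL when n * size would overflow instead of allocating a silently truncated buffer. A sketch, with hypothetical nbufs/node arguments:

#include <linux/slab.h>

static u64 *alloc_bufs(size_t nbufs, int node)
{
	/* overflow-checked: NULL if nbufs * sizeof(u64) would wrap */
	return kmalloc_array_node(nbufs, sizeof(u64), GFP_KERNEL, node);
}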
#include "vt.h"
#include "trace.h"
-static void rvt_rc_timeout(unsigned long arg);
+static void rvt_rc_timeout(struct timer_list *t);
/*
* Convert the AETH RNR timeout code into the number of microseconds.
rdi->qp_dev->qp_table_size = rdi->dparms.qp_table_size;
rdi->qp_dev->qp_table_bits = ilog2(rdi->dparms.qp_table_size);
rdi->qp_dev->qp_table =
- kmalloc_node(rdi->qp_dev->qp_table_size *
+ kmalloc_array_node(rdi->qp_dev->qp_table_size,
sizeof(*rdi->qp_dev->qp_table),
GFP_KERNEL, rdi->dparms.node);
if (!rdi->qp_dev->qp_table)
/* take qp out the hash and wait for it to be unused */
rvt_remove_qp(rdi, qp);
- wait_event(qp->wait, !atomic_read(&qp->refcount));
/* grab the lock b/c it was locked at call time */
spin_lock_irq(&qp->r_lock);
if (init_attr->port_num == 0 ||
init_attr->port_num > ibpd->device->phys_port_cnt)
return ERR_PTR(-EINVAL);
+ /* fall through */
case IB_QPT_UC:
case IB_QPT_RC:
case IB_QPT_UD:
goto bail_qp;
}
/* initialize timers needed for rc qp */
- setup_timer(&qp->s_timer, rvt_rc_timeout, (unsigned long)qp);
+ timer_setup(&qp->s_timer, rvt_rc_timeout, 0);
hrtimer_init(&qp->s_rnr_timer, CLOCK_MONOTONIC,
HRTIMER_MODE_REL);
qp->s_rnr_timer.function = rvt_rc_rnr_retry;
atomic_set(&qp->refcount, 0);
atomic_set(&qp->local_ops_pending, 0);
init_waitqueue_head(&qp->wait);
- init_timer(&qp->s_timer);
- qp->s_timer.data = (unsigned long)qp;
INIT_LIST_HEAD(&qp->rspwait);
qp->state = IB_QPS_RESET;
qp->s_wq = swq;
spin_unlock(&qp->s_hlock);
spin_unlock_irq(&qp->r_lock);
+ wait_event(qp->wait, !atomic_read(&qp->refcount));
/* qpn is now available for use again */
rvt_free_qpn(&rdi->qp_dev->qpn_table, qp->ibqp.qp_num);
/**
* This is called from s_timer for missing responses.
*/
-static void rvt_rc_timeout(unsigned long arg)
+static void rvt_rc_timeout(struct timer_list *t)
{
- struct rvt_qp *qp = (struct rvt_qp *)arg;
+ struct rvt_qp *qp = from_timer(qp, t, s_timer);
struct rvt_dev_info *rdi = ib_to_rvt(qp->ibqp.device);
unsigned long flags;
rc = ena_alloc_rx_page(rx_ring, rx_info,
- __GFP_COLD | GFP_ATOMIC | __GFP_COMP);
+ GFP_ATOMIC | __GFP_COMP);
if (unlikely(rc < 0)) {
netif_warn(rx_ring->adapter, rx_err, rx_ring->netdev,
"failed to alloc buffer for rx queue %d\n",
#endif /* CONFIG_NET_POLL_CONTROLLER */
};
-static void ena_device_io_suspend(struct work_struct *work)
-{
- struct ena_adapter *adapter =
- container_of(work, struct ena_adapter, suspend_io_task);
- struct net_device *netdev = adapter->netdev;
-
- /* ena_napi_disable_all disables only the IO handling.
- * We are still subject to AENQ keep alive watchdog.
- */
- u64_stats_update_begin(&adapter->syncp);
- adapter->dev_stats.io_suspend++;
- u64_stats_update_begin(&adapter->syncp);
- ena_napi_disable_all(adapter);
- netif_tx_lock(netdev);
- netif_device_detach(netdev);
- netif_tx_unlock(netdev);
-}
-
-static void ena_device_io_resume(struct work_struct *work)
-{
- struct ena_adapter *adapter =
- container_of(work, struct ena_adapter, resume_io_task);
- struct net_device *netdev = adapter->netdev;
-
- u64_stats_update_begin(&adapter->syncp);
- adapter->dev_stats.io_resume++;
- u64_stats_update_end(&adapter->syncp);
-
- netif_device_attach(netdev);
- ena_napi_enable_all(adapter);
-}
-
static int ena_device_validate_params(struct ena_adapter *adapter,
struct ena_com_dev_get_features_ctx *get_feat_ctx)
{
return rc;
}
-static void ena_fw_reset_device(struct work_struct *work)
+static void ena_destroy_device(struct ena_adapter *adapter)
{
- struct ena_com_dev_get_features_ctx get_feat_ctx;
- struct ena_adapter *adapter =
- container_of(work, struct ena_adapter, reset_task);
struct net_device *netdev = adapter->netdev;
struct ena_com_dev *ena_dev = adapter->ena_dev;
- struct pci_dev *pdev = adapter->pdev;
- bool dev_up, wd_state;
- int rc;
-
- if (unlikely(!test_bit(ENA_FLAG_TRIGGER_RESET, &adapter->flags))) {
- dev_err(&pdev->dev,
- "device reset schedule while reset bit is off\n");
- return;
- }
+ bool dev_up;
netif_carrier_off(netdev);
del_timer_sync(&adapter->timer_service);
- rtnl_lock();
-
dev_up = test_bit(ENA_FLAG_DEV_UP, &adapter->flags);
+ adapter->dev_up_before_reset = dev_up;
+
ena_com_set_admin_running_state(ena_dev, false);
- /* After calling ena_close the tx queues and the napi
- * are disabled so no one can interfere or touch the
- * data structures
- */
ena_close(netdev);
+ /* Before releasing the ENA resources, a device reset is required
+ * (to prevent the device from accessing them).
+ * If the reset flag is set and the device is up, ena_close
+ * already performs the reset, so it can be skipped.
+ */
+ if (!(test_bit(ENA_FLAG_TRIGGER_RESET, &adapter->flags) && dev_up))
+ ena_com_dev_reset(adapter->ena_dev, adapter->reset_reason);
+
ena_free_mgmnt_irq(adapter);
ena_disable_msix(adapter);
ena_com_mmio_reg_read_request_destroy(ena_dev);
adapter->reset_reason = ENA_REGS_RESET_NORMAL;
+
clear_bit(ENA_FLAG_TRIGGER_RESET, &adapter->flags);
+}
- /* Finish with the destroy part. Start the init part */
+static int ena_restore_device(struct ena_adapter *adapter)
+{
+ struct ena_com_dev_get_features_ctx get_feat_ctx;
+ struct ena_com_dev *ena_dev = adapter->ena_dev;
+ struct pci_dev *pdev = adapter->pdev;
+ bool wd_state;
+ int rc;
rc = ena_device_init(ena_dev, adapter->pdev, &get_feat_ctx, &wd_state);
if (rc) {
goto err_device_destroy;
}
/* If the interface was up before the reset bring it up */
- if (dev_up) {
+ if (adapter->dev_up_before_reset) {
rc = ena_up(adapter);
if (rc) {
dev_err(&pdev->dev, "Failed to create I/O queues\n");
}
mod_timer(&adapter->timer_service, round_jiffies(jiffies + HZ));
-
- rtnl_unlock();
-
dev_err(&pdev->dev, "Device reset completed successfully\n");
- return;
+ return rc;
err_disable_msix:
ena_free_mgmnt_irq(adapter);
ena_disable_msix(adapter);
err_device_destroy:
ena_com_admin_destroy(ena_dev);
err:
- rtnl_unlock();
-
clear_bit(ENA_FLAG_DEVICE_RUNNING, &adapter->flags);
dev_err(&pdev->dev,
"Reset attempt failed. Can not reset the device\n");
+
+ return rc;
+}
+
+static void ena_fw_reset_device(struct work_struct *work)
+{
+ struct ena_adapter *adapter =
+ container_of(work, struct ena_adapter, reset_task);
+ struct pci_dev *pdev = adapter->pdev;
+
+ if (unlikely(!test_bit(ENA_FLAG_TRIGGER_RESET, &adapter->flags))) {
+ dev_err(&pdev->dev,
+ "device reset schedule while reset bit is off\n");
+ return;
+ }
+ rtnl_lock();
+ ena_destroy_device(adapter);
+ ena_restore_device(adapter);
+ rtnl_unlock();
}
static int check_missing_comp_in_queue(struct ena_adapter *adapter,
struct ena_tx_buffer *tx_buf;
unsigned long last_jiffies;
u32 missed_tx = 0;
- int i;
+ int i, rc = 0;
for (i = 0; i < tx_ring->ring_size; i++) {
tx_buf = &tx_ring->tx_buffer_info[i];
tx_buf->print_once = 1;
missed_tx++;
-
- if (unlikely(missed_tx > adapter->missing_tx_completion_threshold)) {
- netif_err(adapter, tx_err, adapter->netdev,
- "The number of lost tx completions is above the threshold (%d > %d). Reset the device\n",
- missed_tx,
- adapter->missing_tx_completion_threshold);
- adapter->reset_reason =
- ENA_REGS_RESET_MISS_TX_CMPL;
- set_bit(ENA_FLAG_TRIGGER_RESET, &adapter->flags);
- return -EIO;
- }
}
}
- return 0;
+ if (unlikely(missed_tx > adapter->missing_tx_completion_threshold)) {
+ netif_err(adapter, tx_err, adapter->netdev,
+ "The number of lost tx completions is above the threshold (%d > %d). Reset the device\n",
+ missed_tx,
+ adapter->missing_tx_completion_threshold);
+ adapter->reset_reason =
+ ENA_REGS_RESET_MISS_TX_CMPL;
+ set_bit(ENA_FLAG_TRIGGER_RESET, &adapter->flags);
+ rc = -EIO;
+ }
+
+ u64_stats_update_begin(&tx_ring->syncp);
+ tx_ring->tx_stats.missed_tx = missed_tx;
+ u64_stats_update_end(&tx_ring->syncp);
+
+ return rc;
}
static void check_for_missing_tx_completions(struct ena_adapter *adapter)
goto err_rss;
}
- INIT_WORK(&adapter->suspend_io_task, ena_device_io_suspend);
- INIT_WORK(&adapter->resume_io_task, ena_device_io_resume);
INIT_WORK(&adapter->reset_task, ena_fw_reset_device);
adapter->last_keep_alive_jiffies = jiffies;
err_worker_destroy:
ena_com_destroy_interrupt_moderation(ena_dev);
del_timer(&adapter->timer_service);
- cancel_work_sync(&adapter->suspend_io_task);
- cancel_work_sync(&adapter->resume_io_task);
err_netdev_destroy:
free_netdev(netdev);
err_device_destroy:
cancel_work_sync(&adapter->reset_task);
- cancel_work_sync(&adapter->suspend_io_task);
-
- cancel_work_sync(&adapter->resume_io_task);
-
/* Reset the device only if the device is running. */
if (test_bit(ENA_FLAG_DEVICE_RUNNING, &adapter->flags))
ena_com_dev_reset(ena_dev, adapter->reset_reason);
vfree(ena_dev);
}
+#ifdef CONFIG_PM
+/* ena_suspend - PM suspend callback
+ * @pdev: PCI device information struct
+ * @state: power state
+ */
+static int ena_suspend(struct pci_dev *pdev, pm_message_t state)
+{
+ struct ena_adapter *adapter = pci_get_drvdata(pdev);
+
+ u64_stats_update_begin(&adapter->syncp);
+ adapter->dev_stats.suspend++;
+ u64_stats_update_end(&adapter->syncp);
+
+ rtnl_lock();
+ if (unlikely(test_bit(ENA_FLAG_TRIGGER_RESET, &adapter->flags))) {
+ dev_err(&pdev->dev,
+ "ignoring device reset request as the device is being suspended\n");
+ clear_bit(ENA_FLAG_TRIGGER_RESET, &adapter->flags);
+ }
+ ena_destroy_device(adapter);
+ rtnl_unlock();
+ return 0;
+}
+
+/* ena_resume - PM resume callback
+ * @pdev: PCI device information struct
+ *
+ */
+static int ena_resume(struct pci_dev *pdev)
+{
+ struct ena_adapter *adapter = pci_get_drvdata(pdev);
+ int rc;
+
+ u64_stats_update_begin(&adapter->syncp);
+ adapter->dev_stats.resume++;
+ u64_stats_update_end(&adapter->syncp);
+
+ rtnl_lock();
+ rc = ena_restore_device(adapter);
+ rtnl_unlock();
+ return rc;
+}
+#endif
+
static struct pci_driver ena_pci_driver = {
.name = DRV_MODULE_NAME,
.id_table = ena_pci_tbl,
.probe = ena_probe,
.remove = ena_remove,
+#ifdef CONFIG_PM
+ .suspend = ena_suspend,
+ .resume = ena_resume,
+#endif
.sriov_configure = ena_sriov_configure,
};
ENA_ADMIN_NOTIFICATION);
switch (aenq_e->aenq_common_desc.syndrom) {
- case ENA_ADMIN_SUSPEND:
- /* Suspend just the IO queues.
- * We deliberately don't suspend admin so the timer and
- * the keep_alive events should remain.
- */
- queue_work(ena_wq, &adapter->suspend_io_task);
- break;
- case ENA_ADMIN_RESUME:
- queue_work(ena_wq, &adapter->resume_io_task);
- break;
case ENA_ADMIN_UPDATE_HINTS:
hints = (struct ena_admin_ena_hw_hints *)
(&aenq_e->inline_data_w4);
/* work queue for link status */
struct cavium_wq link_status_wq;
+ /* work queue to regularly send local time to octeon firmware */
+ struct cavium_wq sync_octeon_time_wq;
+
int netdev_uc_count;
};
struct sk_buff *skb;
struct octeon_skb_page_info *skb_pg_info;
- page = alloc_page(GFP_ATOMIC | __GFP_COLD);
+ page = alloc_page(GFP_ATOMIC);
if (unlikely(!page))
return NULL;
if (mlx4_en_prepare_rx_desc(priv, ring,
ring->actual_size,
- GFP_KERNEL | __GFP_COLD)) {
+ GFP_KERNEL)) {
if (ring->actual_size < MLX4_EN_MIN_RX_SIZE) {
en_err(priv, "Failed to allocate enough rx buffers\n");
return -ENOMEM;
DEF_RX_RINGS));
num_rx_rings = mlx4_low_memory_profile() ? MIN_RX_RINGS :
- min_t(int, num_of_eqs,
- netif_get_num_default_rss_queues());
+ min_t(int, num_of_eqs, num_online_cpus());
mdev->profile.prof[i].rx_ring_num =
rounddown_pow_of_two(num_rx_rings);
}
do {
if (mlx4_en_prepare_rx_desc(priv, ring,
ring->prod & ring->size_mask,
- GFP_ATOMIC | __GFP_COLD |
- __GFP_MEMALLOC))
+ GFP_ATOMIC | __GFP_MEMALLOC))
break;
ring->prod++;
} while (likely(--missing));
xdp.data_hard_start = va - frags[0].page_offset;
xdp.data = va;
+ xdp_set_data_meta_invalid(&xdp);
xdp.data_end = xdp.data + length;
orig_data = xdp.data;
case XDP_PASS:
break;
case XDP_TX:
- if (likely(!mlx4_en_xmit_frame(ring, frags, dev,
+ if (likely(!mlx4_en_xmit_frame(ring, frags, priv,
length, cq_ring,
&doorbell_pending))) {
frags[0].page = NULL;
return timed_out ? -EIO : 0;
}
-static void nfp_net_reconfig_timer(unsigned long data)
+static void nfp_net_reconfig_timer(struct timer_list *t)
{
- struct nfp_net *nn = (void *)data;
+ struct nfp_net *nn = from_timer(nn, t, reconfig_timer);
spin_lock_bh(&nn->reconfig_lock);
} else {
struct page *page;
- page = alloc_page(GFP_KERNEL | __GFP_COLD);
+ page = alloc_page(GFP_KERNEL);
frag = page ? page_address(page) : NULL;
}
if (!frag) {
if (!dp->xdp_prog) {
frag = napi_alloc_frag(dp->fl_bufsz);
+ if (unlikely(!frag))
+ return NULL;
} else {
struct page *page;
- page = alloc_page(GFP_ATOMIC);
- frag = page ? page_address(page) : NULL;
- }
- if (!frag) {
- nn_dp_warn(dp, "Failed to alloc receive page frag\n");
- return NULL;
+ page = dev_alloc_page();
+ if (unlikely(!page))
+ return NULL;
+ frag = page_address(page);
}
*dma_addr = nfp_net_dma_map_rx(dp, frag);
{
u64_stats_update_begin(&r_vec->rx_sync);
r_vec->rx_drops++;
+ /* If we have both skb and rxbuf the replacement buffer allocation
+ * must have failed, count this as an alloc failure.
+ */
+ if (skb && rxbuf)
+ r_vec->rx_replace_buf_alloc_fail++;
u64_stats_update_end(&r_vec->rx_sync);
/* skb is build based on the frag, free_skb() would free the frag
return true;
}
-static int nfp_net_run_xdp(struct bpf_prog *prog, void *data, void *hard_start,
- unsigned int *off, unsigned int *len)
-{
- struct xdp_buff xdp;
- void *orig_data;
- int ret;
-
- xdp.data_hard_start = hard_start;
- xdp.data = data + *off;
- xdp.data_end = data + *off + *len;
-
- orig_data = xdp.data;
- ret = bpf_prog_run_xdp(prog, &xdp);
-
- *len -= xdp.data - orig_data;
- *off += xdp.data - orig_data;
-
- return ret;
-}
-
/**
* nfp_net_rx() - receive up to @budget packets on @rx_ring
* @rx_ring: RX ring to receive from
struct nfp_meta_parsed meta;
struct net_device *netdev;
dma_addr_t new_dma_addr;
+ u32 meta_len_xdp = 0;
void *new_frag;
idx = D_IDX(rx_ring, rx_ring->rd_p);
if (xdp_prog && !(rxd->rxd.flags & PCIE_DESC_RX_BPF &&
dp->bpf_offload_xdp) && !meta.portid) {
+ void *orig_data = rxbuf->frag + pkt_off;
unsigned int dma_off;
- void *hard_start;
+ struct xdp_buff xdp;
int act;
- hard_start = rxbuf->frag + NFP_NET_RX_BUF_HEADROOM;
+ xdp.data_hard_start = rxbuf->frag + NFP_NET_RX_BUF_HEADROOM;
+ xdp.data = orig_data;
+ xdp.data_meta = orig_data;
+ xdp.data_end = orig_data + pkt_len;
+
+ act = bpf_prog_run_xdp(xdp_prog, &xdp);
+
+ pkt_len -= xdp.data - orig_data;
+ pkt_off += xdp.data - orig_data;
- act = nfp_net_run_xdp(xdp_prog, rxbuf->frag, hard_start,
- &pkt_off, &pkt_len);
switch (act) {
case XDP_PASS:
+ meta_len_xdp = xdp.data - xdp.data_meta;
break;
case XDP_TX:
dma_off = pkt_off - NFP_NET_RX_BUF_HEADROOM;
if (rxd->rxd.flags & PCIE_DESC_RX_VLAN)
__vlan_hwaccel_put_tag(skb, htons(ETH_P_8021Q),
le16_to_cpu(rxd->rxd.vlan));
+ if (meta_len_xdp)
+ skb_metadata_set(skb, meta_len_xdp);
napi_gro_receive(&rx_ring->r_vec->napi, skb);
}
return 0;
}
-static int nfp_net_xdp(struct net_device *netdev, struct netdev_xdp *xdp)
+static int nfp_net_xdp(struct net_device *netdev, struct netdev_bpf *xdp)
{
struct nfp_net *nn = netdev_priv(netdev);
xdp->prog_attached = XDP_ATTACHED_HW;
xdp->prog_id = nn->xdp_prog ? nn->xdp_prog->aux->id : 0;
return 0;
+ case BPF_OFFLOAD_VERIFIER_PREP:
+ return nfp_app_bpf_verifier_prep(nn->app, nn, xdp);
+ case BPF_OFFLOAD_TRANSLATE:
+ return nfp_app_bpf_translate(nn->app, nn,
+ xdp->offload.prog);
+ case BPF_OFFLOAD_DESTROY:
+ return nfp_app_bpf_destroy(nn->app, nn,
+ xdp->offload.prog);
default:
return -EINVAL;
}
.ndo_get_phys_port_name = nfp_port_get_phys_port_name,
.ndo_udp_tunnel_add = nfp_net_add_vxlan_port,
.ndo_udp_tunnel_del = nfp_net_del_vxlan_port,
- .ndo_xdp = nfp_net_xdp,
+ .ndo_bpf = nfp_net_xdp,
};
/**
spin_lock_init(&nn->reconfig_lock);
spin_lock_init(&nn->link_status_lock);
- setup_timer(&nn->reconfig_timer,
- nfp_net_reconfig_timer, (unsigned long)nn);
+ timer_setup(&nn->reconfig_timer, nfp_net_reconfig_timer, 0);
return nn;
}
do {
page = ef4_reuse_page(rx_queue);
if (page == NULL) {
- page = alloc_pages(__GFP_COLD | __GFP_COMP |
+ page = alloc_pages(__GFP_COMP |
(atomic ? GFP_ATOMIC : GFP_KERNEL),
efx->rx_buffer_order);
if (unlikely(page == NULL))
ef4_nic_notify_rx_desc(rx_queue);
}
-void ef4_rx_slow_fill(unsigned long context)
+void ef4_rx_slow_fill(struct timer_list *t)
{
- struct ef4_rx_queue *rx_queue = (struct ef4_rx_queue *)context;
+ struct ef4_rx_queue *rx_queue = from_timer(rx_queue, t, slow_fill);
/* Post an event to cause NAPI to run and refill the queue */
ef4_nic_generate_fill_event(rx_queue);
do {
page = efx_reuse_page(rx_queue);
if (page == NULL) {
- page = alloc_pages(__GFP_COLD | __GFP_COMP |
+ page = alloc_pages(__GFP_COMP |
(atomic ? GFP_ATOMIC : GFP_KERNEL),
efx->rx_buffer_order);
if (unlikely(page == NULL))
efx_nic_notify_rx_desc(rx_queue);
}
-void efx_rx_slow_fill(unsigned long context)
+void efx_rx_slow_fill(struct timer_list *t)
{
- struct efx_rx_queue *rx_queue = (struct efx_rx_queue *)context;
+ struct efx_rx_queue *rx_queue = from_timer(rx_queue, t, slow_fill);
/* Post an event to cause NAPI to run and refill the queue */
efx_nic_generate_fill_event(rx_queue);
sw_data[0] = (u32)bufptr;
} else {
/* Allocate a secondary receive queue entry */
- page = alloc_page(GFP_ATOMIC | GFP_DMA | __GFP_COLD);
+ page = alloc_page(GFP_ATOMIC | GFP_DMA);
if (unlikely(!page)) {
dev_warn_ratelimited(netcp->ndev_dev, "Secondary page alloc failed\n");
goto fail;
/* setup tc must be called under rtnl lock */
ASSERT_RTNL();
- if (type != TC_SETUP_MQPRIO)
+ if (type != TC_SETUP_QDISC_MQPRIO)
return -EOPNOTSUPP;
mqprio->hw = TC_MQPRIO_HW_OFFLOAD_TCS;
#include <linux/slab.h>
#include <linux/cpu.h>
#include <linux/average.h>
+#include <linux/filter.h>
#include <net/route.h>
static int napi_weight = NAPI_POLL_WEIGHT;
return skb;
}
-static bool virtnet_xdp_xmit(struct virtnet_info *vi,
- struct receive_queue *rq,
- struct xdp_buff *xdp)
+static void virtnet_xdp_flush(struct net_device *dev)
+{
+ struct virtnet_info *vi = netdev_priv(dev);
+ struct send_queue *sq;
+ unsigned int qp;
+
+ qp = vi->curr_queue_pairs - vi->xdp_queue_pairs + smp_processor_id();
+ sq = &vi->sq[qp];
+
+ virtqueue_kick(sq->vq);
+}
+
+static bool __virtnet_xdp_xmit(struct virtnet_info *vi,
+ struct xdp_buff *xdp)
{
struct virtio_net_hdr_mrg_rxbuf *hdr;
unsigned int len;
return false;
}
- virtqueue_kick(sq->vq);
return true;
}
+static int virtnet_xdp_xmit(struct net_device *dev, struct xdp_buff *xdp)
+{
+ struct virtnet_info *vi = netdev_priv(dev);
+ bool sent = __virtnet_xdp_xmit(vi, xdp);
+
+ if (!sent)
+ return -ENOSPC;
+ return 0;
+}
+
static unsigned int virtnet_get_headroom(struct virtnet_info *vi)
{
return vi->xdp_queue_pairs ? VIRTIO_XDP_HEADROOM : 0;
struct virtnet_info *vi,
struct receive_queue *rq,
void *buf, void *ctx,
- unsigned int len)
+ unsigned int len,
+ bool *xdp_xmit)
{
struct sk_buff *skb;
struct bpf_prog *xdp_prog;
unsigned int buflen = SKB_DATA_ALIGN(GOOD_PACKET_LEN + headroom) +
SKB_DATA_ALIGN(sizeof(struct skb_shared_info));
struct page *page = virt_to_head_page(buf);
- unsigned int delta = 0;
+ unsigned int delta = 0, err;
struct page *xdp_page;
len -= vi->hdr_len;
xdp.data_hard_start = buf + VIRTNET_RX_PAD + vi->hdr_len;
xdp.data = xdp.data_hard_start + xdp_headroom;
+ xdp_set_data_meta_invalid(&xdp);
xdp.data_end = xdp.data + len;
orig_data = xdp.data;
act = bpf_prog_run_xdp(xdp_prog, &xdp);
delta = orig_data - xdp.data;
break;
case XDP_TX:
- if (unlikely(!virtnet_xdp_xmit(vi, rq, &xdp)))
+ if (unlikely(!__virtnet_xdp_xmit(vi, &xdp)))
trace_xdp_exception(vi->dev, xdp_prog, act);
+ else
+ *xdp_xmit = true;
+ rcu_read_unlock();
+ goto xdp_xmit;
+ case XDP_REDIRECT:
+ err = xdp_do_redirect(dev, &xdp, xdp_prog);
+ if (!err)
+ *xdp_xmit = true;
rcu_read_unlock();
goto xdp_xmit;
default:
struct receive_queue *rq,
void *buf,
void *ctx,
- unsigned int len)
+ unsigned int len,
+ bool *xdp_xmit)
{
struct virtio_net_hdr_mrg_rxbuf *hdr = buf;
u16 num_buf = virtio16_to_cpu(vi->vdev, hdr->num_buffers);
struct bpf_prog *xdp_prog;
unsigned int truesize;
unsigned int headroom = mergeable_ctx_to_headroom(ctx);
+ int err;
head_skb = NULL;
data = page_address(xdp_page) + offset;
xdp.data_hard_start = data - VIRTIO_XDP_HEADROOM + vi->hdr_len;
xdp.data = data + vi->hdr_len;
+ xdp_set_data_meta_invalid(&xdp);
xdp.data_end = xdp.data + (len - vi->hdr_len);
act = bpf_prog_run_xdp(xdp_prog, &xdp);
+ if (act != XDP_PASS)
+ ewma_pkt_len_add(&rq->mrg_avg_pkt_len, len);
+
switch (act) {
case XDP_PASS:
/* recalculate offset to account for any header
put_page(page);
head_skb = page_to_skb(vi, rq, xdp_page,
offset, len, PAGE_SIZE);
- ewma_pkt_len_add(&rq->mrg_avg_pkt_len, len);
return head_skb;
}
break;
case XDP_TX:
- if (unlikely(!virtnet_xdp_xmit(vi, rq, &xdp)))
+ if (unlikely(!__virtnet_xdp_xmit(vi, &xdp)))
trace_xdp_exception(vi->dev, xdp_prog, act);
- ewma_pkt_len_add(&rq->mrg_avg_pkt_len, len);
+ else
+ *xdp_xmit = true;
if (unlikely(xdp_page != page))
goto err_xdp;
rcu_read_unlock();
goto xdp_xmit;
+ case XDP_REDIRECT:
+ err = xdp_do_redirect(dev, &xdp, xdp_prog);
+ if (!err)
+ *xdp_xmit = true;
+ rcu_read_unlock();
+ goto xdp_xmit;
default:
bpf_warn_invalid_xdp_action(act);
case XDP_ABORTED:
case XDP_DROP:
if (unlikely(xdp_page != page))
__free_pages(xdp_page, 0);
- ewma_pkt_len_add(&rq->mrg_avg_pkt_len, len);
goto err_xdp;
}
}
}
static int receive_buf(struct virtnet_info *vi, struct receive_queue *rq,
- void *buf, unsigned int len, void **ctx)
+ void *buf, unsigned int len, void **ctx, bool *xdp_xmit)
{
struct net_device *dev = vi->dev;
struct sk_buff *skb;
}
if (vi->mergeable_rx_bufs)
- skb = receive_mergeable(dev, vi, rq, buf, ctx, len);
+ skb = receive_mergeable(dev, vi, rq, buf, ctx, len, xdp_xmit);
else if (vi->big_packets)
skb = receive_big(dev, vi, rq, buf, len);
else
- skb = receive_small(dev, vi, rq, buf, ctx, len);
+ skb = receive_small(dev, vi, rq, buf, ctx, len, xdp_xmit);
if (unlikely(!skb))
return 0;
int err;
bool oom;
- gfp |= __GFP_COLD;
do {
if (vi->mergeable_rx_bufs)
err = add_recvbuf_mergeable(vi, rq, gfp);
}
}
-static int virtnet_receive(struct receive_queue *rq, int budget)
+static int virtnet_receive(struct receive_queue *rq, int budget, bool *xdp_xmit)
{
struct virtnet_info *vi = rq->vq->vdev->priv;
unsigned int len, received = 0, bytes = 0;
while (received < budget &&
(buf = virtqueue_get_buf_ctx(rq->vq, &len, &ctx))) {
- bytes += receive_buf(vi, rq, buf, len, ctx);
+ bytes += receive_buf(vi, rq, buf, len, ctx, xdp_xmit);
received++;
}
} else {
while (received < budget &&
(buf = virtqueue_get_buf(rq->vq, &len)) != NULL) {
- bytes += receive_buf(vi, rq, buf, len, NULL);
+ bytes += receive_buf(vi, rq, buf, len, NULL, xdp_xmit);
received++;
}
}
struct receive_queue *rq =
container_of(napi, struct receive_queue, napi);
unsigned int received;
+ bool xdp_xmit = false;
virtnet_poll_cleantx(rq);
- received = virtnet_receive(rq, budget);
+ received = virtnet_receive(rq, budget, &xdp_xmit);
/* Out of packets? */
if (received < budget)
virtqueue_napi_complete(napi, rq->vq, received);
+ if (xdp_xmit)
+ xdp_do_flush_map();
+
return received;
}
return 0;
}
-static int virtnet_xdp(struct net_device *dev, struct netdev_xdp *xdp)
+static int virtnet_xdp(struct net_device *dev, struct netdev_bpf *xdp)
{
switch (xdp->command) {
case XDP_SETUP_PROG:
#ifdef CONFIG_NET_POLL_CONTROLLER
.ndo_poll_controller = virtnet_netpoll,
#endif
- .ndo_xdp = virtnet_xdp,
+ .ndo_bpf = virtnet_xdp,
+ .ndo_xdp_xmit = virtnet_xdp_xmit,
+ .ndo_xdp_flush = virtnet_xdp_flush,
.ndo_features_check = passthru_features_check,
};
struct list_head fuse_conn_list;
DEFINE_MUTEX(fuse_mutex);
-static int set_global_limit(const char *val, struct kernel_param *kp);
+static int set_global_limit(const char *val, const struct kernel_param *kp);
unsigned max_user_bgreq;
module_param_call(max_user_bgreq, set_global_limit, param_get_uint,
*limit = (1 << 16) - 1;
}
-static int set_global_limit(const char *val, struct kernel_param *kp)
+static int set_global_limit(const char *val, const struct kernel_param *kp)
{
int rv;
int err;
fuse_inode_cachep = kmem_cache_create("fuse_inode",
- sizeof(struct fuse_inode), 0,
- SLAB_HWCACHE_ALIGN|SLAB_ACCOUNT,
- fuse_inode_init_once);
+ sizeof(struct fuse_inode), 0,
+ SLAB_HWCACHE_ALIGN|SLAB_ACCOUNT|SLAB_RECLAIM_ACCOUNT,
+ fuse_inode_init_once);
err = -ENOMEM;
if (!fuse_inode_cachep)
goto out;
*/
#define DLMFS_CAPABILITIES "bast stackglue"
static int param_set_dlmfs_capabilities(const char *val,
- struct kernel_param *kp)
+ const struct kernel_param *kp)
{
printk(KERN_ERR "%s: readonly parameter\n", kp->name);
return -EINVAL;
}
static int param_get_dlmfs_capabilities(char *buffer,
- struct kernel_param *kp)
+ const struct kernel_param *kp)
{
return strlcpy(buffer, DLMFS_CAPABILITIES,
strlen(DLMFS_CAPABILITIES) + 1);
{
unregister_filesystem(&dlmfs_fs_type);
- flush_workqueue(user_dlm_worker);
destroy_workqueue(user_dlm_worker);
/*
struct bpf_prog {
u16 pages; /* Number of allocated pages */
- kmemcheck_bitfield_begin(meta);
u16 jited:1, /* Is our filter JIT'ed? */
locked:1, /* Program image locked? */
gpl_compatible:1, /* Is filter GPL compatible? */
cb_access:1, /* Is control block accessed? */
dst_needed:1; /* Do we need dst entry? */
- kmemcheck_bitfield_end(meta);
enum bpf_prog_type type; /* Type of BPF program */
u32 len; /* Number of filter blocks */
u32 jited_len; /* Size of jited insns in bytes */
struct bpf_prog *prog;
};
-#define BPF_PROG_RUN(filter, ctx) (*filter->bpf_func)(ctx, filter->insnsi)
+#define BPF_PROG_RUN(filter, ctx) (*(filter)->bpf_func)(ctx, (filter)->insnsi)
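The added parentheses are standard macro hygiene: without them, an argument whose top-level operator binds more loosely than -> misparses. A contrived illustration (prog and dummy are hypothetical):

/* Expands correctly only with the parenthesized macro:
 *   (*(prog ? prog : dummy)->bpf_func)(ctx, (prog ? prog : dummy)->insnsi)
 * The old form would have parsed as
 *   (*prog ? prog : dummy->bpf_func)(ctx, prog ? prog : dummy->insnsi)
 */
ret = BPF_PROG_RUN(prog ? prog : dummy, ctx);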
#define BPF_SKB_CB_LEN QDISC_CB_PRIV_LEN
struct bpf_skb_data_end {
struct qdisc_skb_cb qdisc_cb;
+ void *data_meta;
void *data_end;
};
struct xdp_buff {
void *data;
void *data_end;
+ void *data_meta;
void *data_hard_start;
};
-/* compute the linear packet data range [data, data_end) which
- * will be accessed by cls_bpf, act_bpf and lwt programs
+/* Compute the linear packet data range [data, data_end) which
+ * will be accessed by various program types (cls_bpf, act_bpf,
+ * lwt, ...). Subsystems allowing direct data access must (!)
+ * ensure that cb[] area can be written to when BPF program is
+ * invoked (otherwise cb[] save/restore is necessary).
*/
-static inline void bpf_compute_data_end(struct sk_buff *skb)
+static inline void bpf_compute_data_pointers(struct sk_buff *skb)
{
struct bpf_skb_data_end *cb = (struct bpf_skb_data_end *)skb->cb;
BUILD_BUG_ON(sizeof(*cb) > FIELD_SIZEOF(struct sk_buff, cb));
- cb->data_end = skb->data + skb_headlen(skb);
+ cb->data_meta = skb->data - skb_metadata_len(skb);
+ cb->data_end = skb->data + skb_headlen(skb);
}
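The renamed helper is called by the skb-based program runners immediately before the program executes, so that data, data_meta and data_end are all consistent. A condensed cls_bpf-style call site, as a sketch:

/* tc classifier invoking a BPF program on an skb */
bpf_compute_data_pointers(skb);
ret = BPF_PROG_RUN(prog->filter, skb);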
static inline u8 *bpf_skb_cb(struct sk_buff *skb)
struct bpf_prog *prog);
void xdp_do_flush_map(void);
+/* Drivers not supporting XDP metadata can use this helper, which
+ * rejects any room expansion for metadata as a result.
+ */
+static __always_inline void
+xdp_set_data_meta_invalid(struct xdp_buff *xdp)
+{
+ xdp->data_meta = xdp->data + 1;
+}
+
+static __always_inline bool
+xdp_data_meta_unsupported(const struct xdp_buff *xdp)
+{
+ return unlikely(xdp->data_meta > xdp->data);
+}
+
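Taken together with the driver hunks above: a driver that supports metadata points data_meta at data before running the program (as nfp does), while one that does not calls xdp_set_data_meta_invalid() so a program's bpf_xdp_adjust_meta() is rejected at run time (as mlx4 and virtio_net do). A condensed RX-path sketch, with frag, headroom and pkt_len as placeholders:

struct xdp_buff xdp;

xdp.data_hard_start = frag;
xdp.data = frag + headroom;
xdp.data_meta = xdp.data;		/* metadata supported */
/* xdp_set_data_meta_invalid(&xdp);	   metadata unsupported */
xdp.data_end = xdp.data + pkt_len;

act = bpf_prog_run_xdp(xdp_prog, &xdp);
if (act == XDP_PASS)
	skb_metadata_set(skb, xdp.data - xdp.data_meta);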
void bpf_warn_invalid_xdp_action(u32 act);
-void bpf_warn_invalid_xdp_redirect(u32 ifindex);
struct sock *do_sk_redirect_map(struct sk_buff *skb);
#define _LINUX_SKBUFF_H
#include <linux/kernel.h>
-#include <linux/kmemcheck.h>
#include <linux/compiler.h>
#include <linux/time.h>
#include <linux/bug.h>
* the end of the header data, ie. at skb->end.
*/
struct skb_shared_info {
- unsigned short _unused;
- unsigned char nr_frags;
+ __u8 __unused;
+ __u8 meta_len;
+ __u8 nr_frags;
__u8 tx_flags;
unsigned short gso_size;
/* Warning: this field is not always filled in (UFO)! */
struct skb_shared_hwtstamps hwtstamps;
unsigned int gso_type;
u32 tskey;
- __be32 ip6_frag_id;
/*
* Warning : all fields before dataref are cleared in __alloc_skb()
* @nf_trace: netfilter packet trace flag
* @protocol: Packet protocol from driver
* @destructor: Destruct function
+ * @tcp_tsorted_anchor: list structure for TCP (tp->tsorted_sent_queue)
* @_nfct: Associated connection, if any (with nfctinfo bits)
* @nf_bridge: Saved data about a bridged frame - see br_netfilter.c
* @skb_iif: ifindex of device we arrived on
struct sk_buff *prev;
union {
- ktime_t tstamp;
- u64 skb_mstamp;
+ struct net_device *dev;
+ /* Some protocols might use this space to store information,
+ * while device pointer would be NULL.
+ * UDP receive path is one user.
+ */
+ unsigned long dev_scratch;
};
};
struct rb_node rbnode; /* used in netem & tcp stack */
struct sock *sk;
union {
- struct net_device *dev;
- /* Some protocols might use this space to store information,
- * while device pointer would be NULL.
- * UDP receive path is one user.
- */
- unsigned long dev_scratch;
+ ktime_t tstamp;
+ u64 skb_mstamp;
};
/*
* This is the control buffer. It is free to use for every
*/
char cb[48] __aligned(8);
- unsigned long _skb_refdst;
- void (*destructor)(struct sk_buff *skb);
+ union {
+ struct {
+ unsigned long _skb_refdst;
+ void (*destructor)(struct sk_buff *skb);
+ };
+ struct list_head tcp_tsorted_anchor;
+ };
+
#ifdef CONFIG_XFRM
struct sec_path *sp;
#endif
/* Following fields are _not_ copied in __copy_skb_header()
* Note that queue_mapping is here mostly to fill a hole.
*/
- kmemcheck_bitfield_begin(flags1);
__u16 queue_mapping;
/* if you move cloned around you also must adapt those constants */
head_frag:1,
xmit_more:1,
__unused:1; /* one bit hole */
- kmemcheck_bitfield_end(flags1);
/* fields enclosed in headers_start/headers_end are copied
* using a single memcpy() in __copy_skb_header()
__u8 remcsum_offload:1;
#ifdef CONFIG_NET_SWITCHDEV
__u8 offload_fwd_mark:1;
+ __u8 offload_mr_fwd_mark:1;
#endif
#ifdef CONFIG_NET_CLS_ACT
__u8 tc_skip_classify:1;
return 0;
}
-/**
- * skb_header_release - release reference to header
- * @skb: buffer to operate on
- *
- * Drop a reference to the header part of the buffer. This is done
- * by acquiring a payload reference. You must not read from the header
- * part of skb->data after this.
- * Note : Check if you can use __skb_header_release() instead.
- */
-static inline void skb_header_release(struct sk_buff *skb)
-{
- BUG_ON(skb->nohdr);
- skb->nohdr = 1;
- atomic_add(1 << SKB_DATAREF_SHIFT, &skb_shinfo(skb)->dataref);
-}
-
/**
* __skb_header_release - release reference to header
* @skb: buffer to operate on
- *
- * Variant of skb_header_release() assuming skb is private to caller.
- * We can avoid one atomic operation.
*/
static inline void __skb_header_release(struct sk_buff *skb)
{
* 4. __GFP_MEMALLOC is ignored if __GFP_NOMEMALLOC is set due to
* code in gfp_to_alloc_flags that should be enforcing this.
*/
- gfp_mask |= __GFP_COLD | __GFP_COMP | __GFP_MEMALLOC;
+ gfp_mask |= __GFP_COMP | __GFP_MEMALLOC;
return alloc_pages_node(NUMA_NO_NODE, gfp_mask, order);
}
return __skb_grow(skb, len);
}
+#define rb_to_skb(rb) rb_entry_safe(rb, struct sk_buff, rbnode)
+#define skb_rb_first(root) rb_to_skb(rb_first(root))
+#define skb_rb_last(root) rb_to_skb(rb_last(root))
+#define skb_rb_next(skb) rb_to_skb(rb_next(&(skb)->rbnode))
+#define skb_rb_prev(skb) rb_to_skb(rb_prev(&(skb)->rbnode))
+
#define skb_queue_walk(queue, skb) \
for (skb = (queue)->next; \
skb != (struct sk_buff *)(queue); \
for (; skb != (struct sk_buff *)(queue); \
skb = skb->next)
+#define skb_rbtree_walk(skb, root) \
+ for (skb = skb_rb_first(root); skb != NULL; \
+ skb = skb_rb_next(skb))
+
+#define skb_rbtree_walk_from(skb) \
+ for (; skb != NULL; \
+ skb = skb_rb_next(skb))
+
+#define skb_rbtree_walk_from_safe(skb, tmp) \
+ for (; tmp = skb ? skb_rb_next(skb) : NULL, (skb != NULL); \
+ skb = tmp)
+
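These walkers give in-order traversal of an rbtree of skbs, such as the new TCP retransmit queue (see the sk_send_head/tcp_rtx_queue union below); the _safe variant caches the next node so the current skb may be unlinked in the body. A sketch, assuming a populated rb_root and a hypothetical done_with() predicate:

struct sk_buff *skb, *tmp;

skb = skb_rb_first(root);
skb_rbtree_walk_from_safe(skb, tmp) {
	if (done_with(skb)) {
		rb_erase(&skb->rbnode, root);
		kfree_skb(skb);
	}
}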
#define skb_queue_walk_from_safe(queue, skb, tmp) \
for (tmp = skb->next; \
skb != (struct sk_buff *)(queue); \
return 0;
}
+static inline u8 skb_metadata_len(const struct sk_buff *skb)
+{
+ return skb_shinfo(skb)->meta_len;
+}
+
+static inline void *skb_metadata_end(const struct sk_buff *skb)
+{
+ return skb_mac_header(skb);
+}
+
+static inline bool __skb_metadata_differs(const struct sk_buff *skb_a,
+ const struct sk_buff *skb_b,
+ u8 meta_len)
+{
+ const void *a = skb_metadata_end(skb_a);
+ const void *b = skb_metadata_end(skb_b);
+ /* Using a more efficient variant than a plain call to memcmp(). */
+#if defined(CONFIG_HAVE_EFFICIENT_UNALIGNED_ACCESS) && BITS_PER_LONG == 64
+ u64 diffs = 0;
+
+ switch (meta_len) {
+#define __it(x, op) (x -= sizeof(u##op))
+#define __it_diff(a, b, op) (*(u##op *)__it(a, op)) ^ (*(u##op *)__it(b, op))
+ case 32: diffs |= __it_diff(a, b, 64);
+ case 24: diffs |= __it_diff(a, b, 64);
+ case 16: diffs |= __it_diff(a, b, 64);
+ case 8: diffs |= __it_diff(a, b, 64);
+ break;
+ case 28: diffs |= __it_diff(a, b, 64);
+ case 20: diffs |= __it_diff(a, b, 64);
+ case 12: diffs |= __it_diff(a, b, 64);
+ case 4: diffs |= __it_diff(a, b, 32);
+ break;
+ }
+ return diffs;
+#else
+ return memcmp(a - meta_len, b - meta_len, meta_len);
+#endif
+}
+
+static inline bool skb_metadata_differs(const struct sk_buff *skb_a,
+ const struct sk_buff *skb_b)
+{
+ u8 len_a = skb_metadata_len(skb_a);
+ u8 len_b = skb_metadata_len(skb_b);
+
+ if (!(len_a | len_b))
+ return false;
+
+ return len_a != len_b ?
+ true : __skb_metadata_differs(skb_a, skb_b, len_a);
+}
+
+static inline void skb_metadata_set(struct sk_buff *skb, u8 meta_len)
+{
+ skb_shinfo(skb)->meta_len = meta_len;
+}
+
+static inline void skb_metadata_clear(struct sk_buff *skb)
+{
+ skb_metadata_set(skb, 0);
+}
+
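skb_metadata_set() records how many bytes of XDP metadata precede the MAC header (the nfp RX hunk above passes meta_len_xdp), and skb_metadata_differs() lets aggregation paths such as GRO refuse to merge packets whose metadata disagrees. A sketch of the consumer side, with hypothetical skb names:

/* merge candidates must carry byte-identical metadata (or none) */
if (skb_metadata_differs(gro_head_skb, new_skb))
	return false;	/* do not coalesce */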
struct sk_buff *skb_clone_sk(struct sk_buff *skb);
#ifdef CONFIG_NETWORK_PHY_TIMESTAMPING
#define _INET_SOCK_H
#include <linux/bitops.h>
-#include <linux/kmemcheck.h>
#include <linux/string.h>
#include <linux/types.h>
#include <linux/jhash.h>
#define ireq_state req.__req_common.skc_state
#define ireq_family req.__req_common.skc_family
- kmemcheck_bitfield_begin(flags);
u16 snd_wscale : 4,
rcv_wscale : 4,
tstamp_ok : 1,
wscale_ok : 1,
ecn_ok : 1,
acked : 1,
- no_srccheck: 1;
+ no_srccheck: 1,
+ smc_ok : 1;
- kmemcheck_bitfield_end(flags);
u32 ir_mark;
union {
struct ip_options_rcu __rcu *ireq_opt;
#include <linux/sched.h>
#include <linux/wait.h>
#include <linux/cgroup-defs.h>
-
+#include <linux/rbtree.h>
#include <linux/filter.h>
#include <linux/rculist_nulls.h>
#include <linux/poll.h>
* @sk_gso_type: GSO type (e.g. %SKB_GSO_TCPV4)
* @sk_gso_max_size: Maximum GSO segment size to build
* @sk_gso_max_segs: Maximum number of GSO segments
+ * @sk_pacing_shift: scaling factor for TCP Small Queues
* @sk_lingertime: %SO_LINGER l_linger setting
* @sk_backlog: always used with the per-socket spinlock held
* @sk_callback_lock: used with the callbacks in the end of this struct
int sk_wmem_queued;
refcount_t sk_wmem_alloc;
unsigned long sk_tsq_flags;
- struct sk_buff *sk_send_head;
+ union {
+ struct sk_buff *sk_send_head;
+ struct rb_root tcp_rtx_queue;
+ };
struct sk_buff_head sk_write_queue;
__s32 sk_peek_off;
int sk_write_pending;
#define SK_FL_TYPE_MASK 0xffff0000
#endif
- kmemcheck_bitfield_begin(flags);
unsigned int sk_padding : 1,
sk_kern_sock : 1,
sk_no_check_tx : 1,
sk_protocol : 8,
sk_type : 16;
#define SK_PROTOCOL_MAX U8_MAX
- kmemcheck_bitfield_end(flags);
-
u16 sk_gso_max_segs;
+ u8 sk_pacing_shift;
unsigned long sk_lingertime;
struct proto *sk_prot_creator;
rwlock_t sk_callback_lock;
*
*/
#define sk_for_each_entry_offset_rcu(tpos, pos, head, offset) \
- for (pos = rcu_dereference((head)->first); \
+ for (pos = rcu_dereference(hlist_first_rcu(head)); \
pos != NULL && \
({ tpos = (typeof(*tpos) *)((void *)pos - offset); 1;}); \
- pos = rcu_dereference(pos->next))
+ pos = rcu_dereference(hlist_next_rcu(pos)))
static inline struct user_namespace *sk_user_ns(struct sock *sk)
{
*/
unsigned long *memory_pressure;
long *sysctl_mem;
+
int *sysctl_wmem;
int *sysctl_rmem;
+ u32 sysctl_wmem_offset;
+ u32 sysctl_rmem_offset;
+
int max_header;
bool no_autobind;
struct kmem_cache *slab;
unsigned int obj_size;
- int slab_flags;
+ slab_flags_t slab_flags;
struct percpu_counter *orphan_count;
extern __u32 sysctl_wmem_default;
extern __u32 sysctl_rmem_default;
+static inline int sk_get_wmem0(const struct sock *sk, const struct proto *proto)
+{
+ /* Does this proto have per netns sysctl_wmem ? */
+ if (proto->sysctl_wmem_offset)
+ return *(int *)((void *)sock_net(sk) + proto->sysctl_wmem_offset);
+
+ return *proto->sysctl_wmem;
+}
+
+static inline int sk_get_rmem0(const struct sock *sk, const struct proto *proto)
+{
+ /* Does this proto have per netns sysctl_rmem ? */
+ if (proto->sysctl_rmem_offset)
+ return *(int *)((void *)sock_net(sk) + proto->sysctl_rmem_offset);
+
+ return *proto->sysctl_rmem;
+}
+
#endif /* _SOCK_H */
Enable the userfaultfd() system call that allows to intercept and
handle page faults in userland.
-config PCI_QUIRKS
- default y
- bool "Enable PCI quirk workarounds" if EXPERT
- depends on PCI
- help
- This enables workarounds for various PCI chipset
- bugs/quirks. Disable this only if your target machine is
- unaffected by PCI quirks.
-
config MEMBARRIER
bool "Enable membarrier() system call" if EXPERT
default y
bool
default n
-config SLABINFO
- bool
- depends on PROC_FS
- depends on SLAB || SLUB_DEBUG
- default y
-
config RT_MUTEXES
bool
if (fp == NULL)
return NULL;
- kmemcheck_annotate_bitfield(fp, meta);
-
aux = kzalloc(sizeof(*aux), GFP_KERNEL | gfp_extra_flags);
if (aux == NULL) {
vfree(fp);
if (fp == NULL) {
__bpf_prog_uncharge(fp_old->aux->user, delta);
} else {
- kmemcheck_annotate_bitfield(fp, meta);
-
memcpy(fp, fp_old, fp_old->pages * PAGE_SIZE);
fp->pages = pages;
fp->aux->prog = fp;
static void bpf_get_prog_name(const struct bpf_prog *prog, char *sym)
{
+ const char *end = sym + KSYM_NAME_LEN;
+
BUILD_BUG_ON(sizeof("bpf_prog_") +
- sizeof(prog->tag) * 2 + 1 > KSYM_NAME_LEN);
+ sizeof(prog->tag) * 2 +
+ /* name has been null terminated.
+ * We would need +1 for the '_' preceding
+ * the name. However, the null character
+ * is double counted between the name and the
+ * sizeof("bpf_prog_") above, so we omit
+ * the +1 here.
+ */
+ sizeof(prog->aux->name) > KSYM_NAME_LEN);
sym += snprintf(sym, KSYM_NAME_LEN, "bpf_prog_");
sym = bin2hex(sym, prog->tag, sizeof(prog->tag));
- *sym = 0;
+ if (prog->aux->name[0])
+ snprintf(sym, (size_t)(end - sym), "_%s", prog->aux->name);
+ else
+ *sym = 0;
}
static __always_inline unsigned long
fp = __vmalloc(fp_other->pages * PAGE_SIZE, gfp_flags, PAGE_KERNEL);
if (fp != NULL) {
- kmemcheck_annotate_bitfield(fp, meta);
-
/* aux->prog still points to the fp_other one, so
* when promoting the clone to the real program,
* this still needs to be adapted.
* valid program, which in this case would simply not
* be JITed, but falls back to the interpreter.
*/
- fp = bpf_int_jit_compile(fp);
+ if (!bpf_prog_is_dev_bound(fp->aux)) {
+ fp = bpf_int_jit_compile(fp);
+ } else {
+ *err = bpf_prog_offload_compile(fp);
+ if (*err)
+ return fp;
+ }
bpf_prog_lock_ro(fp);
/* The tail call compatibility check can only be done at
}
EXPORT_SYMBOL_GPL(bpf_prog_select_runtime);
+static unsigned int __bpf_prog_ret1(const void *ctx,
+ const struct bpf_insn *insn)
+{
+ return 1;
+}
+
+static struct bpf_prog_dummy {
+ struct bpf_prog prog;
+} dummy_bpf_prog = {
+ .prog = {
+ .bpf_func = __bpf_prog_ret1,
+ },
+};
+
+/* To avoid allocating an empty bpf_prog_array for cgroups that
+ * don't have any bpf program attached, use one global 'empty_prog_array'.
+ * It will not be modified by the caller of bpf_prog_array_alloc()
+ * (since the caller requested prog_cnt == 0), and that pointer
+ * should be 'freed' with bpf_prog_array_free().
+ */
+static struct {
+ struct bpf_prog_array hdr;
+ struct bpf_prog *null_prog;
+} empty_prog_array = {
+ .null_prog = NULL,
+};
+
+struct bpf_prog_array __rcu *bpf_prog_array_alloc(u32 prog_cnt, gfp_t flags)
+{
+ if (prog_cnt)
+ return kzalloc(sizeof(struct bpf_prog_array) +
+ sizeof(struct bpf_prog *) * (prog_cnt + 1),
+ flags);
+
+ return &empty_prog_array.hdr;
+}
+
+void bpf_prog_array_free(struct bpf_prog_array __rcu *progs)
+{
+ if (!progs ||
+ progs == (struct bpf_prog_array __rcu *)&empty_prog_array.hdr)
+ return;
+ kfree_rcu(progs, rcu);
+}
+
+int bpf_prog_array_length(struct bpf_prog_array __rcu *progs)
+{
+ struct bpf_prog **prog;
+ u32 cnt = 0;
+
+ rcu_read_lock();
+ prog = rcu_dereference(progs)->progs;
+ for (; *prog; prog++)
+ cnt++;
+ rcu_read_unlock();
+ return cnt;
+}
+
+int bpf_prog_array_copy_to_user(struct bpf_prog_array __rcu *progs,
+ __u32 __user *prog_ids, u32 cnt)
+{
+ struct bpf_prog **prog;
+ u32 i = 0, id;
+
+ rcu_read_lock();
+ prog = rcu_dereference(progs)->progs;
+ for (; *prog; prog++) {
+ id = (*prog)->aux->id;
+ if (copy_to_user(prog_ids + i, &id, sizeof(id))) {
+ rcu_read_unlock();
+ return -EFAULT;
+ }
+ if (++i == cnt) {
+ prog++;
+ break;
+ }
+ }
+ rcu_read_unlock();
+ if (*prog)
+ return -ENOSPC;
+ return 0;
+}
+
+void bpf_prog_array_delete_safe(struct bpf_prog_array __rcu *progs,
+ struct bpf_prog *old_prog)
+{
+ struct bpf_prog **prog = progs->progs;
+
+ for (; *prog; prog++)
+ if (*prog == old_prog) {
+ WRITE_ONCE(*prog, &dummy_bpf_prog.prog);
+ break;
+ }
+}
+
+int bpf_prog_array_copy(struct bpf_prog_array __rcu *old_array,
+ struct bpf_prog *exclude_prog,
+ struct bpf_prog *include_prog,
+ struct bpf_prog_array **new_array)
+{
+ int new_prog_cnt, carry_prog_cnt = 0;
+ struct bpf_prog **existing_prog;
+ struct bpf_prog_array *array;
+ int new_prog_idx = 0;
+
+ /* Figure out how many existing progs we need to carry over to
+ * the new array.
+ */
+ if (old_array) {
+ existing_prog = old_array->progs;
+ for (; *existing_prog; existing_prog++) {
+ if (*existing_prog != exclude_prog &&
+ *existing_prog != &dummy_bpf_prog.prog)
+ carry_prog_cnt++;
+ if (*existing_prog == include_prog)
+ return -EEXIST;
+ }
+ }
+
+ /* How many progs (not NULL) will be in the new array? */
+ new_prog_cnt = carry_prog_cnt;
+ if (include_prog)
+ new_prog_cnt += 1;
+
+ /* Do we have any prog (not NULL) in the new array? */
+ if (!new_prog_cnt) {
+ *new_array = NULL;
+ return 0;
+ }
+
+ /* +1 as the end of prog_array is marked with NULL */
+ array = bpf_prog_array_alloc(new_prog_cnt + 1, GFP_KERNEL);
+ if (!array)
+ return -ENOMEM;
+
+ /* Fill in the new prog array */
+ if (carry_prog_cnt) {
+ existing_prog = old_array->progs;
+ for (; *existing_prog; existing_prog++)
+ if (*existing_prog != exclude_prog &&
+ *existing_prog != &dummy_bpf_prog.prog)
+ array->progs[new_prog_idx++] = *existing_prog;
+ }
+ if (include_prog)
+ array->progs[new_prog_idx++] = include_prog;
+ array->progs[new_prog_idx] = NULL;
+ *new_array = array;
+ return 0;
+}
+
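A sketch of the calling convention this array API implies for a hypothetical attach path: build a replacement array under the attach lock, publish it via RCU, then free the old one (bpf_prog_array_free() is kfree_rcu()-based and a no-op for the shared empty array). attach_point, old_array, old_prog and new_prog are placeholders:

struct bpf_prog_array *new_array;
int err;

err = bpf_prog_array_copy(old_array, old_prog, new_prog, &new_array);
if (err)		/* e.g. -EEXIST if new_prog is already attached */
	return err;

rcu_assign_pointer(attach_point->progs, new_array);
bpf_prog_array_free(old_array);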
static void bpf_prog_free_deferred(struct work_struct *work)
{
struct bpf_prog_aux *aux;
aux = container_of(work, struct bpf_prog_aux, work);
+ if (bpf_prog_is_dev_bound(aux))
+ bpf_prog_offload_destroy(aux->prog);
bpf_jit_free(aux->prog);
}
EXPORT_TRACEPOINT_SYMBOL_GPL(xdp_exception);
+/* These are only used within the BPF_SYSCALL code */
+#ifdef CONFIG_BPF_SYSCALL
EXPORT_TRACEPOINT_SYMBOL_GPL(bpf_prog_get_type);
EXPORT_TRACEPOINT_SYMBOL_GPL(bpf_prog_put_rcu);
+#endif
#include <linux/module.h>
#include <linux/types.h>
#include <linux/kernel.h>
-#include <linux/kmemcheck.h>
#include <linux/mm.h>
#include <linux/interrupt.h>
#include <linux/in.h>
shinfo = skb_shinfo(skb);
memset(shinfo, 0, offsetof(struct skb_shared_info, dataref));
atomic_set(&shinfo->dataref, 1);
- kmemcheck_annotate_variable(shinfo->destructor_arg);
if (flags & SKB_ALLOC_FCLONE) {
struct sk_buff_fclones *fclones;
fclones = container_of(skb, struct sk_buff_fclones, skb1);
- kmemcheck_annotate_bitfield(&fclones->skb2, flags1);
skb->fclone = SKB_FCLONE_ORIG;
refcount_set(&fclones->fclone_ref, 1);
shinfo = skb_shinfo(skb);
memset(shinfo, 0, offsetof(struct skb_shared_info, dataref));
atomic_set(&shinfo->dataref, 1);
- kmemcheck_annotate_variable(shinfo->destructor_arg);
return skb;
}
*/
void *netdev_alloc_frag(unsigned int fragsz)
{
- return __netdev_alloc_frag(fragsz, GFP_ATOMIC | __GFP_COLD);
+ return __netdev_alloc_frag(fragsz, GFP_ATOMIC);
}
EXPORT_SYMBOL(netdev_alloc_frag);
void *napi_alloc_frag(unsigned int fragsz)
{
- return __napi_alloc_frag(fragsz, GFP_ATOMIC | __GFP_COLD);
+ return __napi_alloc_frag(fragsz, GFP_ATOMIC);
}
EXPORT_SYMBOL(napi_alloc_frag);
if (!n)
return NULL;
- kmemcheck_annotate_bitfield(n, flags1);
n->fclone = SKB_FCLONE_UNAVAILABLE;
}
/* Set the tail pointer and length */
skb_put(n, skb->len);
- if (skb_copy_bits(skb, -headerlen, n->head, headerlen + skb->len))
- BUG();
+ BUG_ON(skb_copy_bits(skb, -headerlen, n->head, headerlen + skb->len));
copy_skb_header(n, skb);
return n;
BUG_ON(nhead < 0);
- if (skb_shared(skb))
- BUG();
+ BUG_ON(skb_shared(skb));
size = SKB_DATA_ALIGN(size);
skb->nohdr = 0;
atomic_set(&skb_shinfo(skb)->dataref, 1);
+ skb_metadata_clear(skb);
+
/* It is not generally safe to change skb->truesize.
* For the moment, we really care of rx path, or
* when skb is orphaned (not attached to a socket).
head_copy_off = newheadroom - head_copy_len;
/* Copy the linear header and data. */
- if (skb_copy_bits(skb, -head_copy_len, n->head + head_copy_off,
- skb->len + head_copy_len))
- BUG();
+ BUG_ON(skb_copy_bits(skb, -head_copy_len, n->head + head_copy_off,
+ skb->len + head_copy_len));
copy_skb_header(n, skb);
return NULL;
}
- if (skb_copy_bits(skb, skb_headlen(skb), skb_tail_pointer(skb), delta))
- BUG();
+ BUG_ON(skb_copy_bits(skb, skb_headlen(skb),
+ skb_tail_pointer(skb), delta));
/* Optimization: no fragments, no reasons to preestimate
* size of pulled pages. Superb.
*/
void skb_rbtree_purge(struct rb_root *root)
{
- struct sk_buff *skb, *next;
+ struct rb_node *p = rb_first(root);
- rbtree_postorder_for_each_entry_safe(skb, next, root, rbnode)
- kfree_skb(skb);
+ while (p) {
+ struct sk_buff *skb = rb_entry(p, struct sk_buff, rbnode);
- *root = RB_ROOT;
+ p = rb_next(p);
+ rb_erase(&skb->rbnode, root);
+ kfree_skb(skb);
+ }
}
/**
bool skb_try_coalesce(struct sk_buff *to, struct sk_buff *from,
bool *fragstolen, int *delta_truesize)
{
+ struct skb_shared_info *to_shinfo, *from_shinfo;
int i, delta, len = from->len;
*fragstolen = false;
return true;
}
- if (skb_has_frag_list(to) || skb_has_frag_list(from))
+ to_shinfo = skb_shinfo(to);
+ from_shinfo = skb_shinfo(from);
+ if (to_shinfo->frag_list || from_shinfo->frag_list)
return false;
if (skb_zcopy(to) || skb_zcopy(from))
return false;
struct page *page;
unsigned int offset;
- if (skb_shinfo(to)->nr_frags +
- skb_shinfo(from)->nr_frags >= MAX_SKB_FRAGS)
+ if (to_shinfo->nr_frags +
+ from_shinfo->nr_frags >= MAX_SKB_FRAGS)
return false;
if (skb_head_is_locked(from))
page = virt_to_head_page(from->head);
offset = from->data - (unsigned char *)page_address(page);
- skb_fill_page_desc(to, skb_shinfo(to)->nr_frags,
+ skb_fill_page_desc(to, to_shinfo->nr_frags,
page, offset, skb_headlen(from));
*fragstolen = true;
} else {
- if (skb_shinfo(to)->nr_frags +
- skb_shinfo(from)->nr_frags > MAX_SKB_FRAGS)
+ if (to_shinfo->nr_frags +
+ from_shinfo->nr_frags > MAX_SKB_FRAGS)
return false;
delta = from->truesize - SKB_TRUESIZE(skb_end_offset(from));
WARN_ON_ONCE(delta < len);
- memcpy(skb_shinfo(to)->frags + skb_shinfo(to)->nr_frags,
- skb_shinfo(from)->frags,
- skb_shinfo(from)->nr_frags * sizeof(skb_frag_t));
- skb_shinfo(to)->nr_frags += skb_shinfo(from)->nr_frags;
+ memcpy(to_shinfo->frags + to_shinfo->nr_frags,
+ from_shinfo->frags,
+ from_shinfo->nr_frags * sizeof(skb_frag_t));
+ to_shinfo->nr_frags += from_shinfo->nr_frags;
if (!skb_cloned(from))
- skb_shinfo(from)->nr_frags = 0;
+ from_shinfo->nr_frags = 0;
/* if the skb is not cloned this does nothing
* since we set nr_frags to 0.
*/
- for (i = 0; i < skb_shinfo(from)->nr_frags; i++)
- skb_frag_ref(from, i);
+ for (i = 0; i < from_shinfo->nr_frags; i++)
+ __skb_frag_ref(&from_shinfo->frags[i]);
to->truesize += delta;
to->len += len;
sk = kmalloc(prot->obj_size, priority);
if (sk != NULL) {
- kmemcheck_annotate_bitfield(sk, flags);
-
if (security_sk_alloc(sk, family, priority))
goto out_free;
/* guarantee minimum buffer size under pressure */
if (kind == SK_MEM_RECV) {
- if (atomic_read(&sk->sk_rmem_alloc) < prot->sysctl_rmem[0])
+ if (atomic_read(&sk->sk_rmem_alloc) < sk_get_rmem0(sk, prot))
return 1;
} else { /* SK_MEM_SEND */
+ int wmem0 = sk_get_wmem0(sk, prot);
+
if (sk->sk_type == SOCK_STREAM) {
- if (sk->sk_wmem_queued < prot->sysctl_wmem[0])
+ if (sk->sk_wmem_queued < wmem0)
return 1;
- } else if (refcount_read(&sk->sk_wmem_alloc) <
- prot->sysctl_wmem[0])
+ } else if (refcount_read(&sk->sk_wmem_alloc) < wmem0) {
return 1;
+ }
}
if (sk_has_memory_pressure(sk)) {
sk_init_common(sk);
sk->sk_send_head = NULL;
- init_timer(&sk->sk_timer);
+ timer_setup(&sk->sk_timer, NULL, 0);
sk->sk_allocation = GFP_KERNEL;
sk->sk_rcvbuf = sysctl_rmem_default;
sk->sk_max_pacing_rate = ~0U;
sk->sk_pacing_rate = ~0U;
+ sk->sk_pacing_shift = 10;
sk->sk_incoming_cpu = -1;
/*
* Before updating sk_refcnt, we must commit prior changes to memory
static DECLARE_BITMAP(proto_inuse_idx, PROTO_INUSE_NR);
-#ifdef CONFIG_NET_NS
void sock_prot_inuse_add(struct net *net, struct proto *prot, int val)
{
__this_cpu_add(net->core.inuse->val[prot->inuse_idx], val);
}
core_initcall(net_inuse_init);
-#else
-static DEFINE_PER_CPU(struct prot_inuse, prot_inuse);
-
-void sock_prot_inuse_add(struct net *net, struct proto *prot, int val)
-{
- __this_cpu_add(prot_inuse.val[prot->inuse_idx], val);
-}
-EXPORT_SYMBOL_GPL(sock_prot_inuse_add);
-
-int sock_prot_inuse_get(struct net *net, struct proto *prot)
-{
- int cpu, idx = prot->inuse_idx;
- int res = 0;
-
- for_each_possible_cpu(cpu)
- res += per_cpu(prot_inuse, cpu).val[idx];
-
- return res >= 0 ? res : 0;
-}
-EXPORT_SYMBOL_GPL(sock_prot_inuse_get);
-#endif
static void assign_proto_idx(struct proto *prot)
{
*/
#include <linux/kernel.h>
-#include <linux/kmemcheck.h>
#include <linux/slab.h>
#include <linux/module.h>
#include <net/inet_hashtables.h>
}
EXPORT_SYMBOL_GPL(__inet_twsk_hashdance);
-static void tw_timer_handler(unsigned long data)
+static void tw_timer_handler(struct timer_list *t)
{
- struct inet_timewait_sock *tw = (struct inet_timewait_sock *)data;
+ struct inet_timewait_sock *tw = from_timer(tw, t, tw_timer);
if (tw->tw_kill)
__NET_INC_STATS(twsk_net(tw), LINUX_MIB_TIMEWAITKILLED);
if (tw) {
const struct inet_sock *inet = inet_sk(sk);
- kmemcheck_annotate_bitfield(tw, flags);
-
tw->tw_dr = dr;
/* Give us an identity. */
tw->tw_daddr = inet->inet_daddr;
tw->tw_prot = sk->sk_prot_creator;
atomic64_set(&tw->tw_cookie, atomic64_read(&sk->sk_cookie));
twsk_net_set(tw, sock_net(sk));
- setup_pinned_timer(&tw->tw_timer, tw_timer_handler,
- (unsigned long)tw);
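+ /* TIMER_PINNED keeps the timewait timer on the CPU that armed it,
+ * matching the old setup_pinned_timer(); the handler now recovers
+ * tw via from_timer() instead of an unsigned long cast.
+ */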
+ timer_setup(&tw->tw_timer, tw_timer_handler, TIMER_PINNED);
/*
* Because we use RCU lookups, we should not set tw_refcnt
* to a non null value before everything is setup for this
#include <linux/ipsec.h>
#include <asm/unaligned.h>
#include <linux/errqueue.h>
+#include <trace/events/tcp.h>
+#include <linux/static_key.h>
-int sysctl_tcp_fack __read_mostly;
-int sysctl_tcp_max_reordering __read_mostly = 300;
-int sysctl_tcp_dsack __read_mostly = 1;
-int sysctl_tcp_app_win __read_mostly = 31;
-int sysctl_tcp_adv_win_scale __read_mostly = 1;
-EXPORT_SYMBOL(sysctl_tcp_adv_win_scale);
-
-/* rfc5961 challenge ack rate limiting */
-int sysctl_tcp_challenge_ack_limit = 1000;
-
-int sysctl_tcp_stdurg __read_mostly;
-int sysctl_tcp_rfc1337 __read_mostly;
int sysctl_tcp_max_orphans __read_mostly = NR_FILE;
-int sysctl_tcp_frto __read_mostly = 2;
-int sysctl_tcp_min_rtt_wlen __read_mostly = 300;
-int sysctl_tcp_moderate_rcvbuf __read_mostly = 1;
-int sysctl_tcp_early_retrans __read_mostly = 3;
-int sysctl_tcp_invalid_ratelimit __read_mostly = HZ/2;
#define FLAG_DATA 0x01 /* Incoming frame contained data. */
#define FLAG_WIN_UPDATE 0x02 /* Incoming ACK was a window update. */
sndmem *= nr_segs * per_mss;
if (sk->sk_sndbuf < sndmem)
- sk->sk_sndbuf = min(sndmem, sysctl_tcp_wmem[2]);
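+ /* tcp_{w,r}mem now live per network namespace in struct netns_ipv4,
+ * hence the sock_net(sk)->ipv4.sysctl_tcp_* lookups throughout this
+ * file in place of the old global arrays.
+ */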
+ sk->sk_sndbuf = min(sndmem, sock_net(sk)->ipv4.sysctl_tcp_wmem[2]);
}
/* 2. Tuning advertised window (window_clamp, rcv_ssthresh)
{
struct tcp_sock *tp = tcp_sk(sk);
/* Optimize this! */
- int truesize = tcp_win_from_space(skb->truesize) >> 1;
- int window = tcp_win_from_space(sysctl_tcp_rmem[2]) >> 1;
+ int truesize = tcp_win_from_space(sk, skb->truesize) >> 1;
+ int window = tcp_win_from_space(sk, sock_net(sk)->ipv4.sysctl_tcp_rmem[2]) >> 1;
while (tp->rcv_ssthresh <= window) {
if (truesize <= skb->len)
/* Check #2. Increase window, if skb with such overhead
* will fit to rcvbuf in future.
*/
- if (tcp_win_from_space(skb->truesize) <= skb->len)
+ if (tcp_win_from_space(sk, skb->truesize) <= skb->len)
incr = 2 * tp->advmss;
else
incr = __tcp_grow_window(sk, skb);
/* Dynamic Right Sizing (DRS) has 2 to 3 RTT latency
* Allow enough cushion so that sender is not limited by our window
*/
- if (sysctl_tcp_moderate_rcvbuf)
+ if (sock_net(sk)->ipv4.sysctl_tcp_moderate_rcvbuf)
rcvmem <<= 2;
if (sk->sk_rcvbuf < rcvmem)
- sk->sk_rcvbuf = min(rcvmem, sysctl_tcp_rmem[2]);
+ sk->sk_rcvbuf = min(rcvmem, sock_net(sk)->ipv4.sysctl_tcp_rmem[2]);
}
/* 4. Try to fixup all. It is made immediately after connection enters
*/
void tcp_init_buffer_space(struct sock *sk)
{
+ int tcp_app_win = sock_net(sk)->ipv4.sysctl_tcp_app_win;
struct tcp_sock *tp = tcp_sk(sk);
int maxwin;
if (tp->window_clamp >= maxwin) {
tp->window_clamp = maxwin;
- if (sysctl_tcp_app_win && maxwin > 4 * tp->advmss)
+ if (tcp_app_win && maxwin > 4 * tp->advmss)
tp->window_clamp = max(maxwin -
- (maxwin >> sysctl_tcp_app_win),
+ (maxwin >> tcp_app_win),
4 * tp->advmss);
}
/* Force reservation of one segment. */
- if (sysctl_tcp_app_win &&
+ if (tcp_app_win &&
tp->window_clamp > 2 * tp->advmss &&
tp->window_clamp + tp->advmss > maxwin)
tp->window_clamp = max(2 * tp->advmss, maxwin - tp->advmss);
{
struct tcp_sock *tp = tcp_sk(sk);
struct inet_connection_sock *icsk = inet_csk(sk);
+ struct net *net = sock_net(sk);
icsk->icsk_ack.quick = 0;
- if (sk->sk_rcvbuf < sysctl_tcp_rmem[2] &&
+ if (sk->sk_rcvbuf < net->ipv4.sysctl_tcp_rmem[2] &&
!(sk->sk_userlocks & SOCK_RCVBUF_LOCK) &&
!tcp_under_memory_pressure(sk) &&
sk_memory_allocated(sk) < sk_prot_mem_limits(sk, 0)) {
sk->sk_rcvbuf = min(atomic_read(&sk->sk_rmem_alloc),
- sysctl_tcp_rmem[2]);
+ net->ipv4.sysctl_tcp_rmem[2]);
}
if (atomic_read(&sk->sk_rmem_alloc) > sk->sk_rcvbuf)
tp->rcv_ssthresh = min(tp->window_clamp, 2U * tp->advmss);
* <prev RTT . ><current RTT .. ><next RTT .... >
*/
- if (sysctl_tcp_moderate_rcvbuf &&
+ if (sock_net(sk)->ipv4.sysctl_tcp_moderate_rcvbuf &&
!(sk->sk_userlocks & SOCK_RCVBUF_LOCK)) {
int rcvwin, rcvmem, rcvbuf;
}
rcvmem = SKB_TRUESIZE(tp->advmss + MAX_TCP_HEADER);
- while (tcp_win_from_space(rcvmem) < tp->advmss)
+ while (tcp_win_from_space(sk, rcvmem) < tp->advmss)
rcvmem += 128;
- rcvbuf = min(rcvwin / tp->advmss * rcvmem, sysctl_tcp_rmem[2]);
+ rcvbuf = min(rcvwin / tp->advmss * rcvmem,
+ sock_net(sk)->ipv4.sysctl_tcp_rmem[2]);
if (rcvbuf > sk->sk_rcvbuf) {
sk->sk_rcvbuf = rcvbuf;
tp->srtt_us = max(1U, srtt);
}
-/* Set the sk_pacing_rate to allow proper sizing of TSO packets.
- * Note: TCP stack does not yet implement pacing.
- * FQ packet scheduler can be used to implement cheap but effective
- * TCP pacing, to smooth the burst on large writes when packets
- * in flight is significantly lower than cwnd (or rwin)
- */
-int sysctl_tcp_pacing_ss_ratio __read_mostly = 200;
-int sysctl_tcp_pacing_ca_ratio __read_mostly = 120;
-
static void tcp_update_pacing_rate(struct sock *sk)
{
const struct tcp_sock *tp = tcp_sk(sk);
* end of slow start and should slow down.
*/
if (tp->snd_cwnd < tp->snd_ssthresh / 2)
- rate *= sysctl_tcp_pacing_ss_ratio;
+ rate *= sock_net(sk)->ipv4.sysctl_tcp_pacing_ss_ratio;
else
- rate *= sysctl_tcp_pacing_ca_ratio;
+ rate *= sock_net(sk)->ipv4.sysctl_tcp_pacing_ca_ratio;
rate *= max(tp->snd_cwnd, tp->packets_out);
return min_t(__u32, cwnd, tp->snd_cwnd_clamp);
}
-/*
- * Packet counting of FACK is based on in-order assumptions, therefore TCP
- * disables it when reordering is detected
- */
-void tcp_disable_fack(struct tcp_sock *tp)
-{
- /* RFC3517 uses different metric in lost marker => reset on change */
- if (tcp_is_fack(tp))
- tp->lost_skb_hint = NULL;
- tp->rx_opt.sack_ok &= ~TCP_FACK_ENABLED;
-}
-
/* Take note that the peer is sending D-SACKs */
static void tcp_dsack_seen(struct tcp_sock *tp)
{
tp->rx_opt.sack_ok |= TCP_DSACK_SEEN;
+ tp->rack.dsack_seen = 1;
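+ /* Record the D-SACK for RACK: evidence of a spurious
+ * retransmission lets RACK grow its reordering window (reo_wnd).
+ */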
}
-static void tcp_update_reordering(struct sock *sk, const int metric,
- const int ts)
+/* It's reordering when a higher sequence was delivered (i.e. SACKed) before
+ * some lower never-retransmitted sequence ("low_seq"). The maximum reordering
+ * distance is approximated in units of full-MSS packets ("reordering").
+ */
+static void tcp_check_sack_reordering(struct sock *sk, const u32 low_seq,
+ const int ts)
{
struct tcp_sock *tp = tcp_sk(sk);
- int mib_idx;
+ const u32 mss = tp->mss_cache;
+ u32 fack, metric;
- if (WARN_ON_ONCE(metric < 0))
+ fack = tcp_highest_sack_seq(tp);
+ if (!before(low_seq, fack))
return;
- if (metric > tp->reordering) {
- tp->reordering = min(sysctl_tcp_max_reordering, metric);
-
+ metric = fack - low_seq;
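+ /* Example: highest SACKed seq 3000, newly acked never-retransmitted
+ * seq 1000 => metric = 2000 bytes, i.e. two full-MSS packets of
+ * reordering distance at an mss of 1000.
+ */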
+ if ((metric > tp->reordering * mss) && mss) {
#if FASTRETRANS_DEBUG > 1
pr_debug("Disorder%d %d %u f%u s%u rr%d\n",
tp->rx_opt.sack_ok, inet_csk(sk)->icsk_ca_state,
tp->reordering,
- tp->fackets_out,
+ 0,
tp->sacked_out,
tp->undo_marker ? tp->undo_retrans : 0);
#endif
- tcp_disable_fack(tp);
+ tp->reordering = min_t(u32, (metric + mss - 1) / mss,
+ sock_net(sk)->ipv4.sysctl_tcp_max_reordering);
}
tp->rack.reord = 1;
-
/* This exciting event is worth remembering. 8) */
- if (ts)
- mib_idx = LINUX_MIB_TCPTSREORDER;
- else if (tcp_is_reno(tp))
- mib_idx = LINUX_MIB_TCPRENOREORDER;
- else if (tcp_is_fack(tp))
- mib_idx = LINUX_MIB_TCPFACKREORDER;
- else
- mib_idx = LINUX_MIB_TCPSACKREORDER;
-
- NET_INC_STATS(sock_net(sk), mib_idx);
+ NET_INC_STATS(sock_net(sk),
+ ts ? LINUX_MIB_TCPTSREORDER : LINUX_MIB_TCPSACKREORDER);
}
/* This must be called before lost_out is incremented */
* 3. Loss detection event of two flavors:
* A. Scoreboard estimator decided the packet is lost.
* A'. Reno "three dupacks" marks head of queue lost.
- * A''. Its FACK modification, head until snd.fack is lost.
* B. SACK arrives sacking SND.NXT at the moment, when the
* segment was retransmitted.
* 4. D-SACK added new rule: D-SACK changes any tag to S.
}
struct tcp_sacktag_state {
- int reord;
- int fack_count;
+ u32 reord;
/* Timestamps for earliest and latest never-retransmitted segment
* that was SACKed. RTO needs the earliest RTT to stay conservative,
* but congestion control should still get an accurate delay signal.
u64 last_sackt;
struct rate_sample *rate;
int flag;
+ unsigned int mss_now;
};
/* Check if skb is fully within the SACK block. In presence of GSO skbs,
if (pkt_len >= skb->len && !in_sack)
return 0;
- err = tcp_fragment(sk, skb, pkt_len, mss, GFP_ATOMIC);
+ err = tcp_fragment(sk, TCP_FRAG_IN_RTX_QUEUE, skb,
+ pkt_len, mss, GFP_ATOMIC);
if (err < 0)
return err;
}
u64 xmit_time)
{
struct tcp_sock *tp = tcp_sk(sk);
- int fack_count = state->fack_count;
/* Account D-SACK for retransmitted packet. */
if (dup_sack && (sacked & TCPCB_RETRANS)) {
if (tp->undo_marker && tp->undo_retrans > 0 &&
after(end_seq, tp->undo_marker))
tp->undo_retrans--;
- if (sacked & TCPCB_SACKED_ACKED)
- state->reord = min(fack_count, state->reord);
+ if ((sacked & TCPCB_SACKED_ACKED) &&
+ before(start_seq, state->reord))
+ state->reord = start_seq;
}
/* Nothing to do; acked frame is about to be dropped (was ACKed). */
* which was in a hole. This is reordering.
*/
if (before(start_seq,
- tcp_highest_sack_seq(tp)))
- state->reord = min(fack_count,
- state->reord);
+ tcp_highest_sack_seq(tp)) &&
+ before(start_seq, state->reord))
+ state->reord = start_seq;
+
if (!after(end_seq, tp->high_seq))
state->flag |= FLAG_ORIG_SACK_ACKED;
if (state->first_sackt == 0)
tp->sacked_out += pcount;
tp->delivered += pcount; /* Out-of-order packets delivered */
- fack_count += pcount;
-
/* Lost marker hint past SACKed? Tweak RFC3517 cnt */
- if (!tcp_is_fack(tp) && tp->lost_skb_hint &&
+ if (tp->lost_skb_hint &&
before(start_seq, TCP_SKB_CB(tp->lost_skb_hint)->seq))
tp->lost_cnt_hint += pcount;
-
- if (fack_count > tp->fackets_out)
- tp->fackets_out = fack_count;
}
/* D-SACK. We can detect redundant retransmission in S|R and plain R
/* Shift newly-SACKed bytes from this skb to the immediately previous
* already-SACKed sk_buff. Mark the newly-SACKed bytes as such.
*/
-static bool tcp_shifted_skb(struct sock *sk, struct sk_buff *skb,
+static bool tcp_shifted_skb(struct sock *sk, struct sk_buff *prev,
+ struct sk_buff *skb,
struct tcp_sacktag_state *state,
unsigned int pcount, int shifted, int mss,
bool dup_sack)
{
struct tcp_sock *tp = tcp_sk(sk);
- struct sk_buff *prev = tcp_write_queue_prev(sk, skb);
u32 start_seq = TCP_SKB_CB(skb)->seq; /* start of newly-SACKed */
u32 end_seq = start_seq + shifted; /* end of newly-SACKed */
if (unlikely(TCP_SKB_CB(prev)->tx.delivered_mstamp))
TCP_SKB_CB(prev)->tx.delivered_mstamp = 0;
- tcp_unlink_write_queue(skb, sk);
- sk_wmem_free_skb(sk, skb);
+ tcp_rtx_queue_unlink_and_free(skb, sk);
NET_INC_STATS(sock_net(sk), LINUX_MIB_SACKMERGED);
goto fallback;
/* Can only happen with delayed DSACK + discard craziness */
- if (unlikely(skb == tcp_write_queue_head(sk)))
+ prev = skb_rb_prev(skb);
+ if (!prev)
goto fallback;
- prev = tcp_write_queue_prev(sk, skb);
if ((TCP_SKB_CB(prev)->sacked & TCPCB_TAGBITS) != TCPCB_SACKED_ACKED)
goto fallback;
if (!skb_shift(prev, skb, len))
goto fallback;
- if (!tcp_shifted_skb(sk, skb, state, pcount, len, mss, dup_sack))
+ if (!tcp_shifted_skb(sk, prev, skb, state, pcount, len, mss, dup_sack))
goto out;
/* Hole filled allows collapsing with the next as well, this is very
* useful when hole on every nth skb pattern happens
*/
- if (prev == tcp_write_queue_tail(sk))
+ skb = skb_rb_next(prev);
+ if (!skb)
goto out;
- skb = tcp_write_queue_next(sk, prev);
if (!skb_can_shift(skb) ||
- (skb == tcp_send_head(sk)) ||
((TCP_SKB_CB(skb)->sacked & TCPCB_TAGBITS) != TCPCB_SACKED_ACKED) ||
(mss != tcp_skb_seglen(skb)))
goto out;
len = skb->len;
if (skb_shift(prev, skb, len)) {
pcount += tcp_skb_pcount(skb);
- tcp_shifted_skb(sk, skb, state, tcp_skb_pcount(skb), len, mss, 0);
+ tcp_shifted_skb(sk, prev, skb, state, tcp_skb_pcount(skb),
+ len, mss, 0);
}
out:
- state->fack_count += pcount;
return prev;
noop:
struct tcp_sock *tp = tcp_sk(sk);
struct sk_buff *tmp;
- tcp_for_write_queue_from(skb, sk) {
+ skb_rbtree_walk_from(skb) {
int in_sack = 0;
bool dup_sack = dup_sack_in;
- if (skb == tcp_send_head(sk))
- break;
-
/* queue is in-order => we can short-circuit the walk early */
if (!before(TCP_SKB_CB(skb)->seq, end_seq))
break;
tcp_skb_pcount(skb),
skb->skb_mstamp);
tcp_rate_skb_delivered(sk, skb, state->rate);
+ if (TCP_SKB_CB(skb)->sacked & TCPCB_SACKED_ACKED)
+ list_del_init(&skb->tcp_tsorted_anchor);
if (!before(TCP_SKB_CB(skb)->seq,
tcp_highest_sack_seq(tp)))
tcp_advance_highest_sack(sk, skb);
}
-
- state->fack_count += tcp_skb_pcount(skb);
}
return skb;
}
-/* Avoid all extra work that is being done by sacktag while walking in
- * a normal way
- */
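+/* Binary-search the rtx rbtree for the skb containing @seq (seq within
+ * [skb->seq, skb->end_seq)); returns NULL when no queued skb covers it.
+ * An O(log n) replacement for the old linear write-queue walk.
+ */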
+static struct sk_buff *tcp_sacktag_bsearch(struct sock *sk,
+ struct tcp_sacktag_state *state,
+ u32 seq)
+{
+ struct rb_node *parent, **p = &sk->tcp_rtx_queue.rb_node;
+ struct sk_buff *skb;
+
+ while (*p) {
+ parent = *p;
+ skb = rb_to_skb(parent);
+ if (before(seq, TCP_SKB_CB(skb)->seq)) {
+ p = &parent->rb_left;
+ continue;
+ }
+ if (!before(seq, TCP_SKB_CB(skb)->end_seq)) {
+ p = &parent->rb_right;
+ continue;
+ }
+ return skb;
+ }
+ return NULL;
+}
+
static struct sk_buff *tcp_sacktag_skip(struct sk_buff *skb, struct sock *sk,
struct tcp_sacktag_state *state,
u32 skip_to_seq)
{
- tcp_for_write_queue_from(skb, sk) {
- if (skb == tcp_send_head(sk))
- break;
-
- if (after(TCP_SKB_CB(skb)->end_seq, skip_to_seq))
- break;
+ if (skb && after(TCP_SKB_CB(skb)->seq, skip_to_seq))
+ return skb;
- state->fack_count += tcp_skb_pcount(skb);
- }
- return skb;
+ return tcp_sacktag_bsearch(sk, state, skip_to_seq);
}
static struct sk_buff *tcp_maybe_skipping_dsack(struct sk_buff *skb,
int first_sack_index;
state->flag = 0;
- state->reord = tp->packets_out;
+ state->reord = tp->snd_nxt;
- if (!tp->sacked_out) {
- if (WARN_ON(tp->fackets_out))
- tp->fackets_out = 0;
+ if (!tp->sacked_out)
tcp_highest_sack_reset(sk);
- }
found_dup_sack = tcp_check_dsack(sk, ack_skb, sp_wire,
num_sacks, prior_snd_una);
}
}
- skb = tcp_write_queue_head(sk);
- state->fack_count = 0;
+ state->mss_now = tcp_current_mss(sk);
+ skb = NULL;
i = 0;
if (!tp->sacked_out) {
skb = tcp_highest_sack(sk);
if (!skb)
break;
- state->fack_count = tp->fackets_out;
cache++;
goto walk;
}
skb = tcp_highest_sack(sk);
if (!skb)
break;
- state->fack_count = tp->fackets_out;
}
skb = tcp_sacktag_skip(skb, sk, state, start_seq);
for (j = 0; j < used_sacks; j++)
tp->recv_sack_cache[i++] = sp[j];
- if ((state->reord < tp->fackets_out) &&
- ((inet_csk(sk)->icsk_ca_state != TCP_CA_Loss) || tp->undo_marker))
- tcp_update_reordering(sk, tp->fackets_out - state->reord, 0);
+ if (inet_csk(sk)->icsk_ca_state != TCP_CA_Loss || tp->undo_marker)
+ tcp_check_sack_reordering(sk, state->reord, 0);
tcp_verify_left_out(tp);
out:
static void tcp_check_reno_reordering(struct sock *sk, const int addend)
{
struct tcp_sock *tp = tcp_sk(sk);
- if (tcp_limit_reno_sacked(tp))
- tcp_update_reordering(sk, tp->packets_out + addend, 0);
+
+ if (!tcp_limit_reno_sacked(tp))
+ return;
+
+ tp->reordering = min_t(u32, tp->packets_out + addend,
+ sock_net(sk)->ipv4.sysctl_tcp_max_reordering);
+ NET_INC_STATS(sock_net(sk), LINUX_MIB_TCPRENOREORDER);
}
/* Emulate SACKs for SACKless connection: account for a new dupack. */
tp->lost_out = 0;
tp->undo_marker = 0;
tp->undo_retrans = -1;
- tp->fackets_out = 0;
tp->sacked_out = 0;
}
if (tcp_is_reno(tp))
tcp_reset_reno_sack(tp);
- skb = tcp_write_queue_head(sk);
+ skb = tcp_rtx_queue_head(sk);
is_reneg = skb && (TCP_SKB_CB(skb)->sacked & TCPCB_SACKED_ACKED);
if (is_reneg) {
NET_INC_STATS(sock_net(sk), LINUX_MIB_TCPSACKRENEGING);
tp->sacked_out = 0;
- tp->fackets_out = 0;
}
tcp_clear_all_retrans_hints(tp);
- tcp_for_write_queue(skb, sk) {
- if (skb == tcp_send_head(sk))
- break;
-
+ skb_rbtree_walk_from(skb) {
mark_lost = (!(TCP_SKB_CB(skb)->sacked & TCPCB_SACKED_ACKED) ||
is_reneg);
if (mark_lost)
* falsely raise the receive window, which results in repeated
* timeouts and stop-and-go behavior.
*/
- tp->frto = sysctl_tcp_frto &&
+ tp->frto = net->ipv4.sysctl_tcp_frto &&
(new_recovery || icsk->icsk_retransmits) &&
!inet_csk(sk)->icsk_mtup.probe_size;
}
return false;
}
-static inline int tcp_fackets_out(const struct tcp_sock *tp)
-{
- return tcp_is_reno(tp) ? tp->sacked_out + 1 : tp->fackets_out;
-}
-
/* Heuristics to calculate the number of duplicate ACKs. There's no dupACK
* counter when SACK is enabled (without SACK, sacked_out is used for
* that purpose).
*
- * Instead, with FACK TCP uses fackets_out that includes both SACKed
- * segments up to the highest received SACK block so far and holes in
- * between them.
- *
* With reordering, holes may still be in flight, so RFC3517 recovery
* uses pure sacked_out (total number of SACKed segments) even though
* it violates the RFC that uses duplicate ACKs, often these are equal
*/
static inline int tcp_dupack_heuristics(const struct tcp_sock *tp)
{
- return tcp_is_fack(tp) ? tp->fackets_out : tp->sacked_out + 1;
+ return tp->sacked_out + 1;
}
-/* Linux NewReno/SACK/FACK/ECN state machine.
+/* Linux NewReno/SACK/ECN state machine.
* --------------------------------------
*
* "Open" Normal state, no dubious events, fast path.
* dynamically measured and adjusted. This is implemented in
* tcp_rack_mark_lost.
*
- * FACK (Disabled by default. Subsumbed by RACK):
- * It is the simplest heuristics. As soon as we decided
- * that something is lost, we decide that _all_ not SACKed
- * packets until the most forward SACK are lost. I.e.
- * lost_out = fackets_out - sacked_out and left_out = fackets_out.
- * It is absolutely correct estimate, if network does not reorder
- * packets. And it loses any connection to reality when reordering
- * takes place. We use FACK by default until reordering
- * is suspected on the path to this destination.
- *
* If the receiver does not support SACK:
*
* NewReno (RFC6582): in Recovery we assume that one segment
}
/* Detect loss in event "A" above by marking the head of the queue as lost.
- * For FACK or non-SACK(Reno) senders, the first "packets" number of segments
+ * For non-SACK(Reno) senders, the first "packets" number of segments
* are considered lost. For RFC3517 SACK, a segment is considered lost if it
* has at least tp->reordering SACKed segments above it; "packets" refers to
* the maximum SACKed segments to pass before reaching this limit.
const u32 loss_high = tcp_is_sack(tp) ? tp->snd_nxt : tp->high_seq;
WARN_ON(packets > tp->packets_out);
- if (tp->lost_skb_hint) {
- skb = tp->lost_skb_hint;
- cnt = tp->lost_cnt_hint;
+ skb = tp->lost_skb_hint;
+ if (skb) {
/* Head already handled? */
- if (mark_head && skb != tcp_write_queue_head(sk))
+ if (mark_head && after(TCP_SKB_CB(skb)->seq, tp->snd_una))
return;
+ cnt = tp->lost_cnt_hint;
} else {
- skb = tcp_write_queue_head(sk);
+ skb = tcp_rtx_queue_head(sk);
cnt = 0;
}
- tcp_for_write_queue_from(skb, sk) {
- if (skb == tcp_send_head(sk))
- break;
+ skb_rbtree_walk_from(skb) {
/* TODO: do this better -- this is not the most efficient way to do this... */
tp->lost_skb_hint = skb;
break;
oldcnt = cnt;
- if (tcp_is_fack(tp) || tcp_is_reno(tp) ||
+ if (tcp_is_reno(tp) ||
(TCP_SKB_CB(skb)->sacked & TCPCB_SACKED_ACKED))
cnt += tcp_skb_pcount(skb);
if (cnt > packets) {
- if ((tcp_is_sack(tp) && !tcp_is_fack(tp)) ||
+ if (tcp_is_sack(tp) ||
(TCP_SKB_CB(skb)->sacked & TCPCB_SACKED_ACKED) ||
(oldcnt >= packets))
break;
/* If needed, chop off the prefix to mark as lost. */
lost = (packets - oldcnt) * mss;
if (lost < skb->len &&
- tcp_fragment(sk, skb, lost, mss, GFP_ATOMIC) < 0)
+ tcp_fragment(sk, TCP_FRAG_IN_RTX_QUEUE, skb,
+ lost, mss, GFP_ATOMIC) < 0)
break;
cnt = packets;
}
if (tcp_is_reno(tp)) {
tcp_mark_head_lost(sk, 1, 1);
- } else if (tcp_is_fack(tp)) {
- int lost = tp->fackets_out - tp->reordering;
- if (lost <= 0)
- lost = 1;
- tcp_mark_head_lost(sk, lost, 0);
} else {
int sacked_upto = tp->sacked_out - tp->reordering;
if (sacked_upto >= 0)
if (tp->retrans_out)
return true;
- skb = tcp_write_queue_head(sk);
+ skb = tcp_rtx_queue_head(sk);
if (unlikely(skb && TCP_SKB_CB(skb)->sacked & TCPCB_EVER_RETRANS))
return true;
return false;
}
-#if FASTRETRANS_DEBUG > 1
static void DBGUNDO(struct sock *sk, const char *msg)
{
+#if FASTRETRANS_DEBUG > 1
struct tcp_sock *tp = tcp_sk(sk);
struct inet_sock *inet = inet_sk(sk);
tp->packets_out);
}
#endif
-}
-#else
-#define DBGUNDO(x...) do { } while (0)
#endif
+}
static void tcp_undo_cwnd_reduction(struct sock *sk, bool unmark_loss)
{
if (unmark_loss) {
struct sk_buff *skb;
- tcp_for_write_queue(skb, sk) {
- if (skb == tcp_send_head(sk))
- break;
+ skb_rbtree_walk(skb, &sk->tcp_rtx_queue) {
TCP_SKB_CB(skb)->sacked &= ~TCPCB_LOST;
}
tp->lost_out = 0;
mib_idx = LINUX_MIB_TCPFULLUNDO;
NET_INC_STATS(sock_net(sk), mib_idx);
+ } else if (tp->rack.reo_wnd_persist) {
+ tp->rack.reo_wnd_persist--;
}
if (tp->snd_una == tp->high_seq && tcp_is_reno(tp)) {
/* Hold old state until something *above* high_seq
struct tcp_sock *tp = tcp_sk(sk);
if (tp->undo_marker && !tp->undo_retrans) {
+ tp->rack.reo_wnd_persist = min(TCP_RACK_RECOVERY_THRESH,
+ tp->rack.reo_wnd_persist + 1);
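+ /* A D-SACK-based undo proves the retransmit was spurious; keep
+ * RACK's widened reo_wnd for up to TCP_RACK_RECOVERY_THRESH more
+ * recoveries.
+ */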
DBGUNDO(sk, "D-SACK");
tcp_undo_cwnd_reduction(sk, false);
NET_INC_STATS(sock_net(sk), LINUX_MIB_TCPDSACKUNDO);
struct sk_buff *skb;
unsigned int mss = tcp_current_mss(sk);
- tcp_for_write_queue(skb, sk) {
- if (skb == tcp_send_head(sk))
- break;
+ skb_rbtree_walk(skb, &sk->tcp_rtx_queue) {
if (tcp_skb_seglen(skb) > mss &&
!(TCP_SKB_CB(skb)->sacked & TCPCB_SACKED_ACKED)) {
if (TCP_SKB_CB(skb)->sacked & TCPCB_SACKED_RETRANS) {
* is updated in tcp_ack()). Otherwise fall back to
* the conventional recovery.
*/
- if (tcp_send_head(sk) &&
+ if (!tcp_write_queue_empty(sk) &&
after(tcp_wnd_end(tp), tp->snd_nxt)) {
*rexmit = REXMIT_NEW;
return;
}
/* Undo during fast recovery after partial ACK. */
-static bool tcp_try_undo_partial(struct sock *sk, const int acked)
+static bool tcp_try_undo_partial(struct sock *sk, u32 prior_snd_una)
{
struct tcp_sock *tp = tcp_sk(sk);
if (tp->undo_marker && tcp_packet_delayed(tp)) {
/* Plain luck! Hole is filled with delayed
- * packet, rather than with a retransmit.
+ * packet, rather than with a retransmit. Check reordering.
*/
- tcp_update_reordering(sk, tcp_fackets_out(tp) + acked, 1);
+ tcp_check_sack_reordering(sk, prior_snd_una, 1);
/* We are getting evidence that the reordering degree is higher
* than we realized. If there are no retransmits out then we
struct tcp_sock *tp = tcp_sk(sk);
/* Use RACK to detect loss */
- if (sysctl_tcp_recovery & TCP_RACK_LOSS_DETECTION) {
+ if (sock_net(sk)->ipv4.sysctl_tcp_recovery & TCP_RACK_LOSS_DETECTION) {
u32 prior_retrans = tp->retrans_out;
tcp_rack_mark_lost(sk);
}
}
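+/* Sequence-based replacement for the old "fackets_out > reordering"
+ * test: force fast retransmit once the highest SACKed sequence runs
+ * more than the reordering window (reordering * mss_cache bytes)
+ * past SND.UNA.
+ */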
+static bool tcp_force_fast_retransmit(struct sock *sk)
+{
+ struct tcp_sock *tp = tcp_sk(sk);
+
+ return after(tcp_highest_sack_seq(tp),
+ tp->snd_una + tp->reordering * tp->mss_cache);
+}
+
/* Process an event, which can update packets-in-flight not trivially.
* Main goal of this function is to calculate new estimate for left_out,
* taking into account both packets sitting in receiver's buffer and
* It does _not_ decide what to send, it is made in function
* tcp_xmit_retransmit_queue().
*/
-static void tcp_fastretrans_alert(struct sock *sk, const int acked,
+static void tcp_fastretrans_alert(struct sock *sk, const u32 prior_snd_una,
bool is_dupack, int *ack_flag, int *rexmit)
{
struct inet_connection_sock *icsk = inet_csk(sk);
struct tcp_sock *tp = tcp_sk(sk);
int fast_rexmit = 0, flag = *ack_flag;
bool do_lost = is_dupack || ((flag & FLAG_DATA_SACKED) &&
- (tcp_fackets_out(tp) > tp->reordering));
+ tcp_force_fast_retransmit(sk));
- if (WARN_ON(!tp->packets_out && tp->sacked_out))
+ if (!tp->packets_out && tp->sacked_out)
tp->sacked_out = 0;
- if (WARN_ON(!tp->sacked_out && tp->fackets_out))
- tp->fackets_out = 0;
/* Now state machine starts.
* A. ECE, hence prohibit cwnd undoing, the reduction is required. */
if (tcp_is_reno(tp) && is_dupack)
tcp_add_reno_sack(sk);
} else {
- if (tcp_try_undo_partial(sk, acked))
+ if (tcp_try_undo_partial(sk, prior_snd_una))
return;
/* Partial ACK arrived. Force fast retransmit. */
do_lost = tcp_is_reno(tp) ||
- tcp_fackets_out(tp) > tp->reordering;
+ tcp_force_fast_retransmit(sk);
}
if (tcp_try_undo_dsack(sk)) {
tcp_try_keep_open(sk);
(*ack_flag & FLAG_LOST_RETRANS)))
return;
/* Change state if cwnd is undone or retransmits are lost */
+ /* fall through */
default:
if (tcp_is_reno(tp)) {
if (flag & FLAG_SND_UNA_ADVANCED)
static void tcp_update_rtt_min(struct sock *sk, u32 rtt_us)
{
+ u32 wlen = sock_net(sk)->ipv4.sysctl_tcp_min_rtt_wlen * HZ;
struct tcp_sock *tp = tcp_sk(sk);
- u32 wlen = sysctl_tcp_min_rtt_wlen * HZ;
minmax_running_min(&tp->rtt_min, wlen, tcp_jiffies32,
rtt_us ? : jiffies_to_usecs(1));
shinfo = skb_shinfo(skb);
if (!before(shinfo->tskey, prior_snd_una) &&
- before(shinfo->tskey, tcp_sk(sk)->snd_una))
- __skb_tstamp_tx(skb, NULL, sk, SCM_TSTAMP_ACK);
+ before(shinfo->tskey, tcp_sk(sk)->snd_una)) {
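+ /* tcp_tsorted_anchor shares a union with skb->_skb_refdst, so
+ * park that word with tcp_skb_tsorted_save() while
+ * __skb_tstamp_tx() clones the skb, then restore it.
+ */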
+ tcp_skb_tsorted_save(skb) {
+ __skb_tstamp_tx(skb, NULL, sk, SCM_TSTAMP_ACK);
+ } tcp_skb_tsorted_restore(skb);
+ }
}
/* Remove acknowledged frames from the retransmission queue. If our packet
* is before the ack sequence we can discard it as it's confirmed to have
* arrived at the other end.
*/
-static int tcp_clean_rtx_queue(struct sock *sk, int prior_fackets,
- u32 prior_snd_una, int *acked,
+static int tcp_clean_rtx_queue(struct sock *sk, u32 prior_fack,
+ u32 prior_snd_una,
struct tcp_sacktag_state *sack)
{
const struct inet_connection_sock *icsk = inet_csk(sk);
u64 first_ackt, last_ackt;
struct tcp_sock *tp = tcp_sk(sk);
u32 prior_sacked = tp->sacked_out;
- u32 reord = tp->packets_out;
+ u32 reord = tp->snd_nxt; /* lowest acked un-retx un-sacked seq */
+ struct sk_buff *skb, *next;
bool fully_acked = true;
long sack_rtt_us = -1L;
long seq_rtt_us = -1L;
long ca_rtt_us = -1L;
- struct sk_buff *skb;
u32 pkts_acked = 0;
u32 last_in_flight = 0;
bool rtt_update;
first_ackt = 0;
- while ((skb = tcp_write_queue_head(sk)) && skb != tcp_send_head(sk)) {
+ for (skb = skb_rb_first(&sk->tcp_rtx_queue); skb; skb = next) {
struct tcp_skb_cb *scb = TCP_SKB_CB(skb);
+ const u32 start_seq = scb->seq;
u8 sacked = scb->sacked;
u32 acked_pcount;
break;
fully_acked = false;
} else {
- /* Speedup tcp_unlink_write_queue() and next loop */
- prefetchw(skb->next);
acked_pcount = tcp_skb_pcount(skb);
}
first_ackt = last_ackt;
last_in_flight = TCP_SKB_CB(skb)->tx.in_flight;
- reord = min(pkts_acked, reord);
+ if (before(start_seq, reord))
+ reord = start_seq;
if (!after(scb->end_seq, tp->high_seq))
flag |= FLAG_ORIG_SACK_ACKED;
}
if (!fully_acked)
break;
- tcp_unlink_write_queue(skb, sk);
- sk_wmem_free_skb(sk, skb);
+ next = skb_rb_next(skb);
if (unlikely(skb == tp->retransmit_skb_hint))
tp->retransmit_skb_hint = NULL;
if (unlikely(skb == tp->lost_skb_hint))
tp->lost_skb_hint = NULL;
+ tcp_rtx_queue_unlink_and_free(skb, sk);
}
if (!skb)
int delta;
/* Non-retransmitted hole got filled? That's reordering */
- if (reord < prior_fackets && reord <= tp->fackets_out)
- tcp_update_reordering(sk, tp->fackets_out - reord, 0);
+ if (before(reord, prior_fack))
+ tcp_check_sack_reordering(sk, reord, 0);
- delta = tcp_is_fack(tp) ? pkts_acked :
- prior_sacked - tp->sacked_out;
+ delta = prior_sacked - tp->sacked_out;
tp->lost_cnt_hint -= min(tp->lost_cnt_hint, delta);
}
-
- tp->fackets_out -= min(pkts_acked, tp->fackets_out);
-
} else if (skb && rtt_update && sack_rtt_us >= 0 &&
sack_rtt_us > tcp_stamp_us_delta(tp->tcp_mstamp, skb->skb_mstamp)) {
/* Do not re-arm RTO if the sack RTT is measured from data sent
}
}
#endif
- *acked = pkts_acked;
return flag;
}
static void tcp_ack_probe(struct sock *sk)
{
- const struct tcp_sock *tp = tcp_sk(sk);
struct inet_connection_sock *icsk = inet_csk(sk);
+ struct sk_buff *head = tcp_send_head(sk);
+ const struct tcp_sock *tp = tcp_sk(sk);
/* Was it a usable window open? */
-
- if (!after(TCP_SKB_CB(tcp_send_head(sk))->end_seq, tcp_wnd_end(tp))) {
+ if (!head)
+ return;
+ if (!after(TCP_SKB_CB(head)->end_seq, tcp_wnd_end(tp))) {
icsk->icsk_backoff = 0;
inet_csk_clear_xmit_timer(sk, ICSK_TIME_PROBE0);
/* Socket must be waked up by subsequent tcp_data_snd_check().
tp->pred_flags = 0;
tcp_fast_path_check(sk);
- if (tcp_send_head(sk))
+ if (!tcp_write_queue_empty(sk))
tcp_slow_start_after_idle_check(sk);
if (nwin > tp->max_window) {
if (*last_oow_ack_time) {
s32 elapsed = (s32)(tcp_jiffies32 - *last_oow_ack_time);
- if (0 <= elapsed && elapsed < sysctl_tcp_invalid_ratelimit) {
+ if (0 <= elapsed && elapsed < net->ipv4.sysctl_tcp_invalid_ratelimit) {
NET_INC_STATS(net, mib_idx);
return true; /* rate-limited: don't send yet! */
}
static u32 challenge_timestamp;
static unsigned int challenge_count;
struct tcp_sock *tp = tcp_sk(sk);
+ struct net *net = sock_net(sk);
u32 count, now;
/* First check our per-socket dupack rate limit. */
- if (__tcp_oow_rate_limited(sock_net(sk),
+ if (__tcp_oow_rate_limited(net,
LINUX_MIB_TCPACKSKIPPEDCHALLENGE,
&tp->last_oow_ack_time))
return;
/* Then check host-wide RFC 5961 rate limit. */
now = jiffies / HZ;
if (now != challenge_timestamp) {
- u32 half = (sysctl_tcp_challenge_ack_limit + 1) >> 1;
+ u32 ack_limit = net->ipv4.sysctl_tcp_challenge_ack_limit;
+ u32 half = (ack_limit + 1) >> 1;
challenge_timestamp = now;
- WRITE_ONCE(challenge_count, half +
- prandom_u32_max(sysctl_tcp_challenge_ack_limit));
+ WRITE_ONCE(challenge_count, half + prandom_u32_max(ack_limit));
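+ /* Refill once per second with a randomized budget in roughly
+ * [limit/2, 3*limit/2) so the exact global challenge-ACK limit
+ * cannot be probed by an attacker (RFC 5961 hardening).
+ */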
}
count = READ_ONCE(challenge_count);
if (count > 0) {
WRITE_ONCE(challenge_count, count - 1);
- NET_INC_STATS(sock_net(sk), LINUX_MIB_TCPCHALLENGEACK);
+ NET_INC_STATS(net, LINUX_MIB_TCPCHALLENGEACK);
tcp_send_ack(sk);
}
}
u32 ack_seq = TCP_SKB_CB(skb)->seq;
u32 ack = TCP_SKB_CB(skb)->ack_seq;
bool is_dupack = false;
- u32 prior_fackets;
int prior_packets = tp->packets_out;
u32 delivered = tp->delivered;
u32 lost = tp->lost;
- int acked = 0; /* Number of packets newly acked */
int rexmit = REXMIT_NONE; /* Flag to (re)transmit to recover losses */
+ u32 prior_fack;
sack_state.first_sackt = 0;
sack_state.rate = &rs;
- /* We very likely will need to access write queue head. */
- prefetchw(sk->sk_write_queue.next);
+ /* We very likely will need to access rtx queue. */
+ prefetch(sk->tcp_rtx_queue.rb_node);
/* If the ack is older than previous acks
* then we can probably ignore it.
icsk->icsk_retransmits = 0;
}
- prior_fackets = tp->fackets_out;
+ prior_fack = tcp_is_sack(tp) ? tcp_highest_sack_seq(tp) : tp->snd_una;
rs.prior_in_flight = tcp_packets_in_flight(tp);
/* ts_recent update must be made after we are sure that the packet
goto no_queue;
/* See if we can take anything off of the retransmit queue. */
- flag |= tcp_clean_rtx_queue(sk, prior_fackets, prior_snd_una, &acked,
- &sack_state);
+ flag |= tcp_clean_rtx_queue(sk, prior_fack, prior_snd_una, &sack_state);
+
+ tcp_rack_update_reo_wnd(sk, &rs);
if (tp->tlp_high_seq)
tcp_process_tlp_ack(sk, ack, flag);
if (tcp_ack_is_dubious(sk, flag)) {
is_dupack = !(flag & (FLAG_SND_UNA_ADVANCED | FLAG_NOT_DUP));
- tcp_fastretrans_alert(sk, acked, is_dupack, &flag, &rexmit);
+ tcp_fastretrans_alert(sk, prior_snd_una, is_dupack, &flag,
+ &rexmit);
}
if ((flag & FLAG_FORWARD_PROGRESS) || !(flag & FLAG_NOT_DUP))
no_queue:
/* If data was DSACKed, see if we can undo a cwnd reduction. */
if (flag & FLAG_DSACKING_ACK)
- tcp_fastretrans_alert(sk, acked, is_dupack, &flag, &rexmit);
+ tcp_fastretrans_alert(sk, prior_snd_una, is_dupack, &flag,
+ &rexmit);
/* If this ack opens up a zero window, clear backoff. It was
* being used to time the probes, and is probably far higher than
* it needs to be for normal retransmission.
*/
- if (tcp_send_head(sk))
- tcp_ack_probe(sk);
+ tcp_ack_probe(sk);
if (tp->tlp_high_seq)
tcp_process_tlp_ack(sk, ack, flag);
if (TCP_SKB_CB(skb)->sacked) {
flag |= tcp_sacktag_write_queue(sk, skb, prior_snd_una,
&sack_state);
- tcp_fastretrans_alert(sk, acked, is_dupack, &flag, &rexmit);
+ tcp_fastretrans_alert(sk, prior_snd_una, is_dupack, &flag,
+ &rexmit);
tcp_xmit_recovery(sk, rexmit);
}
foc->exp = exp_opt;
}
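+/* SMC negotiation rides on a TCP experimental option in the SYN: an
+ * even opsize of at least TCPOLEN_EXP_SMC_BASE carrying
+ * TCPOPT_SMC_MAGIC sets smc_ok, guarded by the tcp_have_smc static key.
+ */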
+static void smc_parse_options(const struct tcphdr *th,
+ struct tcp_options_received *opt_rx,
+ const unsigned char *ptr,
+ int opsize)
+{
+#if IS_ENABLED(CONFIG_SMC)
+ if (static_branch_unlikely(&tcp_have_smc)) {
+ if (th->syn && !(opsize & 1) &&
+ opsize >= TCPOLEN_EXP_SMC_BASE &&
+ get_unaligned_be32(ptr) == TCPOPT_SMC_MAGIC)
+ opt_rx->smc_ok = 1;
+ }
+#endif
+}
+
/* Look for tcp options. Normally only called on SYN and SYNACK packets.
* But, this can also be called on packets in the established flow when
* the fast version below fails.
tcp_parse_fastopen_option(opsize -
TCPOLEN_EXP_FASTOPEN_BASE,
ptr + 2, th->syn, foc, true);
+ else
+ smc_parse_options(th, opt_rx, ptr,
+ opsize);
break;
}
/* When we get a reset we do this. */
void tcp_reset(struct sock *sk)
{
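+ /* Tracepoint from the new <trace/events/tcp.h>: log the incoming
+ * RST before the error is propagated to the socket.
+ */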
+ trace_tcp_receive_reset(sk);
+
/* We want the right error as BSD sees it (and indeed as we do). */
switch (sk->sk_state) {
case TCP_SYN_SENT:
{
struct tcp_sock *tp = tcp_sk(sk);
- if (tcp_is_sack(tp) && sysctl_tcp_dsack) {
+ if (tcp_is_sack(tp) && sock_net(sk)->ipv4.sysctl_tcp_dsack) {
int mib_idx;
if (before(seq, tp->rcv_nxt))
NET_INC_STATS(sock_net(sk), LINUX_MIB_DELAYEDACKLOST);
tcp_enter_quickack_mode(sk);
- if (tcp_is_sack(tp) && sysctl_tcp_dsack) {
+ if (tcp_is_sack(tp) && sock_net(sk)->ipv4.sysctl_tcp_dsack) {
u32 end_seq = TCP_SKB_CB(skb)->end_seq;
if (after(TCP_SKB_CB(skb)->end_seq, tp->rcv_nxt))
tp->rx_opt.num_sacks = num_sacks;
}
-enum tcp_queue {
- OOO_QUEUE,
- RCV_QUEUE,
-};
-
/**
* tcp_try_coalesce - try to merge skb to prior one
* @sk: socket
* Returns true if caller should free @from instead of queueing it
*/
static bool tcp_try_coalesce(struct sock *sk,
- enum tcp_queue dest,
struct sk_buff *to,
struct sk_buff *from,
bool *fragstolen)
if (TCP_SKB_CB(from)->has_rxtstamp) {
TCP_SKB_CB(to)->has_rxtstamp = true;
- if (dest == OOO_QUEUE)
- TCP_SKB_CB(to)->swtstamp = TCP_SKB_CB(from)->swtstamp;
- else
- to->tstamp = from->tstamp;
+ to->tstamp = from->tstamp;
}
return true;
p = rb_first(&tp->out_of_order_queue);
while (p) {
- skb = rb_entry(p, struct sk_buff, rbnode);
+ skb = rb_to_skb(p);
if (after(TCP_SKB_CB(skb)->seq, tp->rcv_nxt))
break;
}
p = rb_next(p);
rb_erase(&skb->rbnode, &tp->out_of_order_queue);
- /* Replace tstamp which was stomped by rbnode */
- if (TCP_SKB_CB(skb)->has_rxtstamp)
- skb->tstamp = TCP_SKB_CB(skb)->swtstamp;
if (unlikely(!after(TCP_SKB_CB(skb)->end_seq, tp->rcv_nxt))) {
SOCK_DEBUG(sk, "ofo packet was already received\n");
TCP_SKB_CB(skb)->end_seq);
tail = skb_peek_tail(&sk->sk_receive_queue);
- eaten = tail && tcp_try_coalesce(sk, RCV_QUEUE,
- tail, skb, &fragstolen);
+ eaten = tail && tcp_try_coalesce(sk, tail, skb, &fragstolen);
tcp_rcv_nxt_update(tp, TCP_SKB_CB(skb)->end_seq);
fin = TCP_SKB_CB(skb)->tcp_flags & TCPHDR_FIN;
if (!eaten)
static void tcp_data_queue_ofo(struct sock *sk, struct sk_buff *skb)
{
struct tcp_sock *tp = tcp_sk(sk);
- struct rb_node **p, *q, *parent;
+ struct rb_node **p, *parent;
struct sk_buff *skb1;
u32 seq, end_seq;
bool fragstolen;
return;
}
- /* Stash tstamp to avoid being stomped on by rbnode */
- if (TCP_SKB_CB(skb)->has_rxtstamp)
- TCP_SKB_CB(skb)->swtstamp = skb->tstamp;
-
/* Disable header prediction. */
tp->pred_flags = 0;
inet_csk_schedule_ack(sk);
/* In the typical case, we are adding an skb to the end of the list.
* Use of ooo_last_skb avoids the O(Log(N)) rbtree lookup.
*/
- if (tcp_try_coalesce(sk, OOO_QUEUE, tp->ooo_last_skb,
+ if (tcp_try_coalesce(sk, tp->ooo_last_skb,
skb, &fragstolen)) {
coalesce_done:
tcp_grow_window(sk, skb);
parent = NULL;
while (*p) {
parent = *p;
- skb1 = rb_entry(parent, struct sk_buff, rbnode);
+ skb1 = rb_to_skb(parent);
if (before(seq, TCP_SKB_CB(skb1)->seq)) {
p = &parent->rb_left;
continue;
__kfree_skb(skb1);
goto merge_right;
}
- } else if (tcp_try_coalesce(sk, OOO_QUEUE, skb1,
+ } else if (tcp_try_coalesce(sk, skb1,
skb, &fragstolen)) {
goto coalesce_done;
}
merge_right:
/* Remove other segments covered by skb. */
- while ((q = rb_next(&skb->rbnode)) != NULL) {
- skb1 = rb_entry(q, struct sk_buff, rbnode);
-
+ while ((skb1 = skb_rb_next(skb)) != NULL) {
if (!after(end_seq, TCP_SKB_CB(skb1)->seq))
break;
if (before(end_seq, TCP_SKB_CB(skb1)->end_seq)) {
tcp_drop(sk, skb1);
}
/* If there is no skb after us, we are the last_skb ! */
- if (!q)
+ if (!skb1)
tp->ooo_last_skb = skb;
add_sack:
__skb_pull(skb, hdrlen);
eaten = (tail &&
- tcp_try_coalesce(sk, RCV_QUEUE, tail,
+ tcp_try_coalesce(sk, tail,
skb, fragstolen)) ? 1 : 0;
tcp_rcv_nxt_update(tcp_sk(sk), TCP_SKB_CB(skb)->end_seq);
if (!eaten) {
if (list)
return !skb_queue_is_last(list, skb) ? skb->next : NULL;
- return rb_entry_safe(rb_next(&skb->rbnode), struct sk_buff, rbnode);
+ return skb_rb_next(skb);
}
static struct sk_buff *tcp_collapse_one(struct sock *sk, struct sk_buff *skb,
}
/* Insert skb into rb tree, ordered by TCP_SKB_CB(skb)->seq */
-static void tcp_rbtree_insert(struct rb_root *root, struct sk_buff *skb)
+void tcp_rbtree_insert(struct rb_root *root, struct sk_buff *skb)
{
struct rb_node **p = &root->rb_node;
struct rb_node *parent = NULL;
while (*p) {
parent = *p;
- skb1 = rb_entry(parent, struct sk_buff, rbnode);
+ skb1 = rb_to_skb(parent);
if (before(TCP_SKB_CB(skb)->seq, TCP_SKB_CB(skb1)->seq))
p = &parent->rb_left;
else
* overlaps to the next one.
*/
if (!(TCP_SKB_CB(skb)->tcp_flags & (TCPHDR_SYN | TCPHDR_FIN)) &&
- (tcp_win_from_space(skb->truesize) > skb->len ||
+ (tcp_win_from_space(sk, skb->truesize) > skb->len ||
before(TCP_SKB_CB(skb)->seq, start))) {
end_of_skbs = false;
break;
{
struct tcp_sock *tp = tcp_sk(sk);
struct sk_buff *skb, *head;
- struct rb_node *p;
u32 start, end;
- p = rb_first(&tp->out_of_order_queue);
- skb = rb_entry_safe(p, struct sk_buff, rbnode);
+ skb = skb_rb_first(&tp->out_of_order_queue);
new_range:
if (!skb) {
- p = rb_last(&tp->out_of_order_queue);
- /* Note: This is possible p is NULL here. We do not
- * use rb_entry_safe(), as ooo_last_skb is valid only
- * if rbtree is not empty.
- */
- tp->ooo_last_skb = rb_entry(p, struct sk_buff, rbnode);
+ tp->ooo_last_skb = skb_rb_last(&tp->out_of_order_queue);
return;
}
start = TCP_SKB_CB(skb)->seq;
end = TCP_SKB_CB(skb)->end_seq;
for (head = skb;;) {
- skb = tcp_skb_next(skb, NULL);
+ skb = skb_rb_next(skb);
/* Range is terminated when we see a gap or when
* we are at the queue end.
do {
prev = rb_prev(node);
rb_erase(node, &tp->out_of_order_queue);
- tcp_drop(sk, rb_entry(node, struct sk_buff, rbnode));
+ tcp_drop(sk, rb_to_skb(node));
sk_mem_reclaim(sk);
if (atomic_read(&sk->sk_rmem_alloc) <= sk->sk_rcvbuf &&
!tcp_under_memory_pressure(sk))
break;
node = prev;
} while (node);
- tp->ooo_last_skb = rb_entry(prev, struct sk_buff, rbnode);
+ tp->ooo_last_skb = rb_to_skb(prev);
/* Reset SACK state. A conforming SACK implementation will
* do the same at a timeout based retransmit. When a connection
struct tcp_sock *tp = tcp_sk(sk);
u32 ptr = ntohs(th->urg_ptr);
- if (ptr && !sysctl_tcp_stdurg)
+ if (ptr && !sock_net(sk)->ipv4.sysctl_tcp_stdurg)
ptr--;
ptr += ntohl(th->seq);
security_inet_conn_established(sk, skb);
}
- /* Make sure socket is routed, for correct metrics. */
- icsk->icsk_af_ops->rebuild_header(sk);
-
- tcp_init_metrics(sk);
- tcp_call_bpf(sk, BPF_SOCK_OPS_ACTIVE_ESTABLISHED_CB);
- tcp_init_congestion_control(sk);
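+ /* tcp_init_transfer() bundles the previously open-coded sequence:
+ * MTU-probing init, rebuild_header, metrics, the BPF sock-ops
+ * callback, congestion-control init and receive-buffer sizing.
+ */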
+ tcp_init_transfer(sk, BPF_SOCK_OPS_ACTIVE_ESTABLISHED_CB);
/* Prevent spurious tcp_cwnd_restart() on first data
* packet.
*/
tp->lsndtime = tcp_jiffies32;
- tcp_init_buffer_space(sk);
-
if (sock_flag(sk, SOCK_KEEPOPEN))
inet_csk_reset_keepalive_timer(sk, keepalive_time_when(tp));
struct tcp_fastopen_cookie *cookie)
{
struct tcp_sock *tp = tcp_sk(sk);
- struct sk_buff *data = tp->syn_data ? tcp_write_queue_head(sk) : NULL;
+ struct sk_buff *data = tp->syn_data ? tcp_rtx_queue_head(sk) : NULL;
u16 mss = tp->rx_opt.mss_clamp, try_exp = 0;
bool syn_drop = false;
tcp_fastopen_cache_set(sk, mss, cookie, syn_drop, try_exp);
if (data) { /* Retransmit unacked data in SYN */
- tcp_for_write_queue_from(data, sk) {
- if (data == tcp_send_head(sk) ||
- __tcp_retransmit_skb(sk, data, 1))
+ skb_rbtree_walk_from(data) {
+ if (__tcp_retransmit_skb(sk, data, 1))
break;
}
tcp_rearm_rto(sk);
return false;
}
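+/* If we requested SMC in our SYN but the peer's reply carried no SMC
+ * option, clear syn_smc so the connection falls back to plain TCP.
+ */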
+static void smc_check_reset_syn(struct tcp_sock *tp)
+{
+#if IS_ENABLED(CONFIG_SMC)
+ if (static_branch_unlikely(&tcp_have_smc)) {
+ if (tp->syn_smc && !tp->rx_opt.smc_ok)
+ tp->syn_smc = 0;
+ }
+#endif
+}
+
static int tcp_rcv_synsent_state_process(struct sock *sk, struct sk_buff *skb,
const struct tcphdr *th)
{
tp->tcp_header_len = sizeof(struct tcphdr);
}
- if (tcp_is_sack(tp) && sysctl_tcp_fack)
- tcp_enable_fack(tp);
-
- tcp_mtup_init(sk);
tcp_sync_mss(sk, icsk->icsk_pmtu_cookie);
tcp_initialize_rcv_mss(sk);
* is initialized. */
tp->copied_seq = tp->rcv_nxt;
+ smc_check_reset_syn(tp);
+
smp_mb();
tcp_finish_connect(sk, skb);
if (req) {
inet_csk(sk)->icsk_retransmits = 0;
reqsk_fastopen_remove(sk, req, false);
+ /* Re-arm the timer because data may have been sent out.
+ * This is similar to the regular data transmission case
+ * when new data has just been ack'ed.
+ *
+ * (TFO) - we could try to be more aggressive and
+ * retransmit any data sooner based on when it is
+ * sent out.
+ */
+ tcp_rearm_rto(sk);
} else {
- /* Make sure socket is routed, for correct metrics. */
- icsk->icsk_af_ops->rebuild_header(sk);
- tcp_call_bpf(sk, BPF_SOCK_OPS_PASSIVE_ESTABLISHED_CB);
- tcp_init_congestion_control(sk);
-
- tcp_mtup_init(sk);
+ tcp_init_transfer(sk, BPF_SOCK_OPS_PASSIVE_ESTABLISHED_CB);
tp->copied_seq = tp->rcv_nxt;
- tcp_init_buffer_space(sk);
}
smp_mb();
tcp_set_state(sk, TCP_ESTABLISHED);
if (tp->rx_opt.tstamp_ok)
tp->advmss -= TCPOLEN_TSTAMP_ALIGNED;
- if (req) {
- /* Re-arm the timer because data may have been sent out.
- * This is similar to the regular data transmission case
- * when new data has just been ack'ed.
- *
- * (TFO) - we could try to be more aggressive and
- * retransmitting any data sooner based on when they
- * are sent out.
- */
- tcp_rearm_rto(sk);
- } else
- tcp_init_metrics(sk);
-
if (!inet_csk(sk)->icsk_ca_ops->cong_control)
tcp_update_pacing_rate(sk);
case TCP_LAST_ACK:
if (!before(TCP_SKB_CB(skb)->seq, tp->rcv_nxt))
break;
+ /* fall through */
case TCP_FIN_WAIT1:
case TCP_FIN_WAIT2:
/* RFC 793 says to queue data in these states,
ireq->ir_rmt_port = tcp_hdr(skb)->source;
ireq->ir_num = ntohs(tcp_hdr(skb)->dest);
ireq->ir_mark = inet_request_mark(sk, skb);
+#if IS_ENABLED(CONFIG_SMC)
+ ireq->smc_ok = rx_opt->smc_ok;
+#endif
}
struct request_sock *inet_reqsk_alloc(const struct request_sock_ops *ops,
if (req) {
struct inet_request_sock *ireq = inet_rsk(req);
- kmemcheck_annotate_bitfield(ireq, flags);
ireq->ireq_opt = NULL;
#if IS_ENABLED(CONFIG_IPV6)
ireq->pktopts = NULL;
tcp_openreq_init_rwin(req, sk, dst);
if (!want_cookie) {
tcp_reqsk_record_syn(sk, req, skb);
- fastopen_sk = tcp_try_fastopen(sk, skb, req, &foc);
+ fastopen_sk = tcp_try_fastopen(sk, skb, req, &foc, dst);
}
if (fastopen_sk) {
af_ops->send_synack(fastopen_sk, dst, &fl, req,