Git Repo - linux.git/commitdiff
Merge git://git.kernel.org/pub/scm/linux/kernel/git/davem/net-next
author Linus Torvalds <[email protected]>
Sat, 19 Mar 2016 17:05:34 +0000 (10:05 -0700)
committer Linus Torvalds <[email protected]>
Sat, 19 Mar 2016 17:05:34 +0000 (10:05 -0700)
Pull networking updates from David Miller:
 "Highlights:

   1) Support more Realtek wireless chips, from Jes Sorensen.

   2) New BPF types for per-cpu hash and array maps, from Alexei
      Starovoitov.  (map-creation sketch after this message)

   3) Make several TCP sysctls per-namespace, from Nikolay Borisov.

   4) Allow the use of SO_REUSEPORT in order to do per-thread processing
      of incoming TCP/UDP connections.  The muxing can be done using a
      BPF program which hashes the incoming packet.  From Craig Gallek.
      (listener sketch after this message)

   5) Add a multiplexer for TCP streams, to provide a message-based
      interface.  BPF programs can be used to determine the message
      boundaries.  From Tom Herbert.  (attach sketch after this message)

   6) Add 802.1AE MACSEC support, from Sabrina Dubroca.

   7) Avoid factorial complexity when taking down an inetdev interface
      with lots of configured addresses.  We were doing things like
      traversing the entire address list for each address removed, and
      flushing the entire netfilter conntrack table for every address as
      well.

   8) Add and use SKB bulk free infrastructure, from Jesper Brouer.

   9) Allow offloading u32 classifiers to hardware, and implement for
      ixgbe, from John Fastabend.

  10) Allow configuring IRQ coalescing parameters on a per-queue basis,
      from Kan Liang.

  11) Extend ethtool so that larger link mode masks can be supported.
      From David Decotigny.

  12) Introduce devlink, which can be used to configure port link types
      (ethernet vs Infiniband, etc.), port splitting, and switch device
      level attributes as a whole.  From Jiri Pirko.

  13) Hardware offload support for flower classifiers, from Amir Vadai.

  14) Add "Local Checksum Offload".  Basically, for a tunneled packet
      the checksum of the outer header is 'constant' (because with the
      checksum field filled into the inner protocol header, the payload
      of the outer frame checksums to 'zero'), and we can take advantage
      of that in various ways.  From Edward Cree"  (checksum example below)

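A minimal user-space sketch for item 2 above: creating one of the new per-cpu BPF map types through the raw bpf(2) syscall. It assumes a kernel that carries this pull (so BPF_MAP_TYPE_PERCPU_ARRAY is present in <linux/bpf.h>) and a libc that defines __NR_bpf; the value size and entry count are arbitrary, and error handling is minimal. A user-space lookup on such a map returns one value per possible CPU.

#include <errno.h>
#include <stdint.h>
#include <stdio.h>
#include <string.h>
#include <unistd.h>
#include <sys/syscall.h>
#include <linux/bpf.h>

/* Create a BPF_MAP_TYPE_PERCPU_ARRAY map: one value_size-sized slot per
 * entry *per possible CPU*.  Returns a map fd, or -1 with errno set. */
static int bpf_create_percpu_array(uint32_t value_size, uint32_t max_entries)
{
	union bpf_attr attr;

	memset(&attr, 0, sizeof(attr));
	attr.map_type    = BPF_MAP_TYPE_PERCPU_ARRAY;
	attr.key_size    = sizeof(uint32_t);	/* array maps index by u32 */
	attr.value_size  = value_size;
	attr.max_entries = max_entries;

	return syscall(__NR_bpf, BPF_MAP_CREATE, &attr, sizeof(attr));
}

int main(void)
{
	int fd = bpf_create_percpu_array(sizeof(uint64_t), 256);

	if (fd < 0) {
		fprintf(stderr, "BPF_MAP_CREATE: %s\n", strerror(errno));
		return 1;
	}
	printf("per-cpu array map fd = %d\n", fd);
	return 0;
}
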
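For item 4, a sketch of the per-thread listener pattern that SO_REUSEPORT enables: every worker opens its own socket bound to the same address and port, and the kernel spreads incoming connections across the group. Selection is hash-based by default; the new SO_ATTACH_REUSEPORT_CBPF/SO_ATTACH_REUSEPORT_EBPF socket options from this series let a BPF program steer it instead, but attaching a program is omitted here. Assumes IPv4 and libc headers that define SO_REUSEPORT.

#include <arpa/inet.h>
#include <netinet/in.h>
#include <stdint.h>
#include <string.h>
#include <sys/socket.h>
#include <unistd.h>

/* Open one listening socket for the calling worker thread; all workers
 * call this with the same port and end up in one reuseport group. */
static int open_reuseport_listener(uint16_t port)
{
	struct sockaddr_in addr;
	int one = 1;
	int fd = socket(AF_INET, SOCK_STREAM, 0);

	if (fd < 0)
		return -1;

	/* Must be set before bind() so the kernel groups the sockets. */
	if (setsockopt(fd, SOL_SOCKET, SO_REUSEPORT, &one, sizeof(one)) < 0)
		goto err;

	memset(&addr, 0, sizeof(addr));
	addr.sin_family      = AF_INET;
	addr.sin_addr.s_addr = htonl(INADDR_ANY);
	addr.sin_port        = htons(port);

	if (bind(fd, (struct sockaddr *)&addr, sizeof(addr)) < 0)
		goto err;
	if (listen(fd, 128) < 0)
		goto err;

	return fd;	/* each worker accept()s on its own fd */
err:
	close(fd);
	return -1;
}
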
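Item 5 (KCM, the TCP stream multiplexer) is driven roughly as below, following the Documentation/networking/kcm.txt flow added in this series: create an AF_KCM socket, then attach an already-connected TCP socket together with a loaded socket-filter BPF program whose return value is the length of each message. struct kcm_attach and SIOCKCMATTACH are taken from the new <linux/kcm.h> uapi header; AF_KCM may be missing from older libc headers, hence the fallback define, and bpf_prog_fd is assumed to have been loaded elsewhere.

#include <stdio.h>
#include <string.h>
#include <unistd.h>
#include <sys/ioctl.h>
#include <sys/socket.h>
#include <linux/kcm.h>	/* struct kcm_attach, SIOCKCMATTACH, KCMPROTO_CONNECTED */

#ifndef AF_KCM
#define AF_KCM 41	/* not yet present in older libc socket headers */
#endif

/* Attach an established TCP socket to a KCM socket.  bpf_prog_fd must be
 * a loaded socket-filter program returning each message's length. */
static int kcm_attach_tcp(int kcm_fd, int tcp_fd, int bpf_prog_fd)
{
	struct kcm_attach info;

	memset(&info, 0, sizeof(info));
	info.fd     = tcp_fd;
	info.bpf_fd = bpf_prog_fd;

	return ioctl(kcm_fd, SIOCKCMATTACH, &info);
}

int kcm_setup(int tcp_fd, int bpf_prog_fd)
{
	/* SOCK_DGRAM delivers one datagram per parsed message;
	 * SOCK_SEQPACKET is also supported. */
	int kcm_fd = socket(AF_KCM, SOCK_DGRAM, KCMPROTO_CONNECTED);

	if (kcm_fd < 0)
		return -1;

	if (kcm_attach_tcp(kcm_fd, tcp_fd, bpf_prog_fd) < 0) {
		perror("SIOCKCMATTACH");
		close(kcm_fd);
		return -1;
	}
	return kcm_fd;	/* send()/recv() now operate on whole messages */
}
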
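Finally, a worked example of the arithmetic behind item 14, using plain RFC 1071 one's complement code rather than the kernel's csum helpers: once a packet's checksum field is filled in, the packet as a whole sums to 0xffff (one's complement zero), so it adds nothing to an outer checksum that covers it, and the outer header's checksum becomes a constant that is cheap to offload.

#include <stdint.h>
#include <stdio.h>

/* RFC 1071 Internet checksum over a byte buffer (big-endian word order). */
static uint16_t csum16(const uint8_t *buf, size_t len)
{
	uint32_t sum = 0;
	size_t i;

	for (i = 0; i + 1 < len; i += 2)
		sum += ((uint32_t)buf[i] << 8) | buf[i + 1];
	if (len & 1)
		sum += (uint32_t)buf[len - 1] << 8;
	while (sum >> 16)			/* fold carries back in */
		sum = (sum & 0xffff) + (sum >> 16);
	return (uint16_t)~sum;
}

int main(void)
{
	/* Toy "inner packet": ten payload bytes plus a two-byte checksum
	 * field (initially zero), mimicking a transport header's field. */
	uint8_t pkt[12] = { 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 0, 0 };
	uint16_t c = csum16(pkt, sizeof(pkt));

	pkt[10] = c >> 8;
	pkt[11] = c & 0xff;

	/* Prints 0x0000: with the field filled in, the packet checks out
	 * as zero and contributes nothing to any outer checksum over it. */
	printf("checksum over filled-in packet: 0x%04x\n",
	       csum16(pkt, sizeof(pkt)));
	return 0;
}
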
* git://git.kernel.org/pub/scm/linux/kernel/git/davem/net-next: (1548 commits)
  bonding: fix bond_get_stats()
  net: bcmgenet: fix dma api length mismatch
  net/mlx4_core: Fix backward compatibility on VFs
  phy: mdio-thunder: Fix some Kconfig typos
  lan78xx: add ndo_get_stats64
  lan78xx: handle statistics counter rollover
  RDS: TCP: Remove unused constant
  RDS: TCP: Add sysctl tunables for sndbuf/rcvbuf on rds-tcp socket
  net: smc911x: convert pxa dma to dmaengine
  team: remove duplicate set of flag IFF_MULTICAST
  bonding: remove duplicate set of flag IFF_MULTICAST
  net: fix a comment typo
  ethernet: micrel: fix some error codes
  ip_tunnels, bpf: define IP_TUNNEL_OPTS_MAX and use it
  bpf, dst: add and use dst_tclassid helper
  bpf: make skb->tc_classid also readable
  net: mvneta: bm: clarify dependencies
  cls_bpf: reset class and reuse major in da
  ldmvsw: Checkpatch sunvnet.c and sunvnet_common.c
  ldmvsw: Add ldmvsw.c driver code
  ...

42 files changed:
Documentation/devicetree/bindings/vendor-prefixes.txt
MAINTAINERS
arch/arm/boot/dts/armada-xp-db.dts
arch/arm/boot/dts/armada-xp-gp.dts
arch/arm/boot/dts/armada-xp-openblocks-ax3-4.dts
arch/arm64/boot/dts/apm/apm-storm.dtsi
arch/tile/configs/tilegx_defconfig
arch/tile/configs/tilepro_defconfig
arch/x86/events/core.c
arch/x86/kernel/dumpstack.c
drivers/infiniband/hw/mlx4/main.c
drivers/infiniband/hw/mlx5/main.c
drivers/net/ethernet/freescale/gianfar.c
drivers/net/ethernet/intel/fm10k/fm10k_main.c
drivers/net/ethernet/intel/igb/igb_main.c
drivers/net/ethernet/intel/ixgbe/ixgbe_main.c
drivers/net/ethernet/mellanox/mlx5/core/en.h
drivers/net/ethernet/mellanox/mlx5/core/en_main.c
drivers/net/ethernet/mellanox/mlx5/core/fs_core.c
drivers/net/ethernet/mellanox/mlx5/core/main.c
drivers/net/ethernet/mellanox/mlx5/core/port.c
drivers/net/wireless/marvell/mwifiex/debugfs.c
include/linux/kernel.h
include/linux/mlx5/device.h
include/linux/mlx5/driver.h
include/linux/mlx5/mlx5_ifc.h
include/linux/perf_event.h
include/net/sctp/structs.h
include/net/tcp.h
include/uapi/linux/Kbuild
lib/Kconfig.debug
net/core/sock.c
net/ipv4/syncookies.c
net/ipv4/tcp.c
net/ipv4/tcp_fastopen.c
net/ipv4/tcp_ipv4.c
net/ipv6/syncookies.c
net/ipv6/tcp_ipv6.c
net/rxrpc/ar-internal.h
net/rxrpc/rxkad.c
net/sctp/sm_make_chunk.c
net/sctp/socket.c

diff --combined Documentation/devicetree/bindings/vendor-prefixes.txt
index ee66defcdd8b74d7cdaa581ac6ad94f79ace1c09,c6134dcd2e04431f8e9f13f7b7bc4bf7617598c3..156731cc649cd3f6c17972b6668f40d8b2089cd6
@@@ -28,7 -28,6 +28,7 @@@ arm   ARM Ltd
  armadeus      ARMadeus Systems SARL
  artesyn       Artesyn Embedded Technologies Inc.
  asahi-kasei   Asahi Kasei Corp.
 +atlas Atlas Scientific LLC
  atmel Atmel Corporation
  auo   AU Optronics Corporation
  avago Avago Technologies
@@@ -112,6 -111,7 +112,7 @@@ hp Hewlett Packar
  i2se  I2SE GmbH
  ibm   International Business Machines (IBM)
  idt   Integrated Device Technologies, Inc.
+ ifi   Ingenieurburo Fur Ic-Technologie (I/F/I)
  iom   Iomega Corporation
  img   Imagination Technologies Ltd.
  ingenic       Ingenic Semiconductor
@@@ -121,7 -121,6 +122,7 @@@ intercontrol       Inter Control Grou
  invensense    InvenSense Inc.
  isee  ISEE 2007 S.L.
  isil  Intersil
 +issi  Integrated Silicon Solutions Inc.
  jedec JEDEC Solid State Technology Association
  karo  Ka-Ro electronics GmbH
  keymile       Keymile GmbH
@@@ -172,7 -171,6 +173,7 @@@ opencores  OpenCores.or
  option        Option NV
  ortustech     Ortus Technology Co., Ltd.
  ovti  OmniVision Technologies
 +ORCL  Oracle Corporation
  panasonic     Panasonic Corporation
  parade        Parade Technologies Inc.
  pericom       Pericom Technology Inc.
@@@ -207,7 -205,6 +208,7 @@@ seagate    Seagate Technology PL
  semtech       Semtech Corporation
  sgx   SGX Sensortech
  sharp Sharp Corporation
 +si-en Si-En Technology Ltd.
  sigma Sigma Designs, Inc.
  sil   Silicon Image
  silabs        Silicon Laboratories
@@@ -230,9 -227,7 +231,9 @@@ st STMicroelectronic
  startek       Startek
  ste   ST-Ericsson
  stericsson    ST-Ericsson
 +syna  Synaptics Inc.
  synology      Synology, Inc.
 +SUNW  Sun Microsystems, Inc
  tbs   TBS Technologies
  tcl   Toby Churchill Ltd.
  technologic   Technologic Systems
diff --combined MAINTAINERS
index 2933d90512a3a8610a97f2c2f3723292209cb183,b70294ea7d634a0bf37ce5dcb52a00ea14cb87f6..74acd99f19c424f6b2b6c2369af6deb2954c2189
@@@ -151,7 -151,7 +151,7 @@@ S: Maintaine
  F:    drivers/scsi/53c700*
  
  6LOWPAN GENERIC (BTLE/IEEE 802.15.4)
- M:    Alexander Aring <a[email protected]>
+ M:    Alexander Aring <a[email protected]>
  M:    Jukka Rissanen <[email protected]>
  L:    [email protected]
  L:    [email protected]
@@@ -238,12 -238,6 +238,12 @@@ L:       [email protected]
  S:    Maintained
  F:    drivers/hwmon/abituguru3.c
  
 +ACCES 104-DIO-48E GPIO DRIVER
 +M:    William Breathitt Gray <[email protected]>
 +L:    [email protected]
 +S:    Maintained
 +F:    drivers/gpio/gpio-104-dio-48e.c
 +
  ACCES 104-IDI-48 GPIO DRIVER
  M:    "William Breathitt Gray" <[email protected]>
  L:    [email protected]
@@@ -775,12 -769,6 +775,12 @@@ L:       [email protected] (moderat
  S:    Maintained
  F:    sound/aoa/
  
 +APEX EMBEDDED SYSTEMS STX104 DAC DRIVER
 +M:    William Breathitt Gray <[email protected]>
 +L:    [email protected]
 +S:    Maintained
 +F:    drivers/iio/dac/stx104.c
 +
  APM DRIVER
  M:    Jiri Kosina <[email protected]>
  S:    Odd fixes
@@@ -1968,12 -1956,6 +1968,12 @@@ M:    Nicolas Ferre <[email protected]
  S:    Supported
  F:    drivers/tty/serial/atmel_serial.c
  
 +ATMEL SAMA5D2 ADC DRIVER
 +M:    Ludovic Desroches <[email protected]>
 +L:    [email protected]
 +S:    Supported
 +F:    drivers/iio/adc/at91-sama5d2_adc.c
 +
  ATMEL Audio ALSA driver
  M:    Nicolas Ferre <[email protected]>
  L:    [email protected] (moderated for non-subscribers)
@@@ -2176,7 -2158,8 +2176,8 @@@ M:      Marek Lindner <mareklindner@neomailb
  M:    Simon Wunderlich <[email protected]>
  M:    Antonio Quartulli <[email protected]>
  L:    [email protected]
- W:    http://www.open-mesh.org/
+ W:    https://www.open-mesh.org/
+ Q:    https://patchwork.open-mesh.org/project/batman/list/
  S:    Maintained
  F:    net/batman-adv/
  
@@@ -2440,13 -2423,13 +2441,14 @@@ F:   arch/mips/bmips/
  F:    arch/mips/include/asm/mach-bmips/*
  F:    arch/mips/kernel/*bmips*
  F:    arch/mips/boot/dts/brcm/bcm*.dts*
 +F:    drivers/irqchip/irq-bcm63*
  F:    drivers/irqchip/irq-bcm7*
  F:    drivers/irqchip/irq-brcmstb*
  F:    include/linux/bcm963xx_nvram.h
  F:    include/linux/bcm963xx_tag.h
  
  BROADCOM TG3 GIGABIT ETHERNET DRIVER
+ M:    Siva Reddy Kallam <[email protected]>
  M:    Prashant Sreedharan <[email protected]>
  M:    Michael Chan <[email protected]>
  L:    [email protected]
@@@ -3523,6 -3506,14 +3525,14 @@@ F:    include/linux/device-mapper.
  F:    include/linux/dm-*.h
  F:    include/uapi/linux/dm-*.h
  
+ DEVLINK
+ M:    Jiri Pirko <[email protected]>
+ L:    [email protected]
+ S:    Supported
+ F:    net/core/devlink.c
+ F:    include/net/devlink.h
+ F:    include/uapi/linux/devlink.h
  DIALOG SEMICONDUCTOR DRIVERS
  M:    Support Opensource <[email protected]>
  W:    http://www.dialog-semiconductor.com/products
  S:    Maintained
  F:    drivers/staging/dgnc/
  
 -DIGI EPCA PCI PRODUCTS
 -M:    Lidza Louina <[email protected]>
 -M:    Daeseok Youn <[email protected]>
 -L:    [email protected]
 -S:    Maintained
 -F:    drivers/staging/dgap/
 -
  DIOLAN U2C-12 I2C DRIVER
  M:    Guenter Roeck <[email protected]>
  L:    [email protected]
  T:    git git://git.kernel.org/pub/scm/linux/kernel/git/linusw/linux-gpio.git
  S:    Maintained
  F:    Documentation/gpio/
 +F:    Documentation/ABI/testing/gpio-cdev
 +F:    Documentation/ABI/obsolete/sysfs-gpio
  F:    drivers/gpio/
  F:    include/linux/gpio/
  F:    include/linux/gpio.h
  F:    include/asm-generic/gpio.h
 +F:    include/uapi/linux/gpio.h
 +F:    tools/gpio/
  
  GRE DEMULTIPLEXER DRIVER
  M:    Dmitry Kozlov <[email protected]>
@@@ -4994,7 -4988,6 +5004,7 @@@ F:      include/linux/hw_random.
  
  HARDWARE SPINLOCK CORE
  M:    Ohad Ben-Cohen <[email protected]>
 +M:    Bjorn Andersson <[email protected]>
  S:    Maintained
  T:    git git://git.kernel.org/pub/scm/linux/kernel/git/ohad/hwspinlock.git
  F:    Documentation/hwspinlock.txt
@@@ -5016,10 -5009,16 +5026,10 @@@ T:   git git://linuxtv.org/anttip/media_t
  S:    Maintained
  F:    drivers/media/dvb-frontends/hd29l2*
  
 -HEWLETT-PACKARD SMART2 RAID DRIVER
 -L:    [email protected]
 -S:    Orphan
 -F:    Documentation/blockdev/cpqarray.txt
 -F:    drivers/block/cpqarray.*
 -
  HEWLETT-PACKARD SMART ARRAY RAID DRIVER (hpsa)
 -M:    Don Brace <don.brace@pmcs.com>
 +M:    Don Brace <don.brace@microsemi.com>
  L:    [email protected]
 -L:    storagedev@pmcs.com
 +L:    esc.storagedev@microsemi.com
  L:    [email protected]
  S:    Supported
  F:    Documentation/scsi/hpsa.txt
@@@ -5028,9 -5027,9 +5038,9 @@@ F:      include/linux/cciss*.
  F:    include/uapi/linux/cciss*.h
  
  HEWLETT-PACKARD SMART CISS RAID DRIVER (cciss)
 -M:    Don Brace <don.brace@pmcs.com>
 +M:    Don Brace <don.brace@microsemi.com>
  L:    [email protected]
 -L:    storagedev@pmcs.com
 +L:    esc.storagedev@microsemi.com
  L:    [email protected]
  S:    Supported
  F:    Documentation/blockdev/cciss.txt
@@@ -5215,7 -5214,6 +5225,7 @@@ F:      arch/x86/kernel/cpu/mshyperv.
  F:    drivers/hid/hid-hyperv.c
  F:    drivers/hv/
  F:    drivers/input/serio/hyperv-keyboard.c
 +F:    drivers/pci/host/pci-hyperv.c
  F:    drivers/net/hyperv/
  F:    drivers/scsi/storvsc_drv.c
  F:    drivers/video/fbdev/hyperv_fb.c
@@@ -5447,10 -5445,11 +5457,11 @@@ S:   Supporte
  F:    drivers/idle/i7300_idle.c
  
  IEEE 802.15.4 SUBSYSTEM
- M:    Alexander Aring <a[email protected]>
+ M:    Alexander Aring <a[email protected]>
  L:    [email protected]
- W:    https://github.com/linux-wpan
- T:    git git://github.com/linux-wpan/linux-wpan-next.git
+ W:    http://wpan.cakelab.org/
+ T:    git git://git.kernel.org/pub/scm/linux/kernel/git/bluetooth/bluetooth.git
+ T:    git git://git.kernel.org/pub/scm/linux/kernel/git/bluetooth/bluetooth-next.git
  S:    Maintained
  F:    net/ieee802154/
  F:    net/mac802154/
@@@ -5580,7 -5579,6 +5591,7 @@@ F:      drivers/input
  F:    include/linux/input.h
  F:    include/uapi/linux/input.h
  F:    include/linux/input/
 +F:    Documentation/devicetree/bindings/input/
  
  INPUT MULTITOUCH (MT) PROTOCOL
  M:    Henrik Rydberg <[email protected]>
@@@ -5775,7 -5773,6 +5786,7 @@@ S:      Supporte
  F:    include/uapi/linux/mei.h
  F:    include/linux/mei_cl_bus.h
  F:    drivers/misc/mei/*
 +F:    drivers/watchdog/mei_wdt.c
  F:    Documentation/misc-devices/mei/*
  
  INTEL MIC DRIVERS (mic)
@@@ -6078,7 -6075,7 +6089,7 @@@ S:      Maintaine
  F:    drivers/media/platform/rcar_jpu.c
  
  JSM Neo PCI based serial card
 -M:    Thadeu Lima de Souza Cascardo <cascardo@linux.vnet.ibm.com>
 +M:    Gabriel Krisman Bertazi <krisman@linux.vnet.ibm.com>
  L:    [email protected]
  S:    Maintained
  F:    drivers/tty/serial/jsm/
@@@ -6596,10 -6593,9 +6607,10 @@@ F:    drivers/platform/x86/hp_accel.
  
  LIVE PATCHING
  M:    Josh Poimboeuf <[email protected]>
 -M:    Seth Jennings <sjenning@redhat.com>
 +M:    Jessica Yu <jeyu@redhat.com>
  M:    Jiri Kosina <[email protected]>
 -M:    Vojtech Pavlik <[email protected]>
 +M:    Miroslav Benes <[email protected]>
 +R:    Petr Mladek <[email protected]>
  S:    Maintained
  F:    kernel/livepatch/
  F:    include/linux/livepatch.h
@@@ -6610,11 -6606,6 +6621,11 @@@ F:    samples/livepatch
  L:    [email protected]
  T:    git git://git.kernel.org/pub/scm/linux/kernel/git/jikos/livepatching.git
  
 +LINUX KERNEL DUMP TEST MODULE (LKDTM)
 +M:    Kees Cook <[email protected]>
 +S:    Maintained
 +F:    drivers/misc/lkdtm.c
 +
  LLC (802.2)
  M:    Arnaldo Carvalho de Melo <[email protected]>
  S:    Maintained
@@@ -6700,12 -6691,13 +6711,12 @@@ S:   Maintaine
  F:    arch/arm/mach-lpc32xx/
  
  LSILOGIC MPT FUSION DRIVERS (FC/SAS/SPI)
 -M:    Nagalakshmi Nandigama <[email protected]>
 -M:    Praveen Krishnamoorthy <[email protected]>
 -M:    Sreekanth Reddy <[email protected]>
 -M:    Abhijit Mahajan <[email protected]>
 -L:    [email protected]
 +M:    Sathya Prakash <[email protected]>
 +M:    Chaitra P B <[email protected]>
 +M:    Suganath Prabu Subramani <[email protected]>
 +L:    [email protected]
  L:    [email protected]
 -W:    http://www.lsilogic.com/support
 +W:    http://www.avagotech.com/support/
  S:    Supported
  F:    drivers/message/fusion/
  F:    drivers/scsi/mpt2sas/
@@@ -6928,7 -6920,7 +6939,7 @@@ MAXIM MAX77802 MULTIFUNCTION PMIC DEVIC
  M:    Javier Martinez Canillas <[email protected]>
  L:    [email protected]
  S:    Supported
 -F:    drivers/*/*max77802.c
 +F:    drivers/*/*max77802*.c
  F:    Documentation/devicetree/bindings/*/*max77802.txt
  F:    include/dt-bindings/*/*max77802.h
  
@@@ -6938,7 -6930,7 +6949,7 @@@ M:      Krzysztof Kozlowski <k.kozlowski@sam
  L:    [email protected]
  S:    Supported
  F:    drivers/*/max14577.c
 -F:    drivers/*/max77686.c
 +F:    drivers/*/max77686*.c
  F:    drivers/*/max77693.c
  F:    drivers/extcon/extcon-max14577.c
  F:    drivers/extcon/extcon-max77693.c
@@@ -7043,6 -7035,13 +7054,13 @@@ F:    include/uapi/linux/meye.
  F:    include/uapi/linux/ivtv*
  F:    include/uapi/linux/uvcvideo.h
  
+ MEDIATEK ETHERNET DRIVER
+ M:    Felix Fietkau <[email protected]>
+ M:    John Crispin <[email protected]>
+ L:    [email protected]
+ S:    Maintained
+ F:    drivers/net/ethernet/mediatek/
  MEDIATEK MT7601U WIRELESS LAN DRIVER
  M:    Jakub Kicinski <[email protected]>
  L:    [email protected]
  W:    https://linuxtv.org
  W:    http://palosaari.fi/linux/
  Q:    http://patchwork.linuxtv.org/project/linux-media/list/
 -T:    git git://linuxtv.org/anttip/media_tree.git
  S:    Maintained
 -F:    drivers/staging/media/mn88473/
 -F:    drivers/media/dvb-frontends/mn88473.h
 +F:    drivers/media/dvb-frontends/mn88473*
  
  MODULE SUPPORT
  M:    Rusty Russell <[email protected]>
@@@ -7416,17 -7417,6 +7434,17 @@@ W:    https://www.myricom.com/support/down
  S:    Supported
  F:    drivers/net/ethernet/myricom/myri10ge/
  
 +NAND FLASH SUBSYSTEM
 +M:    Boris Brezillon <[email protected]>
 +R:    Richard Weinberger <[email protected]>
 +L:    [email protected]
 +W:    http://www.linux-mtd.infradead.org/
 +Q:    http://patchwork.ozlabs.org/project/linux-mtd/list/
 +T:    git git://github.com/linux-nand/linux.git
 +S:    Maintained
 +F:    drivers/mtd/nand/
 +F:    include/linux/mtd/nand*.h
 +
  NATSEMI ETHERNET DRIVER (DP8381x)
  S:    Orphan
  F:    drivers/net/ethernet/natsemi/natsemi.c
@@@ -7540,7 -7530,6 +7558,6 @@@ F:      net/netrom
  
  NETRONOME ETHERNET DRIVERS
  M:    Jakub Kicinski <[email protected]>
- M:    Rolf Neugebauer <[email protected]>
  L:    [email protected]
  S:    Maintained
  F:    drivers/net/ethernet/netronome/
@@@ -7677,7 -7666,6 +7694,6 @@@ F:      net/nfc
  F:    include/net/nfc/
  F:    include/uapi/linux/nfc.h
  F:    drivers/nfc/
- F:    include/linux/platform_data/microread.h
  F:    include/linux/platform_data/nfcmrvl.h
  F:    include/linux/platform_data/nxp-nci.h
  F:    include/linux/platform_data/pn544.h
@@@ -8201,13 -8189,6 +8217,13 @@@ S:    Maintaine
  F:    Documentation/mn10300/
  F:    arch/mn10300/
  
 +PARALLEL LCD/KEYPAD PANEL DRIVER
 +M:      Willy Tarreau <[email protected]>
 +M:      Ksenija Stanojevic <[email protected]>
 +S:      Odd Fixes
 +F:      Documentation/misc-devices/lcd-panel-cgram.txt
 +F:      drivers/misc/panel.c
 +
  PARALLEL PORT SUBSYSTEM
  M:    Sudip Mukherjee <[email protected]>
  M:    Sudip Mukherjee <[email protected]>
  S:    Maintained
  F:    drivers/pci/host/*designware*
  
 +PCI DRIVER FOR SYNOPSYS PROTOTYPING DEVICE
 +M:    Joao Pinto <[email protected]>
 +L:    [email protected]
 +S:    Maintained
 +F:    Documentation/devicetree/bindings/pci/designware-pcie.txt
 +F:    drivers/pci/host/pcie-designware-plat.c
 +
  PCI DRIVER FOR GENERIC OF HOSTS
  M:    Will Deacon <[email protected]>
  L:    [email protected]
  L:    [email protected] (moderated for non-subscribers)
  S:    Maintained
  F:    Documentation/devicetree/bindings/pci/host-generic-pci.txt
 +F:    drivers/pci/host/pci-host-common.c
  F:    drivers/pci/host/pci-host-generic.c
  
  PCI DRIVER FOR INTEL VOLUME MANAGEMENT DEVICE (VMD)
  S:     Maintained
  F:     drivers/pci/host/*qcom*
  
 +PCIE DRIVER FOR CAVIUM THUNDERX
 +M:    David Daney <[email protected]>
 +L:    [email protected]
 +L:    [email protected] (moderated for non-subscribers)
 +S:    Supported
 +F:    Documentation/devicetree/bindings/pci/pci-thunder-*
 +F:    drivers/pci/host/pci-thunder-*
 +
  PCMCIA SUBSYSTEM
  P:    Linux PCMCIA Team
  L:    [email protected]
@@@ -8498,7 -8463,7 +8514,7 @@@ F:      include/crypto/pcrypt.
  
  PER-CPU MEMORY ALLOCATOR
  M:    Tejun Heo <[email protected]>
 -M:    Christoph Lameter <cl@linux-foundation.org>
 +M:    Christoph Lameter <cl@linux.com>
  T:    git git://git.kernel.org/pub/scm/linux/kernel/git/tj/percpu.git
  S:    Maintained
  F:    include/linux/percpu*.h
@@@ -8515,7 -8480,6 +8531,7 @@@ PERFORMANCE EVENTS SUBSYSTE
  M:    Peter Zijlstra <[email protected]>
  M:    Ingo Molnar <[email protected]>
  M:    Arnaldo Carvalho de Melo <[email protected]>
 +R:    Alexander Shishkin <[email protected]>
  L:    [email protected]
  T:    git git://git.kernel.org/pub/scm/linux/kernel/git/tip/tip.git perf/core
  S:    Supported
@@@ -9138,10 -9102,14 +9154,14 @@@ S:   Maintaine
  F:    drivers/net/ethernet/rdc/r6040.c
  
  RDS - RELIABLE DATAGRAM SOCKETS
- M:    Chien Yen <[email protected]>
+ M:    Santosh Shilimkar <[email protected]>
+ L:    [email protected]
+ L:    [email protected]
  L:    [email protected] (moderated for non-subscribers)
+ W:    https://oss.oracle.com/projects/rds/
  S:    Supported
  F:    net/rds/
+ F:    Documentation/networking/rds.txt
  
  READ-COPY UPDATE (RCU)
  M:    "Paul E. McKenney" <[email protected]>
@@@ -9194,7 -9162,6 +9214,7 @@@ F:      include/linux/regmap.
  
  REMOTE PROCESSOR (REMOTEPROC) SUBSYSTEM
  M:    Ohad Ben-Cohen <[email protected]>
 +M:    Bjorn Andersson <[email protected]>
  T:    git git://git.kernel.org/pub/scm/linux/kernel/git/ohad/remoteproc.git
  S:    Maintained
  F:    drivers/remoteproc/
@@@ -9203,7 -9170,6 +9223,7 @@@ F:      include/linux/remoteproc.
  
  REMOTE PROCESSOR MESSAGING (RPMSG) SUBSYSTEM
  M:    Ohad Ben-Cohen <[email protected]>
 +M:    Bjorn Andersson <[email protected]>
  T:    git git://git.kernel.org/pub/scm/linux/kernel/git/ohad/rpmsg.git
  S:    Maintained
  F:    drivers/rpmsg/
@@@ -9542,6 -9508,7 +9562,7 @@@ F:      drivers/media/i2c/s5k5baf.
  
  SAMSUNG S3FWRN5 NFC DRIVER
  M:    Robert Baldyga <[email protected]>
+ M:    Krzysztof Opasiak <[email protected]>
  L:    [email protected] (moderated for non-subscribers)
  S:    Supported
  F:    drivers/nfc/s3fwrn5
@@@ -9715,7 -9682,7 +9736,7 @@@ F:      drivers/scsi/sg.
  F:    include/scsi/sg.h
  
  SCSI SUBSYSTEM
 -M:    "James E.J. Bottomley" <JBottomley@odin.com>
 +M:    "James E.J. Bottomley" <[email protected].com>
  T:    git git://git.kernel.org/pub/scm/linux/kernel/git/jejb/scsi.git
  M:    "Martin K. Petersen" <[email protected]>
  T:    git git://git.kernel.org/pub/scm/linux/kernel/git/mkp/scsi.git
  S:    Maintained
  F:    drivers/staging/nvec/
  
 -STAGING - OLPC SECONDARY DISPLAY CONTROLLER (DCON)
 -M:    Jens Frederich <[email protected]>
 -M:    Daniel Drake <[email protected]>
 -M:    Jon Nettleton <[email protected]>
 -W:    http://wiki.laptop.org/go/DCON
 -S:    Maintained
 -F:    drivers/staging/olpc_dcon/
 -
 -STAGING - PARALLEL LCD/KEYPAD PANEL DRIVER
 -M:    Willy Tarreau <[email protected]>
 -S:    Odd Fixes
 -F:    drivers/staging/panel/
 -
  STAGING - REALTEK RTL8712U DRIVERS
  M:    Larry Finger <[email protected]>
  M:    Florian Schilhabel <[email protected]>.
  S:    Maintained
  F:    drivers/thermal/ti-soc-thermal/
  
 +TI VPE/CAL DRIVERS
 +M:    Benoit Parrot <[email protected]>
 +L:    [email protected]
 +W:    http://linuxtv.org/
 +Q:    http://patchwork.linuxtv.org/project/linux-media/list/
 +S:    Maintained
 +F:    drivers/media/platform/ti-vpe/
 +
  TI CDCE706 CLOCK DRIVER
  M:    Max Filippov <[email protected]>
  S:    Maintained
@@@ -11140,8 -11112,8 +11161,8 @@@ M:   Jarkko Sakkinen <jarkko.sakkinen@lin
  R:    Jason Gunthorpe <[email protected]>
  W:    http://tpmdd.sourceforge.net
  L:    [email protected] (moderated for non-subscribers)
 -Q:    git git://github.com/PeterHuewe/linux-tpmdd.git
 -T:    git https://github.com/PeterHuewe/linux-tpmdd
 +Q:    https://patchwork.kernel.org/project/tpmdd-devel/list/
 +T:    git git://git.infradead.org/users/jjs/linux-tpmdd.git
  S:    Maintained
  F:    drivers/char/tpm/
  
@@@ -11296,6 -11268,7 +11317,6 @@@ F:   include/linux/cdrom.
  F:    include/uapi/linux/cdrom.h
  
  UNISYS S-PAR DRIVERS
 -M:    Benjamin Romer <[email protected]>
  M:    David Kershner <[email protected]>
  L:    [email protected] (Unisys internal)
  S:    Supported
@@@ -11320,7 -11293,7 +11341,7 @@@ F:   include/linux/mtd/ubi.
  F:    include/uapi/mtd/ubi-user.h
  
  USB ACM DRIVER
 -M:    Oliver Neukum <o[email protected]>
 +M:    Oliver Neukum <o[email protected]>
  L:    [email protected]
  S:    Maintained
  F:    Documentation/usb/acm.txt
@@@ -11405,6 -11378,13 +11426,13 @@@ S: Maintaine
  F:    drivers/usb/host/isp116x*
  F:    include/linux/usb/isp116x.h
  
+ USB LAN78XX ETHERNET DRIVER
+ M:    Woojung Huh <[email protected]>
+ M:    Microchip Linux Driver Support <[email protected]>
+ L:    [email protected]
+ S:    Maintained
+ F:    drivers/net/usb/lan78xx.*
  USB MASS STORAGE DRIVER
  M:    Matthew Dharm <[email protected]>
  L:    [email protected]
@@@ -11444,7 -11424,6 +11472,7 @@@ M:   Valentina Manea <valentina.manea.m@g
  M:    Shuah Khan <[email protected]>
  L:    [email protected]
  S:    Maintained
 +F:    Documentation/usb/usbip_protocol.txt
  F:    drivers/usb/usbip/
  F:    tools/usb/usbip/
  
@@@ -11935,12 -11914,6 +11963,12 @@@ M: David Härdeman <[email protected]
  S:    Maintained
  F:    drivers/media/rc/winbond-cir.c
  
 +WINSYSTEMS WS16C48 GPIO DRIVER
 +M:    William Breathitt Gray <[email protected]>
 +L:    [email protected]
 +S:    Maintained
 +F:    drivers/gpio/gpio-ws16c48.c
 +
  WIMAX STACK
  M:    Inaky Perez-Gonzalez <[email protected]>
  M:    [email protected]
diff --combined arch/arm/boot/dts/armada-xp-db.dts
index ebe1d267406df5ab30e3a3189b669733eb8fcaa4,30657302305d737375014d9ff363b3a67d54f2bd..cca366590561077b2fd5aeeb358c97eced2bfa66
@@@ -76,8 -76,9 +76,9 @@@
                ranges = <MBUS_ID(0xf0, 0x01) 0 0 0xf1000000 0x100000
                          MBUS_ID(0x01, 0x1d) 0 0 0xfff00000 0x100000
                          MBUS_ID(0x01, 0x2f) 0 0 0xf0000000 0x1000000
 -                        MBUS_ID(0x09, 0x09) 0 0 0xf8100000 0x10000
 -                        MBUS_ID(0x09, 0x05) 0 0 0xf8110000 0x10000
 +                        MBUS_ID(0x09, 0x09) 0 0 0xf1100000 0x10000
-                         MBUS_ID(0x09, 0x05) 0 0 0xf1110000 0x10000>;
++                        MBUS_ID(0x09, 0x05) 0 0 0xf1110000 0x10000
+                         MBUS_ID(0x0c, 0x04) 0 0 0xf1200000 0x100000>;
  
                devbus-bootcs {
                        status = "okay";
                                status = "okay";
                                phy = <&phy0>;
                                phy-mode = "rgmii-id";
+                               buffer-manager = <&bm>;
+                               bm,pool-long = <0>;
                        };
                        ethernet@74000 {
                                status = "okay";
                                phy = <&phy1>;
                                phy-mode = "rgmii-id";
+                               buffer-manager = <&bm>;
+                               bm,pool-long = <1>;
                        };
                        ethernet@30000 {
                                status = "okay";
                                phy = <&phy2>;
                                phy-mode = "sgmii";
+                               buffer-manager = <&bm>;
+                               bm,pool-long = <2>;
                        };
                        ethernet@34000 {
                                status = "okay";
                                phy = <&phy3>;
                                phy-mode = "sgmii";
+                               buffer-manager = <&bm>;
+                               bm,pool-long = <3>;
+                       };
+                       bm@c0000 {
+                               status = "okay";
                        };
  
                        mvsdio@d4000 {
                                };
                        };
                };
+               bm-bppi {
+                       status = "okay";
+               };
        };
  };
diff --combined arch/arm/boot/dts/armada-xp-gp.dts
index 5730b875c4f51a1aa2743d8881b18d6dbc0b27cd,a1ded01d0c0762da87f81c27c14e1377811e2d3f..061f4237760e7c917d11056b97b5971bcd4680b5
@@@ -95,8 -95,9 +95,9 @@@
                ranges = <MBUS_ID(0xf0, 0x01) 0 0 0xf1000000 0x100000
                          MBUS_ID(0x01, 0x1d) 0 0 0xfff00000 0x100000
                          MBUS_ID(0x01, 0x2f) 0 0 0xf0000000 0x1000000
 -                        MBUS_ID(0x09, 0x09) 0 0 0xf8100000 0x10000
 -                        MBUS_ID(0x09, 0x05) 0 0 0xf8110000 0x10000
 +                        MBUS_ID(0x09, 0x09) 0 0 0xf1100000 0x10000
-                         MBUS_ID(0x09, 0x05) 0 0 0xf1110000 0x10000>;
++                        MBUS_ID(0x09, 0x05) 0 0 0xf1110000 0x10000
+                         MBUS_ID(0x0c, 0x04) 0 0 0xf1200000 0x100000>;
  
                devbus-bootcs {
                        status = "okay";
                                status = "okay";
                                phy = <&phy0>;
                                phy-mode = "qsgmii";
+                               buffer-manager = <&bm>;
+                               bm,pool-long = <0>;
                        };
                        ethernet@74000 {
                                status = "okay";
                                phy = <&phy1>;
                                phy-mode = "qsgmii";
+                               buffer-manager = <&bm>;
+                               bm,pool-long = <1>;
                        };
                        ethernet@30000 {
                                status = "okay";
                                phy = <&phy2>;
                                phy-mode = "qsgmii";
+                               buffer-manager = <&bm>;
+                               bm,pool-long = <2>;
                        };
                        ethernet@34000 {
                                status = "okay";
                                phy = <&phy3>;
                                phy-mode = "qsgmii";
+                               buffer-manager = <&bm>;
+                               bm,pool-long = <3>;
                        };
  
                        /* Front-side USB slot */
                                };
                        };
  
+                       bm@c0000 {
+                               status = "okay";
+                       };
                        nand@d0000 {
                                status = "okay";
                                num-cs = <1>;
                                nand-on-flash-bbt;
                        };
                };
+               bm-bppi {
+                       status = "okay";
+               };
        };
  };
diff --combined arch/arm/boot/dts/armada-xp-openblocks-ax3-4.dts
index 853bd392a4fe20155ed1469a23175213814ca9dc,3aa29a91c7b8c5b4b17af8fe1fecae2cf04e3c8b..ed3b889d16ce439a4e933cdcd17ff37ed6adc4c4
        soc {
                ranges = <MBUS_ID(0xf0, 0x01) 0 0 0xd0000000 0x100000
                          MBUS_ID(0x01, 0x1d) 0 0 0xfff00000 0x100000
 -                        MBUS_ID(0x01, 0x2f) 0 0 0xf0000000 0x8000000
 -                        MBUS_ID(0x09, 0x09) 0 0 0xf8100000 0x10000
 -                        MBUS_ID(0x09, 0x05) 0 0 0xf8110000 0x10000
 +                        MBUS_ID(0x01, 0x2f) 0 0 0xe8000000 0x8000000
 +                        MBUS_ID(0x09, 0x09) 0 0 0xf1100000 0x10000
-                         MBUS_ID(0x09, 0x05) 0 0 0xf1110000 0x10000>;
++                        MBUS_ID(0x09, 0x05) 0 0 0xf1110000 0x10000
+                         MBUS_ID(0x0c, 0x04) 0 0 0xd1200000 0x100000>;
  
                devbus-bootcs {
                        status = "okay";
                                status = "okay";
                                phy = <&phy0>;
                                phy-mode = "sgmii";
+                               buffer-manager = <&bm>;
+                               bm,pool-long = <0>;
                        };
                        ethernet@74000 {
                                status = "okay";
                                phy = <&phy1>;
                                phy-mode = "sgmii";
+                               buffer-manager = <&bm>;
+                               bm,pool-long = <1>;
                        };
                        ethernet@30000 {
                                status = "okay";
                                phy = <&phy2>;
                                phy-mode = "sgmii";
+                               buffer-manager = <&bm>;
+                               bm,pool-long = <2>;
                        };
                        ethernet@34000 {
                                status = "okay";
                                phy = <&phy3>;
                                phy-mode = "sgmii";
+                               buffer-manager = <&bm>;
+                               bm,pool-long = <3>;
                        };
                        i2c@11000 {
                                status = "okay";
                        usb@51000 {
                                status = "okay";
                        };
+                       bm@c0000 {
+                               status = "okay";
+                       };
+               };
+               bm-bppi {
+                       status = "okay";
                };
        };
  };
diff --combined arch/arm64/boot/dts/apm/apm-storm.dtsi
index 3e40bd469cea13c010c08c6434ae1d0aa9df81af,cafb2c2715fbae02551ffe32bc2693463d665423..b7d7109e73049fd6dede3f2cd2f6d6688ba5ab81
                        reg = <0x0 0x1054a000 0x0 0x20>;
                };
  
 +              rb: rb@7e000000 {
 +                      compatible = "apm,xgene-rb", "syscon";
 +                      reg = <0x0 0x7e000000 0x0 0x10>;
 +              };
 +
                edac@78800000 {
                        compatible = "apm,xgene-edac";
                        #address-cells = <2>;
                        regmap-mcba = <&mcba>;
                        regmap-mcbb = <&mcbb>;
                        regmap-efuse = <&efuse>;
 +                      regmap-rb = <&rb>;
                        reg = <0x0 0x78800000 0x0 0x100>;
                        interrupts = <0x0 0x20 0x4>,
                                     <0x0 0x21 0x4>,
                              <0x0 0x18000000 0x0 0X200>;
                        reg-names = "enet_csr", "ring_csr", "ring_cmd";
                        interrupts = <0x0 0x60 0x4>,
-                                    <0x0 0x61 0x4>;
+                                    <0x0 0x61 0x4>,
+                                    <0x0 0x62 0x4>,
+                                    <0x0 0x63 0x4>,
+                                    <0x0 0x64 0x4>,
+                                    <0x0 0x65 0x4>,
+                                    <0x0 0x66 0x4>,
+                                    <0x0 0x67 0x4>;
                        dma-coherent;
                        clocks = <&xge0clk 0>;
                        /* mac address will be overwritten by the bootloader */
diff --combined arch/tile/configs/tilegx_defconfig
index c1387b7f447da9e70c0cee1e5cedc564010b26ad,984fa00a8c253be9d4a4cf3c0dd61d666e3eb752..3f3dfb8b150a114adb5d0e29d2f453b89f8194ad
@@@ -222,7 -222,7 +222,7 @@@ CONFIG_TUN=
  CONFIG_VETH=m
  CONFIG_NET_DSA_MV88E6060=y
  CONFIG_NET_DSA_MV88E6131=y
- CONFIG_NET_DSA_MV88E6123_61_65=y
+ CONFIG_NET_DSA_MV88E6123=y
  CONFIG_SKY2=y
  CONFIG_PTP_1588_CLOCK_TILEGX=y
  # CONFIG_WLAN is not set
@@@ -374,6 -374,7 +374,6 @@@ CONFIG_DEBUG_CREDENTIALS=
  CONFIG_RCU_CPU_STALL_TIMEOUT=60
  CONFIG_ASYNC_RAID6_TEST=m
  CONFIG_KGDB=y
 -CONFIG_KEYS_DEBUG_PROC_KEYS=y
  CONFIG_SECURITY=y
  CONFIG_SECURITYFS=y
  CONFIG_SECURITY_NETWORK=y
diff --combined arch/tile/configs/tilepro_defconfig
index 6d9ce8af11074eef6a283ad08c5247f000fba206,71ad9f7e40c9fb80fe31bd43a416be70c8ab422d..ef9e27eb2f50cfa11b2c931bbbdc31928ba49385
@@@ -341,7 -341,7 +341,7 @@@ CONFIG_TUN=
  CONFIG_VETH=m
  CONFIG_NET_DSA_MV88E6060=y
  CONFIG_NET_DSA_MV88E6131=y
- CONFIG_NET_DSA_MV88E6123_61_65=y
+ CONFIG_NET_DSA_MV88E6123=y
  # CONFIG_NET_VENDOR_3COM is not set
  CONFIG_E1000E=y
  # CONFIG_WLAN is not set
@@@ -486,6 -486,7 +486,6 @@@ CONFIG_DEBUG_LIST=
  CONFIG_DEBUG_CREDENTIALS=y
  CONFIG_RCU_CPU_STALL_TIMEOUT=60
  CONFIG_ASYNC_RAID6_TEST=m
 -CONFIG_KEYS_DEBUG_PROC_KEYS=y
  CONFIG_SECURITY=y
  CONFIG_SECURITYFS=y
  CONFIG_SECURITY_NETWORK=y
diff --combined arch/x86/events/core.c
index 5e830d0c95c999780bae39ecc1ce84c003fa1ece,0000000000000000000000000000000000000000..9b6ad08aa51a7e2ebc432e38b54b19b2021f266f
mode 100644,000000..100644
--- /dev/null
@@@ -1,2442 -1,0 +1,2442 @@@
- static void backtrace_address(void *data, unsigned long addr, int reliable)
 +/*
 + * Performance events x86 architecture code
 + *
 + *  Copyright (C) 2008 Thomas Gleixner <[email protected]>
 + *  Copyright (C) 2008-2009 Red Hat, Inc., Ingo Molnar
 + *  Copyright (C) 2009 Jaswinder Singh Rajput
 + *  Copyright (C) 2009 Advanced Micro Devices, Inc., Robert Richter
 + *  Copyright (C) 2008-2009 Red Hat, Inc., Peter Zijlstra
 + *  Copyright (C) 2009 Intel Corporation, <[email protected]>
 + *  Copyright (C) 2009 Google, Inc., Stephane Eranian
 + *
 + *  For licencing details see kernel-base/COPYING
 + */
 +
 +#include <linux/perf_event.h>
 +#include <linux/capability.h>
 +#include <linux/notifier.h>
 +#include <linux/hardirq.h>
 +#include <linux/kprobes.h>
 +#include <linux/module.h>
 +#include <linux/kdebug.h>
 +#include <linux/sched.h>
 +#include <linux/uaccess.h>
 +#include <linux/slab.h>
 +#include <linux/cpu.h>
 +#include <linux/bitops.h>
 +#include <linux/device.h>
 +
 +#include <asm/apic.h>
 +#include <asm/stacktrace.h>
 +#include <asm/nmi.h>
 +#include <asm/smp.h>
 +#include <asm/alternative.h>
 +#include <asm/mmu_context.h>
 +#include <asm/tlbflush.h>
 +#include <asm/timer.h>
 +#include <asm/desc.h>
 +#include <asm/ldt.h>
 +
 +#include "perf_event.h"
 +
 +struct x86_pmu x86_pmu __read_mostly;
 +
 +DEFINE_PER_CPU(struct cpu_hw_events, cpu_hw_events) = {
 +      .enabled = 1,
 +};
 +
 +struct static_key rdpmc_always_available = STATIC_KEY_INIT_FALSE;
 +
 +u64 __read_mostly hw_cache_event_ids
 +                              [PERF_COUNT_HW_CACHE_MAX]
 +                              [PERF_COUNT_HW_CACHE_OP_MAX]
 +                              [PERF_COUNT_HW_CACHE_RESULT_MAX];
 +u64 __read_mostly hw_cache_extra_regs
 +                              [PERF_COUNT_HW_CACHE_MAX]
 +                              [PERF_COUNT_HW_CACHE_OP_MAX]
 +                              [PERF_COUNT_HW_CACHE_RESULT_MAX];
 +
 +/*
 + * Propagate event elapsed time into the generic event.
 + * Can only be executed on the CPU where the event is active.
 + * Returns the delta events processed.
 + */
 +u64 x86_perf_event_update(struct perf_event *event)
 +{
 +      struct hw_perf_event *hwc = &event->hw;
 +      int shift = 64 - x86_pmu.cntval_bits;
 +      u64 prev_raw_count, new_raw_count;
 +      int idx = hwc->idx;
 +      s64 delta;
 +
 +      if (idx == INTEL_PMC_IDX_FIXED_BTS)
 +              return 0;
 +
 +      /*
 +       * Careful: an NMI might modify the previous event value.
 +       *
 +       * Our tactic to handle this is to first atomically read and
 +       * exchange a new raw count - then add that new-prev delta
 +       * count to the generic event atomically:
 +       */
 +again:
 +      prev_raw_count = local64_read(&hwc->prev_count);
 +      rdpmcl(hwc->event_base_rdpmc, new_raw_count);
 +
 +      if (local64_cmpxchg(&hwc->prev_count, prev_raw_count,
 +                                      new_raw_count) != prev_raw_count)
 +              goto again;
 +
 +      /*
 +       * Now we have the new raw value and have updated the prev
 +       * timestamp already. We can now calculate the elapsed delta
 +       * (event-)time and add that to the generic event.
 +       *
 +       * Careful, not all hw sign-extends above the physical width
 +       * of the count.
 +       */
 +      delta = (new_raw_count << shift) - (prev_raw_count << shift);
 +      delta >>= shift;
 +
 +      local64_add(delta, &event->count);
 +      local64_sub(delta, &hwc->period_left);
 +
 +      return new_raw_count;
 +}
 +
 +/*
 + * Find and validate any extra registers to set up.
 + */
 +static int x86_pmu_extra_regs(u64 config, struct perf_event *event)
 +{
 +      struct hw_perf_event_extra *reg;
 +      struct extra_reg *er;
 +
 +      reg = &event->hw.extra_reg;
 +
 +      if (!x86_pmu.extra_regs)
 +              return 0;
 +
 +      for (er = x86_pmu.extra_regs; er->msr; er++) {
 +              if (er->event != (config & er->config_mask))
 +                      continue;
 +              if (event->attr.config1 & ~er->valid_mask)
 +                      return -EINVAL;
 +              /* Check if the extra msrs can be safely accessed*/
 +              if (!er->extra_msr_access)
 +                      return -ENXIO;
 +
 +              reg->idx = er->idx;
 +              reg->config = event->attr.config1;
 +              reg->reg = er->msr;
 +              break;
 +      }
 +      return 0;
 +}
 +
 +static atomic_t active_events;
 +static atomic_t pmc_refcount;
 +static DEFINE_MUTEX(pmc_reserve_mutex);
 +
 +#ifdef CONFIG_X86_LOCAL_APIC
 +
 +static bool reserve_pmc_hardware(void)
 +{
 +      int i;
 +
 +      for (i = 0; i < x86_pmu.num_counters; i++) {
 +              if (!reserve_perfctr_nmi(x86_pmu_event_addr(i)))
 +                      goto perfctr_fail;
 +      }
 +
 +      for (i = 0; i < x86_pmu.num_counters; i++) {
 +              if (!reserve_evntsel_nmi(x86_pmu_config_addr(i)))
 +                      goto eventsel_fail;
 +      }
 +
 +      return true;
 +
 +eventsel_fail:
 +      for (i--; i >= 0; i--)
 +              release_evntsel_nmi(x86_pmu_config_addr(i));
 +
 +      i = x86_pmu.num_counters;
 +
 +perfctr_fail:
 +      for (i--; i >= 0; i--)
 +              release_perfctr_nmi(x86_pmu_event_addr(i));
 +
 +      return false;
 +}
 +
 +static void release_pmc_hardware(void)
 +{
 +      int i;
 +
 +      for (i = 0; i < x86_pmu.num_counters; i++) {
 +              release_perfctr_nmi(x86_pmu_event_addr(i));
 +              release_evntsel_nmi(x86_pmu_config_addr(i));
 +      }
 +}
 +
 +#else
 +
 +static bool reserve_pmc_hardware(void) { return true; }
 +static void release_pmc_hardware(void) {}
 +
 +#endif
 +
 +static bool check_hw_exists(void)
 +{
 +      u64 val, val_fail, val_new= ~0;
 +      int i, reg, reg_fail, ret = 0;
 +      int bios_fail = 0;
 +      int reg_safe = -1;
 +
 +      /*
 +       * Check to see if the BIOS enabled any of the counters, if so
 +       * complain and bail.
 +       */
 +      for (i = 0; i < x86_pmu.num_counters; i++) {
 +              reg = x86_pmu_config_addr(i);
 +              ret = rdmsrl_safe(reg, &val);
 +              if (ret)
 +                      goto msr_fail;
 +              if (val & ARCH_PERFMON_EVENTSEL_ENABLE) {
 +                      bios_fail = 1;
 +                      val_fail = val;
 +                      reg_fail = reg;
 +              } else {
 +                      reg_safe = i;
 +              }
 +      }
 +
 +      if (x86_pmu.num_counters_fixed) {
 +              reg = MSR_ARCH_PERFMON_FIXED_CTR_CTRL;
 +              ret = rdmsrl_safe(reg, &val);
 +              if (ret)
 +                      goto msr_fail;
 +              for (i = 0; i < x86_pmu.num_counters_fixed; i++) {
 +                      if (val & (0x03 << i*4)) {
 +                              bios_fail = 1;
 +                              val_fail = val;
 +                              reg_fail = reg;
 +                      }
 +              }
 +      }
 +
 +      /*
 +       * If all the counters are enabled, the below test will always
 +       * fail.  The tools will also become useless in this scenario.
 +       * Just fail and disable the hardware counters.
 +       */
 +
 +      if (reg_safe == -1) {
 +              reg = reg_safe;
 +              goto msr_fail;
 +      }
 +
 +      /*
 +       * Read the current value, change it and read it back to see if it
 +       * matches, this is needed to detect certain hardware emulators
 +       * (qemu/kvm) that don't trap on the MSR access and always return 0s.
 +       */
 +      reg = x86_pmu_event_addr(reg_safe);
 +      if (rdmsrl_safe(reg, &val))
 +              goto msr_fail;
 +      val ^= 0xffffUL;
 +      ret = wrmsrl_safe(reg, val);
 +      ret |= rdmsrl_safe(reg, &val_new);
 +      if (ret || val != val_new)
 +              goto msr_fail;
 +
 +      /*
 +       * We still allow the PMU driver to operate:
 +       */
 +      if (bios_fail) {
 +              pr_cont("Broken BIOS detected, complain to your hardware vendor.\n");
 +              pr_err(FW_BUG "the BIOS has corrupted hw-PMU resources (MSR %x is %Lx)\n",
 +                            reg_fail, val_fail);
 +      }
 +
 +      return true;
 +
 +msr_fail:
 +      pr_cont("Broken PMU hardware detected, using software events only.\n");
 +      pr_info("%sFailed to access perfctr msr (MSR %x is %Lx)\n",
 +              boot_cpu_has(X86_FEATURE_HYPERVISOR) ? KERN_INFO : KERN_ERR,
 +              reg, val_new);
 +
 +      return false;
 +}
 +
 +static void hw_perf_event_destroy(struct perf_event *event)
 +{
 +      x86_release_hardware();
 +      atomic_dec(&active_events);
 +}
 +
 +void hw_perf_lbr_event_destroy(struct perf_event *event)
 +{
 +      hw_perf_event_destroy(event);
 +
 +      /* undo the lbr/bts event accounting */
 +      x86_del_exclusive(x86_lbr_exclusive_lbr);
 +}
 +
 +static inline int x86_pmu_initialized(void)
 +{
 +      return x86_pmu.handle_irq != NULL;
 +}
 +
 +static inline int
 +set_ext_hw_attr(struct hw_perf_event *hwc, struct perf_event *event)
 +{
 +      struct perf_event_attr *attr = &event->attr;
 +      unsigned int cache_type, cache_op, cache_result;
 +      u64 config, val;
 +
 +      config = attr->config;
 +
 +      cache_type = (config >>  0) & 0xff;
 +      if (cache_type >= PERF_COUNT_HW_CACHE_MAX)
 +              return -EINVAL;
 +
 +      cache_op = (config >>  8) & 0xff;
 +      if (cache_op >= PERF_COUNT_HW_CACHE_OP_MAX)
 +              return -EINVAL;
 +
 +      cache_result = (config >> 16) & 0xff;
 +      if (cache_result >= PERF_COUNT_HW_CACHE_RESULT_MAX)
 +              return -EINVAL;
 +
 +      val = hw_cache_event_ids[cache_type][cache_op][cache_result];
 +
 +      if (val == 0)
 +              return -ENOENT;
 +
 +      if (val == -1)
 +              return -EINVAL;
 +
 +      hwc->config |= val;
 +      attr->config1 = hw_cache_extra_regs[cache_type][cache_op][cache_result];
 +      return x86_pmu_extra_regs(val, event);
 +}
 +
 +int x86_reserve_hardware(void)
 +{
 +      int err = 0;
 +
 +      if (!atomic_inc_not_zero(&pmc_refcount)) {
 +              mutex_lock(&pmc_reserve_mutex);
 +              if (atomic_read(&pmc_refcount) == 0) {
 +                      if (!reserve_pmc_hardware())
 +                              err = -EBUSY;
 +                      else
 +                              reserve_ds_buffers();
 +              }
 +              if (!err)
 +                      atomic_inc(&pmc_refcount);
 +              mutex_unlock(&pmc_reserve_mutex);
 +      }
 +
 +      return err;
 +}
 +
 +void x86_release_hardware(void)
 +{
 +      if (atomic_dec_and_mutex_lock(&pmc_refcount, &pmc_reserve_mutex)) {
 +              release_pmc_hardware();
 +              release_ds_buffers();
 +              mutex_unlock(&pmc_reserve_mutex);
 +      }
 +}
 +
 +/*
 + * Check if we can create event of a certain type (that no conflicting events
 + * are present).
 + */
 +int x86_add_exclusive(unsigned int what)
 +{
 +      int i;
 +
 +      if (!atomic_inc_not_zero(&x86_pmu.lbr_exclusive[what])) {
 +              mutex_lock(&pmc_reserve_mutex);
 +              for (i = 0; i < ARRAY_SIZE(x86_pmu.lbr_exclusive); i++) {
 +                      if (i != what && atomic_read(&x86_pmu.lbr_exclusive[i]))
 +                              goto fail_unlock;
 +              }
 +              atomic_inc(&x86_pmu.lbr_exclusive[what]);
 +              mutex_unlock(&pmc_reserve_mutex);
 +      }
 +
 +      atomic_inc(&active_events);
 +      return 0;
 +
 +fail_unlock:
 +      mutex_unlock(&pmc_reserve_mutex);
 +      return -EBUSY;
 +}
 +
 +void x86_del_exclusive(unsigned int what)
 +{
 +      atomic_dec(&x86_pmu.lbr_exclusive[what]);
 +      atomic_dec(&active_events);
 +}
 +
 +int x86_setup_perfctr(struct perf_event *event)
 +{
 +      struct perf_event_attr *attr = &event->attr;
 +      struct hw_perf_event *hwc = &event->hw;
 +      u64 config;
 +
 +      if (!is_sampling_event(event)) {
 +              hwc->sample_period = x86_pmu.max_period;
 +              hwc->last_period = hwc->sample_period;
 +              local64_set(&hwc->period_left, hwc->sample_period);
 +      }
 +
 +      if (attr->type == PERF_TYPE_RAW)
 +              return x86_pmu_extra_regs(event->attr.config, event);
 +
 +      if (attr->type == PERF_TYPE_HW_CACHE)
 +              return set_ext_hw_attr(hwc, event);
 +
 +      if (attr->config >= x86_pmu.max_events)
 +              return -EINVAL;
 +
 +      /*
 +       * The generic map:
 +       */
 +      config = x86_pmu.event_map(attr->config);
 +
 +      if (config == 0)
 +              return -ENOENT;
 +
 +      if (config == -1LL)
 +              return -EINVAL;
 +
 +      /*
 +       * Branch tracing:
 +       */
 +      if (attr->config == PERF_COUNT_HW_BRANCH_INSTRUCTIONS &&
 +          !attr->freq && hwc->sample_period == 1) {
 +              /* BTS is not supported by this architecture. */
 +              if (!x86_pmu.bts_active)
 +                      return -EOPNOTSUPP;
 +
 +              /* BTS is currently only allowed for user-mode. */
 +              if (!attr->exclude_kernel)
 +                      return -EOPNOTSUPP;
 +
 +              /* disallow bts if conflicting events are present */
 +              if (x86_add_exclusive(x86_lbr_exclusive_lbr))
 +                      return -EBUSY;
 +
 +              event->destroy = hw_perf_lbr_event_destroy;
 +      }
 +
 +      hwc->config |= config;
 +
 +      return 0;
 +}
 +
 +/*
 + * check that branch_sample_type is compatible with
 + * settings needed for precise_ip > 1 which implies
 + * using the LBR to capture ALL taken branches at the
 + * priv levels of the measurement
 + */
 +static inline int precise_br_compat(struct perf_event *event)
 +{
 +      u64 m = event->attr.branch_sample_type;
 +      u64 b = 0;
 +
 +      /* must capture all branches */
 +      if (!(m & PERF_SAMPLE_BRANCH_ANY))
 +              return 0;
 +
 +      m &= PERF_SAMPLE_BRANCH_KERNEL | PERF_SAMPLE_BRANCH_USER;
 +
 +      if (!event->attr.exclude_user)
 +              b |= PERF_SAMPLE_BRANCH_USER;
 +
 +      if (!event->attr.exclude_kernel)
 +              b |= PERF_SAMPLE_BRANCH_KERNEL;
 +
 +      /*
 +       * ignore PERF_SAMPLE_BRANCH_HV, not supported on x86
 +       */
 +
 +      return m == b;
 +}
 +
 +int x86_pmu_hw_config(struct perf_event *event)
 +{
 +      if (event->attr.precise_ip) {
 +              int precise = 0;
 +
 +              /* Support for constant skid */
 +              if (x86_pmu.pebs_active && !x86_pmu.pebs_broken) {
 +                      precise++;
 +
 +                      /* Support for IP fixup */
 +                      if (x86_pmu.lbr_nr || x86_pmu.intel_cap.pebs_format >= 2)
 +                              precise++;
 +
 +                      if (x86_pmu.pebs_prec_dist)
 +                              precise++;
 +              }
 +
 +              if (event->attr.precise_ip > precise)
 +                      return -EOPNOTSUPP;
 +      }
 +      /*
 +       * check that PEBS LBR correction does not conflict with
 +       * whatever the user is asking with attr->branch_sample_type
 +       */
 +      if (event->attr.precise_ip > 1 && x86_pmu.intel_cap.pebs_format < 2) {
 +              u64 *br_type = &event->attr.branch_sample_type;
 +
 +              if (has_branch_stack(event)) {
 +                      if (!precise_br_compat(event))
 +                              return -EOPNOTSUPP;
 +
 +                      /* branch_sample_type is compatible */
 +
 +              } else {
 +                      /*
 +                       * user did not specify  branch_sample_type
 +                       *
 +                       * For PEBS fixups, we capture all
 +                       * the branches at the priv level of the
 +                       * event.
 +                       */
 +                      *br_type = PERF_SAMPLE_BRANCH_ANY;
 +
 +                      if (!event->attr.exclude_user)
 +                              *br_type |= PERF_SAMPLE_BRANCH_USER;
 +
 +                      if (!event->attr.exclude_kernel)
 +                              *br_type |= PERF_SAMPLE_BRANCH_KERNEL;
 +              }
 +      }
 +
 +      if (event->attr.branch_sample_type & PERF_SAMPLE_BRANCH_CALL_STACK)
 +              event->attach_state |= PERF_ATTACH_TASK_DATA;
 +
 +      /*
 +       * Generate PMC IRQs:
 +       * (keep 'enabled' bit clear for now)
 +       */
 +      event->hw.config = ARCH_PERFMON_EVENTSEL_INT;
 +
 +      /*
 +       * Count user and OS events unless requested not to
 +       */
 +      if (!event->attr.exclude_user)
 +              event->hw.config |= ARCH_PERFMON_EVENTSEL_USR;
 +      if (!event->attr.exclude_kernel)
 +              event->hw.config |= ARCH_PERFMON_EVENTSEL_OS;
 +
 +      if (event->attr.type == PERF_TYPE_RAW)
 +              event->hw.config |= event->attr.config & X86_RAW_EVENT_MASK;
 +
 +      if (event->attr.sample_period && x86_pmu.limit_period) {
 +              if (x86_pmu.limit_period(event, event->attr.sample_period) >
 +                              event->attr.sample_period)
 +                      return -EINVAL;
 +      }
 +
 +      return x86_setup_perfctr(event);
 +}
 +
 +/*
 + * Setup the hardware configuration for a given attr_type
 + */
 +static int __x86_pmu_event_init(struct perf_event *event)
 +{
 +      int err;
 +
 +      if (!x86_pmu_initialized())
 +              return -ENODEV;
 +
 +      err = x86_reserve_hardware();
 +      if (err)
 +              return err;
 +
 +      atomic_inc(&active_events);
 +      event->destroy = hw_perf_event_destroy;
 +
 +      event->hw.idx = -1;
 +      event->hw.last_cpu = -1;
 +      event->hw.last_tag = ~0ULL;
 +
 +      /* mark unused */
 +      event->hw.extra_reg.idx = EXTRA_REG_NONE;
 +      event->hw.branch_reg.idx = EXTRA_REG_NONE;
 +
 +      return x86_pmu.hw_config(event);
 +}
 +
 +void x86_pmu_disable_all(void)
 +{
 +      struct cpu_hw_events *cpuc = this_cpu_ptr(&cpu_hw_events);
 +      int idx;
 +
 +      for (idx = 0; idx < x86_pmu.num_counters; idx++) {
 +              u64 val;
 +
 +              if (!test_bit(idx, cpuc->active_mask))
 +                      continue;
 +              rdmsrl(x86_pmu_config_addr(idx), val);
 +              if (!(val & ARCH_PERFMON_EVENTSEL_ENABLE))
 +                      continue;
 +              val &= ~ARCH_PERFMON_EVENTSEL_ENABLE;
 +              wrmsrl(x86_pmu_config_addr(idx), val);
 +      }
 +}
 +
 +/*
 + * There may be PMI landing after enabled=0. The PMI hitting could be before or
 + * after disable_all.
 + *
 + * If PMI hits before disable_all, the PMU will be disabled in the NMI handler.
 + * It will not be re-enabled in the NMI handler again, because enabled=0. After
 + * handling the NMI, disable_all will be called, which will not change the
 + * state either. If PMI hits after disable_all, the PMU is already disabled
 + * before entering NMI handler. The NMI handler will not change the state
 + * either.
 + *
 + * So either situation is harmless.
 + */
 +static void x86_pmu_disable(struct pmu *pmu)
 +{
 +      struct cpu_hw_events *cpuc = this_cpu_ptr(&cpu_hw_events);
 +
 +      if (!x86_pmu_initialized())
 +              return;
 +
 +      if (!cpuc->enabled)
 +              return;
 +
 +      cpuc->n_added = 0;
 +      cpuc->enabled = 0;
 +      barrier();
 +
 +      x86_pmu.disable_all();
 +}
 +
 +void x86_pmu_enable_all(int added)
 +{
 +      struct cpu_hw_events *cpuc = this_cpu_ptr(&cpu_hw_events);
 +      int idx;
 +
 +      for (idx = 0; idx < x86_pmu.num_counters; idx++) {
 +              struct hw_perf_event *hwc = &cpuc->events[idx]->hw;
 +
 +              if (!test_bit(idx, cpuc->active_mask))
 +                      continue;
 +
 +              __x86_pmu_enable_event(hwc, ARCH_PERFMON_EVENTSEL_ENABLE);
 +      }
 +}
 +
 +static struct pmu pmu;
 +
 +static inline int is_x86_event(struct perf_event *event)
 +{
 +      return event->pmu == &pmu;
 +}
 +
 +/*
 + * Event scheduler state:
 + *
 + * Assign events iterating over all events and counters, beginning
 + * with events with least weights first. Keep the current iterator
 + * state in struct sched_state.
 + */
 +struct sched_state {
 +      int     weight;
 +      int     event;          /* event index */
 +      int     counter;        /* counter index */
 +      int     unassigned;     /* number of events to be assigned left */
 +      int     nr_gp;          /* number of GP counters used */
 +      unsigned long used[BITS_TO_LONGS(X86_PMC_IDX_MAX)];
 +};
 +
 +/* Total max is X86_PMC_IDX_MAX, but we are O(n!) limited */
 +#define       SCHED_STATES_MAX        2
 +
 +struct perf_sched {
 +      int                     max_weight;
 +      int                     max_events;
 +      int                     max_gp;
 +      int                     saved_states;
 +      struct event_constraint **constraints;
 +      struct sched_state      state;
 +      struct sched_state      saved[SCHED_STATES_MAX];
 +};
 +
 +/*
 + * Initialize interator that runs through all events and counters.
 + */
 +static void perf_sched_init(struct perf_sched *sched, struct event_constraint **constraints,
 +                          int num, int wmin, int wmax, int gpmax)
 +{
 +      int idx;
 +
 +      memset(sched, 0, sizeof(*sched));
 +      sched->max_events       = num;
 +      sched->max_weight       = wmax;
 +      sched->max_gp           = gpmax;
 +      sched->constraints      = constraints;
 +
 +      for (idx = 0; idx < num; idx++) {
 +              if (constraints[idx]->weight == wmin)
 +                      break;
 +      }
 +
 +      sched->state.event      = idx;          /* start with min weight */
 +      sched->state.weight     = wmin;
 +      sched->state.unassigned = num;
 +}
 +
 +static void perf_sched_save_state(struct perf_sched *sched)
 +{
 +      if (WARN_ON_ONCE(sched->saved_states >= SCHED_STATES_MAX))
 +              return;
 +
 +      sched->saved[sched->saved_states] = sched->state;
 +      sched->saved_states++;
 +}
 +
 +static bool perf_sched_restore_state(struct perf_sched *sched)
 +{
 +      if (!sched->saved_states)
 +              return false;
 +
 +      sched->saved_states--;
 +      sched->state = sched->saved[sched->saved_states];
 +
 +      /* continue with next counter: */
 +      clear_bit(sched->state.counter++, sched->state.used);
 +
 +      return true;
 +}
 +
 +/*
 + * Select a counter for the current event to schedule. Return true on
 + * success.
 + */
 +static bool __perf_sched_find_counter(struct perf_sched *sched)
 +{
 +      struct event_constraint *c;
 +      int idx;
 +
 +      if (!sched->state.unassigned)
 +              return false;
 +
 +      if (sched->state.event >= sched->max_events)
 +              return false;
 +
 +      c = sched->constraints[sched->state.event];
 +      /* Prefer fixed purpose counters */
 +      if (c->idxmsk64 & (~0ULL << INTEL_PMC_IDX_FIXED)) {
 +              idx = INTEL_PMC_IDX_FIXED;
 +              for_each_set_bit_from(idx, c->idxmsk, X86_PMC_IDX_MAX) {
 +                      if (!__test_and_set_bit(idx, sched->state.used))
 +                              goto done;
 +              }
 +      }
 +
 +      /* Grab the first unused counter starting with idx */
 +      idx = sched->state.counter;
 +      for_each_set_bit_from(idx, c->idxmsk, INTEL_PMC_IDX_FIXED) {
 +              if (!__test_and_set_bit(idx, sched->state.used)) {
 +                      if (sched->state.nr_gp++ >= sched->max_gp)
 +                              return false;
 +
 +                      goto done;
 +              }
 +      }
 +
 +      return false;
 +
 +done:
 +      sched->state.counter = idx;
 +
 +      if (c->overlap)
 +              perf_sched_save_state(sched);
 +
 +      return true;
 +}
 +
 +static bool perf_sched_find_counter(struct perf_sched *sched)
 +{
 +      while (!__perf_sched_find_counter(sched)) {
 +              if (!perf_sched_restore_state(sched))
 +                      return false;
 +      }
 +
 +      return true;
 +}
 +
 +/*
 + * Go through all unassigned events and find the next one to schedule.
 + * Take events with the least weight first. Return true on success.
 + */
 +static bool perf_sched_next_event(struct perf_sched *sched)
 +{
 +      struct event_constraint *c;
 +
 +      if (!sched->state.unassigned || !--sched->state.unassigned)
 +              return false;
 +
 +      do {
 +              /* next event */
 +              sched->state.event++;
 +              if (sched->state.event >= sched->max_events) {
 +                      /* next weight */
 +                      sched->state.event = 0;
 +                      sched->state.weight++;
 +                      if (sched->state.weight > sched->max_weight)
 +                              return false;
 +              }
 +              c = sched->constraints[sched->state.event];
 +      } while (c->weight != sched->state.weight);
 +
 +      sched->state.counter = 0;       /* start with first counter */
 +
 +      return true;
 +}
 +
 +/*
 + * Assign a counter to each event. Returns the number of events that
 + * could not be assigned (0 on success).
 + */
 +int perf_assign_events(struct event_constraint **constraints, int n,
 +                      int wmin, int wmax, int gpmax, int *assign)
 +{
 +      struct perf_sched sched;
 +
 +      perf_sched_init(&sched, constraints, n, wmin, wmax, gpmax);
 +
 +      do {
 +              if (!perf_sched_find_counter(&sched))
 +                      break;  /* failed */
 +              if (assign)
 +                      assign[sched.state.event] = sched.state.counter;
 +      } while (perf_sched_next_event(&sched));
 +
 +      return sched.state.unassigned;
 +}
 +EXPORT_SYMBOL_GPL(perf_assign_events);
 +
 +int x86_schedule_events(struct cpu_hw_events *cpuc, int n, int *assign)
 +{
 +      struct event_constraint *c;
 +      unsigned long used_mask[BITS_TO_LONGS(X86_PMC_IDX_MAX)];
 +      struct perf_event *e;
 +      int i, wmin, wmax, unsched = 0;
 +      struct hw_perf_event *hwc;
 +
 +      bitmap_zero(used_mask, X86_PMC_IDX_MAX);
 +
 +      if (x86_pmu.start_scheduling)
 +              x86_pmu.start_scheduling(cpuc);
 +
 +      for (i = 0, wmin = X86_PMC_IDX_MAX, wmax = 0; i < n; i++) {
 +              cpuc->event_constraint[i] = NULL;
 +              c = x86_pmu.get_event_constraints(cpuc, i, cpuc->event_list[i]);
 +              cpuc->event_constraint[i] = c;
 +
 +              wmin = min(wmin, c->weight);
 +              wmax = max(wmax, c->weight);
 +      }
 +
 +      /*
 +       * fast path: try to reuse each event's previous counter assignment
 +       */
 +      for (i = 0; i < n; i++) {
 +              hwc = &cpuc->event_list[i]->hw;
 +              c = cpuc->event_constraint[i];
 +
 +              /* never assigned */
 +              if (hwc->idx == -1)
 +                      break;
 +
 +              /* constraint still honored */
 +              if (!test_bit(hwc->idx, c->idxmsk))
 +                      break;
 +
 +              /* not already used */
 +              if (test_bit(hwc->idx, used_mask))
 +                      break;
 +
 +              __set_bit(hwc->idx, used_mask);
 +              if (assign)
 +                      assign[i] = hwc->idx;
 +      }
 +
 +      /* slow path */
 +      if (i != n) {
 +              int gpmax = x86_pmu.num_counters;
 +
 +              /*
 +               * Do not allow scheduling of more than half the available
 +               * generic counters.
 +               *
 +               * This helps avoid counter starvation of the sibling thread by
 +               * ensuring that at most half the counters can be in exclusive
 +               * mode. There are no designated counters for this limit; any
 +               * N/2 counters can be used. This helps with events that have
 +               * specific counter constraints.
 +               */
 +              if (is_ht_workaround_enabled() && !cpuc->is_fake &&
 +                  READ_ONCE(cpuc->excl_cntrs->exclusive_present))
 +                      gpmax /= 2;
 +
 +              unsched = perf_assign_events(cpuc->event_constraint, n, wmin,
 +                                           wmax, gpmax, assign);
 +      }
 +
 +      /*
 +       * In case of success (unsched = 0), mark events as committed,
 +       * so we do not put_constraint() in case new events are added
 +       * and fail to be scheduled
 +       *
 +       * We invoke the lower level commit callback to lock the resource
 +       *
 +       * We do not need to do all of this in case we are called to
 +       * validate an event group (assign == NULL)
 +       */
 +      if (!unsched && assign) {
 +              for (i = 0; i < n; i++) {
 +                      e = cpuc->event_list[i];
 +                      e->hw.flags |= PERF_X86_EVENT_COMMITTED;
 +                      if (x86_pmu.commit_scheduling)
 +                              x86_pmu.commit_scheduling(cpuc, i, assign[i]);
 +              }
 +      } else {
 +              for (i = 0; i < n; i++) {
 +                      e = cpuc->event_list[i];
 +                      /*
 +                       * do not put_constraint() on committed events,
 +                       * because they are good to go
 +                       */
 +                      if ((e->hw.flags & PERF_X86_EVENT_COMMITTED))
 +                              continue;
 +
 +                      /*
 +                       * release events that failed scheduling
 +                       */
 +                      if (x86_pmu.put_event_constraints)
 +                              x86_pmu.put_event_constraints(cpuc, e);
 +              }
 +      }
 +
 +      if (x86_pmu.stop_scheduling)
 +              x86_pmu.stop_scheduling(cpuc);
 +
 +      return unsched ? -EINVAL : 0;
 +}
 +
 +/*
 + * dogrp: true if we must also collect the leader's sibling events (group)
 + * Returns the total number of collected events, or a negative error code.
 + */
 +static int collect_events(struct cpu_hw_events *cpuc, struct perf_event *leader, bool dogrp)
 +{
 +      struct perf_event *event;
 +      int n, max_count;
 +
 +      max_count = x86_pmu.num_counters + x86_pmu.num_counters_fixed;
 +
 +      /* current number of events already accepted */
 +      n = cpuc->n_events;
 +
 +      if (is_x86_event(leader)) {
 +              if (n >= max_count)
 +                      return -EINVAL;
 +              cpuc->event_list[n] = leader;
 +              n++;
 +      }
 +      if (!dogrp)
 +              return n;
 +
 +      list_for_each_entry(event, &leader->sibling_list, group_entry) {
 +              if (!is_x86_event(event) ||
 +                  event->state <= PERF_EVENT_STATE_OFF)
 +                      continue;
 +
 +              if (n >= max_count)
 +                      return -EINVAL;
 +
 +              cpuc->event_list[n] = event;
 +              n++;
 +      }
 +      return n;
 +}
 +
 +static inline void x86_assign_hw_event(struct perf_event *event,
 +                              struct cpu_hw_events *cpuc, int i)
 +{
 +      struct hw_perf_event *hwc = &event->hw;
 +
 +      hwc->idx = cpuc->assign[i];
 +      hwc->last_cpu = smp_processor_id();
 +      hwc->last_tag = ++cpuc->tags[i];
 +
 +      if (hwc->idx == INTEL_PMC_IDX_FIXED_BTS) {
 +              hwc->config_base = 0;
 +              hwc->event_base = 0;
 +      } else if (hwc->idx >= INTEL_PMC_IDX_FIXED) {
 +              hwc->config_base = MSR_ARCH_PERFMON_FIXED_CTR_CTRL;
 +              hwc->event_base = MSR_ARCH_PERFMON_FIXED_CTR0 + (hwc->idx - INTEL_PMC_IDX_FIXED);
 +              hwc->event_base_rdpmc = (hwc->idx - INTEL_PMC_IDX_FIXED) | 1<<30;
 +      } else {
 +              hwc->config_base = x86_pmu_config_addr(hwc->idx);
 +              hwc->event_base  = x86_pmu_event_addr(hwc->idx);
 +              hwc->event_base_rdpmc = x86_pmu_rdpmc_index(hwc->idx);
 +      }
 +}
 +
 +static inline int match_prev_assignment(struct hw_perf_event *hwc,
 +                                      struct cpu_hw_events *cpuc,
 +                                      int i)
 +{
 +      return hwc->idx == cpuc->assign[i] &&
 +              hwc->last_cpu == smp_processor_id() &&
 +              hwc->last_tag == cpuc->tags[i];
 +}
 +
 +static void x86_pmu_start(struct perf_event *event, int flags);
 +
 +static void x86_pmu_enable(struct pmu *pmu)
 +{
 +      struct cpu_hw_events *cpuc = this_cpu_ptr(&cpu_hw_events);
 +      struct perf_event *event;
 +      struct hw_perf_event *hwc;
 +      int i, added = cpuc->n_added;
 +
 +      if (!x86_pmu_initialized())
 +              return;
 +
 +      if (cpuc->enabled)
 +              return;
 +
 +      if (cpuc->n_added) {
 +              int n_running = cpuc->n_events - cpuc->n_added;
 +              /*
 +               * apply assignment obtained either from
 +               * hw_perf_group_sched_in() or x86_pmu_enable()
 +               *
 +               * step1: save events moving to new counters
 +               */
 +              for (i = 0; i < n_running; i++) {
 +                      event = cpuc->event_list[i];
 +                      hwc = &event->hw;
 +
 +                      /*
 +                       * we can avoid reprogramming counter if:
 +                       * - assigned same counter as last time
 +                       * - running on same CPU as last time
 +                       * - no other event has used the counter since
 +                       */
 +                      if (hwc->idx == -1 ||
 +                          match_prev_assignment(hwc, cpuc, i))
 +                              continue;
 +
 +                      /*
 +                       * Ensure we don't accidentally enable a stopped
 +                       * counter simply because we rescheduled.
 +                       */
 +                      if (hwc->state & PERF_HES_STOPPED)
 +                              hwc->state |= PERF_HES_ARCH;
 +
 +                      x86_pmu_stop(event, PERF_EF_UPDATE);
 +              }
 +
 +              /*
 +               * step2: reprogram moved events into new counters
 +               */
 +              for (i = 0; i < cpuc->n_events; i++) {
 +                      event = cpuc->event_list[i];
 +                      hwc = &event->hw;
 +
 +                      if (!match_prev_assignment(hwc, cpuc, i))
 +                              x86_assign_hw_event(event, cpuc, i);
 +                      else if (i < n_running)
 +                              continue;
 +
 +                      if (hwc->state & PERF_HES_ARCH)
 +                              continue;
 +
 +                      x86_pmu_start(event, PERF_EF_RELOAD);
 +              }
 +              cpuc->n_added = 0;
 +              perf_events_lapic_init();
 +      }
 +
 +      cpuc->enabled = 1;
 +      barrier();
 +
 +      x86_pmu.enable_all(added);
 +}
 +
 +static DEFINE_PER_CPU(u64 [X86_PMC_IDX_MAX], pmc_prev_left);
 +
 +/*
 + * Set the next IRQ period, based on the hwc->period_left value.
 + * To be called with the event disabled in hw:
 + */
 +int x86_perf_event_set_period(struct perf_event *event)
 +{
 +      struct hw_perf_event *hwc = &event->hw;
 +      s64 left = local64_read(&hwc->period_left);
 +      s64 period = hwc->sample_period;
 +      int ret = 0, idx = hwc->idx;
 +
 +      if (idx == INTEL_PMC_IDX_FIXED_BTS)
 +              return 0;
 +
 +      /*
 +       * If we are way outside a reasonable range then just skip forward:
 +       */
 +      if (unlikely(left <= -period)) {
 +              left = period;
 +              local64_set(&hwc->period_left, left);
 +              hwc->last_period = period;
 +              ret = 1;
 +      }
 +
 +      if (unlikely(left <= 0)) {
 +              left += period;
 +              local64_set(&hwc->period_left, left);
 +              hwc->last_period = period;
 +              ret = 1;
 +      }
 +      /*
 +       * Quirk: certain CPUs don't like it if just 1 hw_event is left:
 +       */
 +      if (unlikely(left < 2))
 +              left = 2;
 +
 +      if (left > x86_pmu.max_period)
 +              left = x86_pmu.max_period;
 +
 +      if (x86_pmu.limit_period)
 +              left = x86_pmu.limit_period(event, left);
 +
 +      per_cpu(pmc_prev_left[idx], smp_processor_id()) = left;
 +
 +      if (!(hwc->flags & PERF_X86_EVENT_AUTO_RELOAD) ||
 +          local64_read(&hwc->prev_count) != (u64)-left) {
 +              /*
 +               * The hw event starts counting from this event offset;
 +               * mark it so we can extract future deltas:
 +               */
 +              local64_set(&hwc->prev_count, (u64)-left);
 +
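 +              /*
 +               * Program the counter to -left (modulo the counter width) so
 +               * that it overflows after 'left' more increments.
 +               */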
 +              wrmsrl(hwc->event_base, (u64)(-left) & x86_pmu.cntval_mask);
 +      }
 +
 +      /*
 +       * Due to an erratum on certain CPUs, we need
 +       * a second write to make sure the register
 +       * is updated properly
 +       */
 +      if (x86_pmu.perfctr_second_write) {
 +              wrmsrl(hwc->event_base,
 +                      (u64)(-left) & x86_pmu.cntval_mask);
 +      }
 +
 +      perf_event_update_userpage(event);
 +
 +      return ret;
 +}
 +
 +void x86_pmu_enable_event(struct perf_event *event)
 +{
 +      if (__this_cpu_read(cpu_hw_events.enabled))
 +              __x86_pmu_enable_event(&event->hw,
 +                                     ARCH_PERFMON_EVENTSEL_ENABLE);
 +}
 +
 +/*
 + * Add a single event to the PMU.
 + *
 + * The event is added to the group of enabled events
 + * but only if it can be scheduled alongside the existing events.
 + */
 +static int x86_pmu_add(struct perf_event *event, int flags)
 +{
 +      struct cpu_hw_events *cpuc = this_cpu_ptr(&cpu_hw_events);
 +      struct hw_perf_event *hwc;
 +      int assign[X86_PMC_IDX_MAX];
 +      int n, n0, ret;
 +
 +      hwc = &event->hw;
 +
 +      n0 = cpuc->n_events;
 +      ret = n = collect_events(cpuc, event, false);
 +      if (ret < 0)
 +              goto out;
 +
 +      hwc->state = PERF_HES_UPTODATE | PERF_HES_STOPPED;
 +      if (!(flags & PERF_EF_START))
 +              hwc->state |= PERF_HES_ARCH;
 +
 +      /*
 +       * If a group event scheduling transaction was started,
 +       * skip the schedulability test here; it will be performed
 +       * at commit time (->commit_txn) as a whole.
 +       */
 +      if (cpuc->txn_flags & PERF_PMU_TXN_ADD)
 +              goto done_collect;
 +
 +      ret = x86_pmu.schedule_events(cpuc, n, assign);
 +      if (ret)
 +              goto out;
 +      /*
 +       * Copy the new assignment now that we know it is possible;
 +       * it will be used by hw_perf_enable().
 +       */
 +      memcpy(cpuc->assign, assign, n*sizeof(int));
 +
 +done_collect:
 +      /*
 +       * Commit the collect_events() state. See x86_pmu_del() and
 +       * x86_pmu_*_txn().
 +       */
 +      cpuc->n_events = n;
 +      cpuc->n_added += n - n0;
 +      cpuc->n_txn += n - n0;
 +
 +      ret = 0;
 +out:
 +      return ret;
 +}
 +
 +static void x86_pmu_start(struct perf_event *event, int flags)
 +{
 +      struct cpu_hw_events *cpuc = this_cpu_ptr(&cpu_hw_events);
 +      int idx = event->hw.idx;
 +
 +      if (WARN_ON_ONCE(!(event->hw.state & PERF_HES_STOPPED)))
 +              return;
 +
 +      if (WARN_ON_ONCE(idx == -1))
 +              return;
 +
 +      if (flags & PERF_EF_RELOAD) {
 +              WARN_ON_ONCE(!(event->hw.state & PERF_HES_UPTODATE));
 +              x86_perf_event_set_period(event);
 +      }
 +
 +      event->hw.state = 0;
 +
 +      cpuc->events[idx] = event;
 +      __set_bit(idx, cpuc->active_mask);
 +      __set_bit(idx, cpuc->running);
 +      x86_pmu.enable(event);
 +      perf_event_update_userpage(event);
 +}
 +
 +void perf_event_print_debug(void)
 +{
 +      u64 ctrl, status, overflow, pmc_ctrl, pmc_count, prev_left, fixed;
 +      u64 pebs, debugctl;
 +      struct cpu_hw_events *cpuc;
 +      unsigned long flags;
 +      int cpu, idx;
 +
 +      if (!x86_pmu.num_counters)
 +              return;
 +
 +      local_irq_save(flags);
 +
 +      cpu = smp_processor_id();
 +      cpuc = &per_cpu(cpu_hw_events, cpu);
 +
 +      if (x86_pmu.version >= 2) {
 +              rdmsrl(MSR_CORE_PERF_GLOBAL_CTRL, ctrl);
 +              rdmsrl(MSR_CORE_PERF_GLOBAL_STATUS, status);
 +              rdmsrl(MSR_CORE_PERF_GLOBAL_OVF_CTRL, overflow);
 +              rdmsrl(MSR_ARCH_PERFMON_FIXED_CTR_CTRL, fixed);
 +
 +              pr_info("\n");
 +              pr_info("CPU#%d: ctrl:       %016llx\n", cpu, ctrl);
 +              pr_info("CPU#%d: status:     %016llx\n", cpu, status);
 +              pr_info("CPU#%d: overflow:   %016llx\n", cpu, overflow);
 +              pr_info("CPU#%d: fixed:      %016llx\n", cpu, fixed);
 +              if (x86_pmu.pebs_constraints) {
 +                      rdmsrl(MSR_IA32_PEBS_ENABLE, pebs);
 +                      pr_info("CPU#%d: pebs:       %016llx\n", cpu, pebs);
 +              }
 +              if (x86_pmu.lbr_nr) {
 +                      rdmsrl(MSR_IA32_DEBUGCTLMSR, debugctl);
 +                      pr_info("CPU#%d: debugctl:   %016llx\n", cpu, debugctl);
 +              }
 +      }
 +      pr_info("CPU#%d: active:     %016llx\n", cpu, *(u64 *)cpuc->active_mask);
 +
 +      for (idx = 0; idx < x86_pmu.num_counters; idx++) {
 +              rdmsrl(x86_pmu_config_addr(idx), pmc_ctrl);
 +              rdmsrl(x86_pmu_event_addr(idx), pmc_count);
 +
 +              prev_left = per_cpu(pmc_prev_left[idx], cpu);
 +
 +              pr_info("CPU#%d:   gen-PMC%d ctrl:  %016llx\n",
 +                      cpu, idx, pmc_ctrl);
 +              pr_info("CPU#%d:   gen-PMC%d count: %016llx\n",
 +                      cpu, idx, pmc_count);
 +              pr_info("CPU#%d:   gen-PMC%d left:  %016llx\n",
 +                      cpu, idx, prev_left);
 +      }
 +      for (idx = 0; idx < x86_pmu.num_counters_fixed; idx++) {
 +              rdmsrl(MSR_ARCH_PERFMON_FIXED_CTR0 + idx, pmc_count);
 +
 +              pr_info("CPU#%d: fixed-PMC%d count: %016llx\n",
 +                      cpu, idx, pmc_count);
 +      }
 +      local_irq_restore(flags);
 +}
 +
 +void x86_pmu_stop(struct perf_event *event, int flags)
 +{
 +      struct cpu_hw_events *cpuc = this_cpu_ptr(&cpu_hw_events);
 +      struct hw_perf_event *hwc = &event->hw;
 +
 +      if (__test_and_clear_bit(hwc->idx, cpuc->active_mask)) {
 +              x86_pmu.disable(event);
 +              cpuc->events[hwc->idx] = NULL;
 +              WARN_ON_ONCE(hwc->state & PERF_HES_STOPPED);
 +              hwc->state |= PERF_HES_STOPPED;
 +      }
 +
 +      if ((flags & PERF_EF_UPDATE) && !(hwc->state & PERF_HES_UPTODATE)) {
 +              /*
 +               * Drain the remaining delta count out of an event
 +               * that we are disabling:
 +               */
 +              x86_perf_event_update(event);
 +              hwc->state |= PERF_HES_UPTODATE;
 +      }
 +}
 +
 +static void x86_pmu_del(struct perf_event *event, int flags)
 +{
 +      struct cpu_hw_events *cpuc = this_cpu_ptr(&cpu_hw_events);
 +      int i;
 +
 +      /*
 +       * event is descheduled
 +       */
 +      event->hw.flags &= ~PERF_X86_EVENT_COMMITTED;
 +
 +      /*
 +       * If we're called during a txn, we don't need to do anything.
 +       * The events never got scheduled and ->cancel_txn will truncate
 +       * the event_list.
 +       *
 +       * XXX assumes any ->del() called during a TXN will only be on
 +       * an event added during that same TXN.
 +       */
 +      if (cpuc->txn_flags & PERF_PMU_TXN_ADD)
 +              return;
 +
 +      /*
 +       * Not a TXN, therefore cleanup properly.
 +       */
 +      x86_pmu_stop(event, PERF_EF_UPDATE);
 +
 +      for (i = 0; i < cpuc->n_events; i++) {
 +              if (event == cpuc->event_list[i])
 +                      break;
 +      }
 +
 +      if (WARN_ON_ONCE(i == cpuc->n_events)) /* called ->del() without ->add() ? */
 +              return;
 +
 +      /* If we have a newly added event, make sure to decrease n_added. */
 +      if (i >= cpuc->n_events - cpuc->n_added)
 +              --cpuc->n_added;
 +
 +      if (x86_pmu.put_event_constraints)
 +              x86_pmu.put_event_constraints(cpuc, event);
 +
 +      /* Delete the array entry. */
 +      while (++i < cpuc->n_events) {
 +              cpuc->event_list[i-1] = cpuc->event_list[i];
 +              cpuc->event_constraint[i-1] = cpuc->event_constraint[i];
 +      }
 +      --cpuc->n_events;
 +
 +      perf_event_update_userpage(event);
 +}
 +
 +int x86_pmu_handle_irq(struct pt_regs *regs)
 +{
 +      struct perf_sample_data data;
 +      struct cpu_hw_events *cpuc;
 +      struct perf_event *event;
 +      int idx, handled = 0;
 +      u64 val;
 +
 +      cpuc = this_cpu_ptr(&cpu_hw_events);
 +
 +      /*
 +       * Some chipsets need to unmask the LVTPC in a particular spot
 +       * inside the nmi handler.  As a result, the unmasking was pushed
 +       * into all the nmi handlers.
 +       *
 +       * This generic handler doesn't seem to have any issues where the
 +       * unmasking occurs so it was left at the top.
 +       */
 +      apic_write(APIC_LVTPC, APIC_DM_NMI);
 +
 +      for (idx = 0; idx < x86_pmu.num_counters; idx++) {
 +              if (!test_bit(idx, cpuc->active_mask)) {
 +                      /*
 +                       * Though we deactivated the counter, some CPUs
 +                       * might still deliver spurious interrupts that are
 +                       * already in flight. Catch them:
 +                       */
 +                      if (__test_and_clear_bit(idx, cpuc->running))
 +                              handled++;
 +                      continue;
 +              }
 +
 +              event = cpuc->events[idx];
 +
 +              val = x86_perf_event_update(event);
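 +              /*
 +               * Counters are programmed to -left, so the sign bit is still
 +               * set while the counter has not yet overflowed.
 +               */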
 +              if (val & (1ULL << (x86_pmu.cntval_bits - 1)))
 +                      continue;
 +
 +              /*
 +               * event overflow
 +               */
 +              handled++;
 +              perf_sample_data_init(&data, 0, event->hw.last_period);
 +
 +              if (!x86_perf_event_set_period(event))
 +                      continue;
 +
 +              if (perf_event_overflow(event, &data, regs))
 +                      x86_pmu_stop(event, 0);
 +      }
 +
 +      if (handled)
 +              inc_irq_stat(apic_perf_irqs);
 +
 +      return handled;
 +}
 +
 +void perf_events_lapic_init(void)
 +{
 +      if (!x86_pmu.apic || !x86_pmu_initialized())
 +              return;
 +
 +      /*
 +       * Always use NMI for PMU
 +       */
 +      apic_write(APIC_LVTPC, APIC_DM_NMI);
 +}
 +
 +static int
 +perf_event_nmi_handler(unsigned int cmd, struct pt_regs *regs)
 +{
 +      u64 start_clock;
 +      u64 finish_clock;
 +      int ret;
 +
 +      /*
 +       * All PMUs/events that share this PMI handler should make sure to
 +       * increment active_events for their events.
 +       */
 +      if (!atomic_read(&active_events))
 +              return NMI_DONE;
 +
 +      start_clock = sched_clock();
 +      ret = x86_pmu.handle_irq(regs);
 +      finish_clock = sched_clock();
 +
 +      perf_sample_event_took(finish_clock - start_clock);
 +
 +      return ret;
 +}
 +NOKPROBE_SYMBOL(perf_event_nmi_handler);
 +
 +struct event_constraint emptyconstraint;
 +struct event_constraint unconstrained;
 +
 +static int
 +x86_pmu_notifier(struct notifier_block *self, unsigned long action, void *hcpu)
 +{
 +      unsigned int cpu = (long)hcpu;
 +      struct cpu_hw_events *cpuc = &per_cpu(cpu_hw_events, cpu);
 +      int i, ret = NOTIFY_OK;
 +
 +      switch (action & ~CPU_TASKS_FROZEN) {
 +      case CPU_UP_PREPARE:
 +              for (i = 0 ; i < X86_PERF_KFREE_MAX; i++)
 +                      cpuc->kfree_on_online[i] = NULL;
 +              if (x86_pmu.cpu_prepare)
 +                      ret = x86_pmu.cpu_prepare(cpu);
 +              break;
 +
 +      case CPU_STARTING:
 +              if (x86_pmu.cpu_starting)
 +                      x86_pmu.cpu_starting(cpu);
 +              break;
 +
 +      case CPU_ONLINE:
 +              for (i = 0 ; i < X86_PERF_KFREE_MAX; i++) {
 +                      kfree(cpuc->kfree_on_online[i]);
 +                      cpuc->kfree_on_online[i] = NULL;
 +              }
 +              break;
 +
 +      case CPU_DYING:
 +              if (x86_pmu.cpu_dying)
 +                      x86_pmu.cpu_dying(cpu);
 +              break;
 +
 +      case CPU_UP_CANCELED:
 +      case CPU_DEAD:
 +              if (x86_pmu.cpu_dead)
 +                      x86_pmu.cpu_dead(cpu);
 +              break;
 +
 +      default:
 +              break;
 +      }
 +
 +      return ret;
 +}
 +
 +static void __init pmu_check_apic(void)
 +{
 +      if (cpu_has_apic)
 +              return;
 +
 +      x86_pmu.apic = 0;
 +      pr_info("no APIC, boot with the \"lapic\" boot parameter to force-enable it.\n");
 +      pr_info("no hardware sampling interrupt available.\n");
 +
 +      /*
 +       * If we have a PMU initialized but no APIC
 +       * interrupts, we cannot sample hardware
 +       * events (user-space has to fall back and
 +       * sample via a hrtimer based software event):
 +       */
 +      pmu.capabilities |= PERF_PMU_CAP_NO_INTERRUPT;
 +
 +}
 +
 +static struct attribute_group x86_pmu_format_group = {
 +      .name = "format",
 +      .attrs = NULL,
 +};
 +
 +/*
 + * Remove all undefined events (x86_pmu.event_map(id) == 0)
 + * from the events_attr attributes.
 + */
 +static void __init filter_events(struct attribute **attrs)
 +{
 +      struct device_attribute *d;
 +      struct perf_pmu_events_attr *pmu_attr;
 +      int offset = 0;
 +      int i, j;
 +
 +      for (i = 0; attrs[i]; i++) {
 +              d = (struct device_attribute *)attrs[i];
 +              pmu_attr = container_of(d, struct perf_pmu_events_attr, attr);
 +              /* str trumps id */
 +              if (pmu_attr->event_str)
 +                      continue;
 +              if (x86_pmu.event_map(i + offset))
 +                      continue;
 +
 +              for (j = i; attrs[j]; j++)
 +                      attrs[j] = attrs[j + 1];
 +
 +              /* Check the shifted attr. */
 +              i--;
 +
 +              /*
 +               * event_map() is index based, the attrs array is organized
 +               * by increasing event index. If we shift the events, then
 +               * we need to compensate for the event_map(), otherwise
 +               * we are looking up the wrong event in the map
 +               */
 +              offset++;
 +      }
 +}
 +
 +/* Merge two NULL-terminated pointer arrays into a new NULL-terminated array */
 +__init struct attribute **merge_attr(struct attribute **a, struct attribute **b)
 +{
 +      struct attribute **new;
 +      int j, i;
 +
 +      for (j = 0; a[j]; j++)
 +              ;
 +      for (i = 0; b[i]; i++)
 +              j++;
 +      j++;
 +
 +      new = kmalloc(sizeof(struct attribute *) * j, GFP_KERNEL);
 +      if (!new)
 +              return NULL;
 +
 +      j = 0;
 +      for (i = 0; a[i]; i++)
 +              new[j++] = a[i];
 +      for (i = 0; b[i]; i++)
 +              new[j++] = b[i];
 +      new[j] = NULL;
 +
 +      return new;
 +}
 +
 +ssize_t events_sysfs_show(struct device *dev, struct device_attribute *attr,
 +                        char *page)
 +{
 +      struct perf_pmu_events_attr *pmu_attr = \
 +              container_of(attr, struct perf_pmu_events_attr, attr);
 +      u64 config = x86_pmu.event_map(pmu_attr->id);
 +
 +      /* string trumps id */
 +      if (pmu_attr->event_str)
 +              return sprintf(page, "%s", pmu_attr->event_str);
 +
 +      return x86_pmu.events_sysfs_show(page, config);
 +}
 +
 +EVENT_ATTR(cpu-cycles,                        CPU_CYCLES              );
 +EVENT_ATTR(instructions,              INSTRUCTIONS            );
 +EVENT_ATTR(cache-references,          CACHE_REFERENCES        );
 +EVENT_ATTR(cache-misses,              CACHE_MISSES            );
 +EVENT_ATTR(branch-instructions,               BRANCH_INSTRUCTIONS     );
 +EVENT_ATTR(branch-misses,             BRANCH_MISSES           );
 +EVENT_ATTR(bus-cycles,                        BUS_CYCLES              );
 +EVENT_ATTR(stalled-cycles-frontend,   STALLED_CYCLES_FRONTEND );
 +EVENT_ATTR(stalled-cycles-backend,    STALLED_CYCLES_BACKEND  );
 +EVENT_ATTR(ref-cycles,                        REF_CPU_CYCLES          );
 +
 +static struct attribute *empty_attrs;
 +
 +static struct attribute *events_attr[] = {
 +      EVENT_PTR(CPU_CYCLES),
 +      EVENT_PTR(INSTRUCTIONS),
 +      EVENT_PTR(CACHE_REFERENCES),
 +      EVENT_PTR(CACHE_MISSES),
 +      EVENT_PTR(BRANCH_INSTRUCTIONS),
 +      EVENT_PTR(BRANCH_MISSES),
 +      EVENT_PTR(BUS_CYCLES),
 +      EVENT_PTR(STALLED_CYCLES_FRONTEND),
 +      EVENT_PTR(STALLED_CYCLES_BACKEND),
 +      EVENT_PTR(REF_CPU_CYCLES),
 +      NULL,
 +};
 +
 +static struct attribute_group x86_pmu_events_group = {
 +      .name = "events",
 +      .attrs = events_attr,
 +};
 +
 +ssize_t x86_event_sysfs_show(char *page, u64 config, u64 event)
 +{
 +      u64 umask  = (config & ARCH_PERFMON_EVENTSEL_UMASK) >> 8;
 +      u64 cmask  = (config & ARCH_PERFMON_EVENTSEL_CMASK) >> 24;
 +      bool edge  = (config & ARCH_PERFMON_EVENTSEL_EDGE);
 +      bool pc    = (config & ARCH_PERFMON_EVENTSEL_PIN_CONTROL);
 +      bool any   = (config & ARCH_PERFMON_EVENTSEL_ANY);
 +      bool inv   = (config & ARCH_PERFMON_EVENTSEL_INV);
 +      ssize_t ret;
 +
 +      /*
 +       * We have a whole page to spend and only a little data
 +       * to write, so we can safely use sprintf.
 +       */
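 +      /*
 +       * Example of the resulting string (illustrative values only):
 +       *   "event=0xc0,umask=0x01,cmask=0x01\n"
 +       */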
 +      ret = sprintf(page, "event=0x%02llx", event);
 +
 +      if (umask)
 +              ret += sprintf(page + ret, ",umask=0x%02llx", umask);
 +
 +      if (edge)
 +              ret += sprintf(page + ret, ",edge");
 +
 +      if (pc)
 +              ret += sprintf(page + ret, ",pc");
 +
 +      if (any)
 +              ret += sprintf(page + ret, ",any");
 +
 +      if (inv)
 +              ret += sprintf(page + ret, ",inv");
 +
 +      if (cmask)
 +              ret += sprintf(page + ret, ",cmask=0x%02llx", cmask);
 +
 +      ret += sprintf(page + ret, "\n");
 +
 +      return ret;
 +}
 +
 +static int __init init_hw_perf_events(void)
 +{
 +      struct x86_pmu_quirk *quirk;
 +      int err;
 +
 +      pr_info("Performance Events: ");
 +
 +      switch (boot_cpu_data.x86_vendor) {
 +      case X86_VENDOR_INTEL:
 +              err = intel_pmu_init();
 +              break;
 +      case X86_VENDOR_AMD:
 +              err = amd_pmu_init();
 +              break;
 +      default:
 +              err = -ENOTSUPP;
 +      }
 +      if (err != 0) {
 +              pr_cont("no PMU driver, software events only.\n");
 +              return 0;
 +      }
 +
 +      pmu_check_apic();
 +
 +      /* sanity check that the hardware exists or is emulated */
 +      if (!check_hw_exists())
 +              return 0;
 +
 +      pr_cont("%s PMU driver.\n", x86_pmu.name);
 +
 +      x86_pmu.attr_rdpmc = 1; /* enable userspace RDPMC usage by default */
 +
 +      for (quirk = x86_pmu.quirks; quirk; quirk = quirk->next)
 +              quirk->func();
 +
 +      if (!x86_pmu.intel_ctrl)
 +              x86_pmu.intel_ctrl = (1 << x86_pmu.num_counters) - 1;
 +
 +      perf_events_lapic_init();
 +      register_nmi_handler(NMI_LOCAL, perf_event_nmi_handler, 0, "PMI");
 +
 +      unconstrained = (struct event_constraint)
 +              __EVENT_CONSTRAINT(0, (1ULL << x86_pmu.num_counters) - 1,
 +                                 0, x86_pmu.num_counters, 0, 0);
 +
 +      x86_pmu_format_group.attrs = x86_pmu.format_attrs;
 +
 +      if (x86_pmu.event_attrs)
 +              x86_pmu_events_group.attrs = x86_pmu.event_attrs;
 +
 +      if (!x86_pmu.events_sysfs_show)
 +              x86_pmu_events_group.attrs = &empty_attrs;
 +      else
 +              filter_events(x86_pmu_events_group.attrs);
 +
 +      if (x86_pmu.cpu_events) {
 +              struct attribute **tmp;
 +
 +              tmp = merge_attr(x86_pmu_events_group.attrs, x86_pmu.cpu_events);
 +              if (!WARN_ON(!tmp))
 +                      x86_pmu_events_group.attrs = tmp;
 +      }
 +
 +      pr_info("... version:                %d\n",     x86_pmu.version);
 +      pr_info("... bit width:              %d\n",     x86_pmu.cntval_bits);
 +      pr_info("... generic registers:      %d\n",     x86_pmu.num_counters);
 +      pr_info("... value mask:             %016Lx\n", x86_pmu.cntval_mask);
 +      pr_info("... max period:             %016Lx\n", x86_pmu.max_period);
 +      pr_info("... fixed-purpose events:   %d\n",     x86_pmu.num_counters_fixed);
 +      pr_info("... event mask:             %016Lx\n", x86_pmu.intel_ctrl);
 +
 +      perf_pmu_register(&pmu, "cpu", PERF_TYPE_RAW);
 +      perf_cpu_notifier(x86_pmu_notifier);
 +
 +      return 0;
 +}
 +early_initcall(init_hw_perf_events);
 +
 +static inline void x86_pmu_read(struct perf_event *event)
 +{
 +      x86_perf_event_update(event);
 +}
 +
 +/*
 + * Start a group event scheduling transaction.
 + * Set the flag so that pmu::enable() does not perform the
 + * schedulability test; it will be performed at commit time.
 + *
 + * We only support PERF_PMU_TXN_ADD transactions. Save the
 + * transaction flags but otherwise ignore non-PERF_PMU_TXN_ADD
 + * transactions.
 + */
 +static void x86_pmu_start_txn(struct pmu *pmu, unsigned int txn_flags)
 +{
 +      struct cpu_hw_events *cpuc = this_cpu_ptr(&cpu_hw_events);
 +
 +      WARN_ON_ONCE(cpuc->txn_flags);          /* txn already in flight */
 +
 +      cpuc->txn_flags = txn_flags;
 +      if (txn_flags & ~PERF_PMU_TXN_ADD)
 +              return;
 +
 +      perf_pmu_disable(pmu);
 +      __this_cpu_write(cpu_hw_events.n_txn, 0);
 +}
 +
 +/*
 + * Cancel a group event scheduling transaction.
 + * Clear the flag, and pmu::enable() will perform the
 + * schedulability test.
 + */
 +static void x86_pmu_cancel_txn(struct pmu *pmu)
 +{
 +      unsigned int txn_flags;
 +      struct cpu_hw_events *cpuc = this_cpu_ptr(&cpu_hw_events);
 +
 +      WARN_ON_ONCE(!cpuc->txn_flags); /* no txn in flight */
 +
 +      txn_flags = cpuc->txn_flags;
 +      cpuc->txn_flags = 0;
 +      if (txn_flags & ~PERF_PMU_TXN_ADD)
 +              return;
 +
 +      /*
 +       * Truncate collected array by the number of events added in this
 +       * transaction. See x86_pmu_add() and x86_pmu_*_txn().
 +       */
 +      __this_cpu_sub(cpu_hw_events.n_added, __this_cpu_read(cpu_hw_events.n_txn));
 +      __this_cpu_sub(cpu_hw_events.n_events, __this_cpu_read(cpu_hw_events.n_txn));
 +      perf_pmu_enable(pmu);
 +}
 +
 +/*
 + * Commit a group event scheduling transaction.
 + * Perform the group schedulability test as a whole.
 + * Returns 0 on success.
 + *
 + * Does not cancel the transaction on failure; expects the caller to do this.
 + */
 +static int x86_pmu_commit_txn(struct pmu *pmu)
 +{
 +      struct cpu_hw_events *cpuc = this_cpu_ptr(&cpu_hw_events);
 +      int assign[X86_PMC_IDX_MAX];
 +      int n, ret;
 +
 +      WARN_ON_ONCE(!cpuc->txn_flags); /* no txn in flight */
 +
 +      if (cpuc->txn_flags & ~PERF_PMU_TXN_ADD) {
 +              cpuc->txn_flags = 0;
 +              return 0;
 +      }
 +
 +      n = cpuc->n_events;
 +
 +      if (!x86_pmu_initialized())
 +              return -EAGAIN;
 +
 +      ret = x86_pmu.schedule_events(cpuc, n, assign);
 +      if (ret)
 +              return ret;
 +
 +      /*
 +       * Copy the new assignment now that we know it is possible;
 +       * it will be used by hw_perf_enable().
 +       */
 +      memcpy(cpuc->assign, assign, n*sizeof(int));
 +
 +      cpuc->txn_flags = 0;
 +      perf_pmu_enable(pmu);
 +      return 0;
 +}
 +/*
 + * a fake_cpuc is used to validate event groups. Due to
 + * the extra reg logic, we need to also allocate a fake
 + * per_core and per_cpu structure. Otherwise, group events
 + * using extra reg may conflict without the kernel being
 + * able to catch this when the last event gets added to
 + * the group.
 + */
 +static void free_fake_cpuc(struct cpu_hw_events *cpuc)
 +{
 +      kfree(cpuc->shared_regs);
 +      kfree(cpuc);
 +}
 +
 +static struct cpu_hw_events *allocate_fake_cpuc(void)
 +{
 +      struct cpu_hw_events *cpuc;
 +      int cpu = raw_smp_processor_id();
 +
 +      cpuc = kzalloc(sizeof(*cpuc), GFP_KERNEL);
 +      if (!cpuc)
 +              return ERR_PTR(-ENOMEM);
 +
 +      /* only needed if we have extra_regs */
 +      if (x86_pmu.extra_regs) {
 +              cpuc->shared_regs = allocate_shared_regs(cpu);
 +              if (!cpuc->shared_regs)
 +                      goto error;
 +      }
 +      cpuc->is_fake = 1;
 +      return cpuc;
 +error:
 +      free_fake_cpuc(cpuc);
 +      return ERR_PTR(-ENOMEM);
 +}
 +
 +/*
 + * validate that we can schedule this event
 + */
 +static int validate_event(struct perf_event *event)
 +{
 +      struct cpu_hw_events *fake_cpuc;
 +      struct event_constraint *c;
 +      int ret = 0;
 +
 +      fake_cpuc = allocate_fake_cpuc();
 +      if (IS_ERR(fake_cpuc))
 +              return PTR_ERR(fake_cpuc);
 +
 +      c = x86_pmu.get_event_constraints(fake_cpuc, -1, event);
 +
 +      if (!c || !c->weight)
 +              ret = -EINVAL;
 +
 +      if (x86_pmu.put_event_constraints)
 +              x86_pmu.put_event_constraints(fake_cpuc, event);
 +
 +      free_fake_cpuc(fake_cpuc);
 +
 +      return ret;
 +}
 +
 +/*
 + * validate a single event group
 + *
 + * validation includes:
 + *    - checking that events are compatible with each other
 + *    - events do not compete for the same counter
 + *    - number of events <= number of counters
 + *
 + * validation ensures the group can be loaded onto the
 + * PMU if it was the only group available.
 + */
 +static int validate_group(struct perf_event *event)
 +{
 +      struct perf_event *leader = event->group_leader;
 +      struct cpu_hw_events *fake_cpuc;
 +      int ret = -EINVAL, n;
 +
 +      fake_cpuc = allocate_fake_cpuc();
 +      if (IS_ERR(fake_cpuc))
 +              return PTR_ERR(fake_cpuc);
 +      /*
 +       * the event is not yet connected with its
 +       * siblings, therefore we must first collect
 +       * the existing siblings and then add the new
 +       * event before we can simulate the scheduling
 +       */
 +      n = collect_events(fake_cpuc, leader, true);
 +      if (n < 0)
 +              goto out;
 +
 +      fake_cpuc->n_events = n;
 +      n = collect_events(fake_cpuc, event, false);
 +      if (n < 0)
 +              goto out;
 +
 +      fake_cpuc->n_events = n;
 +
 +      ret = x86_pmu.schedule_events(fake_cpuc, n, NULL);
 +
 +out:
 +      free_fake_cpuc(fake_cpuc);
 +      return ret;
 +}
 +
 +static int x86_pmu_event_init(struct perf_event *event)
 +{
 +      struct pmu *tmp;
 +      int err;
 +
 +      switch (event->attr.type) {
 +      case PERF_TYPE_RAW:
 +      case PERF_TYPE_HARDWARE:
 +      case PERF_TYPE_HW_CACHE:
 +              break;
 +
 +      default:
 +              return -ENOENT;
 +      }
 +
 +      err = __x86_pmu_event_init(event);
 +      if (!err) {
 +              /*
 +               * we temporarily connect the event to its pmu
 +               * so that validate_group() can classify
 +               * it as an x86 event using is_x86_event()
 +               */
 +              tmp = event->pmu;
 +              event->pmu = &pmu;
 +
 +              if (event->group_leader != event)
 +                      err = validate_group(event);
 +              else
 +                      err = validate_event(event);
 +
 +              event->pmu = tmp;
 +      }
 +      if (err) {
 +              if (event->destroy)
 +                      event->destroy(event);
 +      }
 +
 +      if (ACCESS_ONCE(x86_pmu.attr_rdpmc))
 +              event->hw.flags |= PERF_X86_EVENT_RDPMC_ALLOWED;
 +
 +      return err;
 +}
 +
 +static void refresh_pce(void *ignored)
 +{
 +      if (current->mm)
 +              load_mm_cr4(current->mm);
 +}
 +
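 +/*
 + * The mapped/unmapped hooks below count how many RDPMC-capable events are
 + * mapped into an mm; on the 0 <-> 1 transitions they refresh CR4 on every
 + * CPU running that mm, so user-space RDPMC is enabled or disabled accordingly.
 + */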
 +static void x86_pmu_event_mapped(struct perf_event *event)
 +{
 +      if (!(event->hw.flags & PERF_X86_EVENT_RDPMC_ALLOWED))
 +              return;
 +
 +      if (atomic_inc_return(&current->mm->context.perf_rdpmc_allowed) == 1)
 +              on_each_cpu_mask(mm_cpumask(current->mm), refresh_pce, NULL, 1);
 +}
 +
 +static void x86_pmu_event_unmapped(struct perf_event *event)
 +{
 +      if (!current->mm)
 +              return;
 +
 +      if (!(event->hw.flags & PERF_X86_EVENT_RDPMC_ALLOWED))
 +              return;
 +
 +      if (atomic_dec_and_test(&current->mm->context.perf_rdpmc_allowed))
 +              on_each_cpu_mask(mm_cpumask(current->mm), refresh_pce, NULL, 1);
 +}
 +
 +static int x86_pmu_event_idx(struct perf_event *event)
 +{
 +      int idx = event->hw.idx;
 +
 +      if (!(event->hw.flags & PERF_X86_EVENT_RDPMC_ALLOWED))
 +              return 0;
 +
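 +      /*
 +       * For the self-monitoring user page: fixed counters are flagged with
 +       * bit 30 (the RDPMC fixed-counter selector), and the index is returned
 +       * 1-based so that 0 can mean "no counter available" to user space.
 +       */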
 +      if (x86_pmu.num_counters_fixed && idx >= INTEL_PMC_IDX_FIXED) {
 +              idx -= INTEL_PMC_IDX_FIXED;
 +              idx |= 1 << 30;
 +      }
 +
 +      return idx + 1;
 +}
 +
 +static ssize_t get_attr_rdpmc(struct device *cdev,
 +                            struct device_attribute *attr,
 +                            char *buf)
 +{
 +      return snprintf(buf, 40, "%d\n", x86_pmu.attr_rdpmc);
 +}
 +
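 +/*
 + * rdpmc sysfs attribute: 0 disables user-space RDPMC, 1 (the default) allows
 + * it for tasks that have an active perf self-monitoring mapping, and 2 makes
 + * the instruction available unconditionally.
 + */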
 +static ssize_t set_attr_rdpmc(struct device *cdev,
 +                            struct device_attribute *attr,
 +                            const char *buf, size_t count)
 +{
 +      unsigned long val;
 +      ssize_t ret;
 +
 +      ret = kstrtoul(buf, 0, &val);
 +      if (ret)
 +              return ret;
 +
 +      if (val > 2)
 +              return -EINVAL;
 +
 +      if (x86_pmu.attr_rdpmc_broken)
 +              return -ENOTSUPP;
 +
 +      if ((val == 2) != (x86_pmu.attr_rdpmc == 2)) {
 +              /*
 +               * Changing into or out of always available, aka
 +               * perf-event-bypassing mode.  This path is extremely slow,
 +               * but only root can trigger it, so it's okay.
 +               */
 +              if (val == 2)
 +                      static_key_slow_inc(&rdpmc_always_available);
 +              else
 +                      static_key_slow_dec(&rdpmc_always_available);
 +              on_each_cpu(refresh_pce, NULL, 1);
 +      }
 +
 +      x86_pmu.attr_rdpmc = val;
 +
 +      return count;
 +}
 +
 +static DEVICE_ATTR(rdpmc, S_IRUSR | S_IWUSR, get_attr_rdpmc, set_attr_rdpmc);
 +
 +static struct attribute *x86_pmu_attrs[] = {
 +      &dev_attr_rdpmc.attr,
 +      NULL,
 +};
 +
 +static struct attribute_group x86_pmu_attr_group = {
 +      .attrs = x86_pmu_attrs,
 +};
 +
 +static const struct attribute_group *x86_pmu_attr_groups[] = {
 +      &x86_pmu_attr_group,
 +      &x86_pmu_format_group,
 +      &x86_pmu_events_group,
 +      NULL,
 +};
 +
 +static void x86_pmu_sched_task(struct perf_event_context *ctx, bool sched_in)
 +{
 +      if (x86_pmu.sched_task)
 +              x86_pmu.sched_task(ctx, sched_in);
 +}
 +
 +void perf_check_microcode(void)
 +{
 +      if (x86_pmu.check_microcode)
 +              x86_pmu.check_microcode();
 +}
 +EXPORT_SYMBOL_GPL(perf_check_microcode);
 +
 +static struct pmu pmu = {
 +      .pmu_enable             = x86_pmu_enable,
 +      .pmu_disable            = x86_pmu_disable,
 +
 +      .attr_groups            = x86_pmu_attr_groups,
 +
 +      .event_init             = x86_pmu_event_init,
 +
 +      .event_mapped           = x86_pmu_event_mapped,
 +      .event_unmapped         = x86_pmu_event_unmapped,
 +
 +      .add                    = x86_pmu_add,
 +      .del                    = x86_pmu_del,
 +      .start                  = x86_pmu_start,
 +      .stop                   = x86_pmu_stop,
 +      .read                   = x86_pmu_read,
 +
 +      .start_txn              = x86_pmu_start_txn,
 +      .cancel_txn             = x86_pmu_cancel_txn,
 +      .commit_txn             = x86_pmu_commit_txn,
 +
 +      .event_idx              = x86_pmu_event_idx,
 +      .sched_task             = x86_pmu_sched_task,
 +      .task_ctx_size          = sizeof(struct x86_perf_task_context),
 +};
 +
 +void arch_perf_update_userpage(struct perf_event *event,
 +                             struct perf_event_mmap_page *userpg, u64 now)
 +{
 +      struct cyc2ns_data *data;
 +
 +      userpg->cap_user_time = 0;
 +      userpg->cap_user_time_zero = 0;
 +      userpg->cap_user_rdpmc =
 +              !!(event->hw.flags & PERF_X86_EVENT_RDPMC_ALLOWED);
 +      userpg->pmc_width = x86_pmu.cntval_bits;
 +
 +      if (!sched_clock_stable())
 +              return;
 +
 +      data = cyc2ns_read_begin();
 +
 +      /*
 +       * Internal timekeeping for enabled/running/stopped times
 +       * is always in the local_clock domain.
 +       */
 +      userpg->cap_user_time = 1;
 +      userpg->time_mult = data->cyc2ns_mul;
 +      userpg->time_shift = data->cyc2ns_shift;
 +      userpg->time_offset = data->cyc2ns_offset - now;
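 +
 +      /*
 +       * With these fields, user space converts a TSC value to nanoseconds in
 +       * the local_clock domain roughly as:
 +       *   time = time_offset + (tsc * time_mult) >> time_shift
 +       * (see the perf_event_mmap_page documentation for the exact,
 +       * overflow-safe form).
 +       */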
 +
 +      /*
 +       * cap_user_time_zero doesn't make sense when we're using a different
 +       * time base for the records.
 +       */
 +      if (event->clock == &local_clock) {
 +              userpg->cap_user_time_zero = 1;
 +              userpg->time_zero = data->cyc2ns_offset;
 +      }
 +
 +      cyc2ns_read_end(data);
 +}
 +
 +/*
 + * callchain support
 + */
 +
 +static int backtrace_stack(void *data, char *name)
 +{
 +      return 0;
 +}
 +
-       perf_callchain_store(entry, addr);
++static int backtrace_address(void *data, unsigned long addr, int reliable)
 +{
 +      struct perf_callchain_entry *entry = data;
 +
++      return perf_callchain_store(entry, addr);
 +}
 +
 +static const struct stacktrace_ops backtrace_ops = {
 +      .stack                  = backtrace_stack,
 +      .address                = backtrace_address,
 +      .walk_stack             = print_context_stack_bp,
 +};
 +
 +void
 +perf_callchain_kernel(struct perf_callchain_entry *entry, struct pt_regs *regs)
 +{
 +      if (perf_guest_cbs && perf_guest_cbs->is_in_guest()) {
 +              /* TODO: We don't support guest OS callchains yet */
 +              return;
 +      }
 +
 +      perf_callchain_store(entry, regs->ip);
 +
 +      dump_trace(NULL, regs, NULL, 0, &backtrace_ops, entry);
 +}
 +
 +static inline int
 +valid_user_frame(const void __user *fp, unsigned long size)
 +{
 +      return (__range_not_ok(fp, size, TASK_SIZE) == 0);
 +}
 +
 +static unsigned long get_segment_base(unsigned int segment)
 +{
 +      struct desc_struct *desc;
 +      int idx = segment >> 3;
 +
 +      if ((segment & SEGMENT_TI_MASK) == SEGMENT_LDT) {
 +#ifdef CONFIG_MODIFY_LDT_SYSCALL
 +              struct ldt_struct *ldt;
 +
 +              if (idx > LDT_ENTRIES)
 +                      return 0;
 +
 +              /* IRQs are off, so this synchronizes with smp_store_release */
 +              ldt = lockless_dereference(current->active_mm->context.ldt);
 +              if (!ldt || idx > ldt->size)
 +                      return 0;
 +
 +              desc = &ldt->entries[idx];
 +#else
 +              return 0;
 +#endif
 +      } else {
 +              if (idx > GDT_ENTRIES)
 +                      return 0;
 +
 +              desc = raw_cpu_ptr(gdt_page.gdt) + idx;
 +      }
 +
 +      return get_desc_base(desc);
 +}
 +
 +#ifdef CONFIG_IA32_EMULATION
 +
 +#include <asm/compat.h>
 +
 +static inline int
 +perf_callchain_user32(struct pt_regs *regs, struct perf_callchain_entry *entry)
 +{
 +      /* 32-bit process in 64-bit kernel. */
 +      unsigned long ss_base, cs_base;
 +      struct stack_frame_ia32 frame;
 +      const void __user *fp;
 +
 +      if (!test_thread_flag(TIF_IA32))
 +              return 0;
 +
 +      cs_base = get_segment_base(regs->cs);
 +      ss_base = get_segment_base(regs->ss);
 +
 +      fp = compat_ptr(ss_base + regs->bp);
 +      pagefault_disable();
 +      while (entry->nr < PERF_MAX_STACK_DEPTH) {
 +              unsigned long bytes;
 +              frame.next_frame     = 0;
 +              frame.return_address = 0;
 +
 +              if (!access_ok(VERIFY_READ, fp, 8))
 +                      break;
 +
 +              bytes = __copy_from_user_nmi(&frame.next_frame, fp, 4);
 +              if (bytes != 0)
 +                      break;
 +              bytes = __copy_from_user_nmi(&frame.return_address, fp+4, 4);
 +              if (bytes != 0)
 +                      break;
 +
 +              if (!valid_user_frame(fp, sizeof(frame)))
 +                      break;
 +
 +              perf_callchain_store(entry, cs_base + frame.return_address);
 +              fp = compat_ptr(ss_base + frame.next_frame);
 +      }
 +      pagefault_enable();
 +      return 1;
 +}
 +#else
 +static inline int
 +perf_callchain_user32(struct pt_regs *regs, struct perf_callchain_entry *entry)
 +{
 +    return 0;
 +}
 +#endif
 +
 +void
 +perf_callchain_user(struct perf_callchain_entry *entry, struct pt_regs *regs)
 +{
 +      struct stack_frame frame;
 +      const void __user *fp;
 +
 +      if (perf_guest_cbs && perf_guest_cbs->is_in_guest()) {
 +              /* TODO: We don't support guest OS callchains yet */
 +              return;
 +      }
 +
 +      /*
 +       * We don't know what to do with VM86 stacks; ignore them for now.
 +       */
 +      if (regs->flags & (X86_VM_MASK | PERF_EFLAGS_VM))
 +              return;
 +
 +      fp = (void __user *)regs->bp;
 +
 +      perf_callchain_store(entry, regs->ip);
 +
 +      if (!current->mm)
 +              return;
 +
 +      if (perf_callchain_user32(regs, entry))
 +              return;
 +
 +      pagefault_disable();
 +      while (entry->nr < PERF_MAX_STACK_DEPTH) {
 +              unsigned long bytes;
 +              frame.next_frame             = NULL;
 +              frame.return_address = 0;
 +
 +              if (!access_ok(VERIFY_READ, fp, 16))
 +                      break;
 +
 +              bytes = __copy_from_user_nmi(&frame.next_frame, fp, 8);
 +              if (bytes != 0)
 +                      break;
 +              bytes = __copy_from_user_nmi(&frame.return_address, fp+8, 8);
 +              if (bytes != 0)
 +                      break;
 +
 +              if (!valid_user_frame(fp, sizeof(frame)))
 +                      break;
 +
 +              perf_callchain_store(entry, frame.return_address);
 +              fp = (void __user *)frame.next_frame;
 +      }
 +      pagefault_enable();
 +}
 +
 +/*
 + * Deal with code segment offsets for the various execution modes:
 + *
 + *   VM86 - the good olde 16 bit days, where the linear address is
 + *          20 bits and we use regs->ip + 0x10 * regs->cs.
 + *
 + *   IA32 - Where we need to look at GDT/LDT segment descriptor tables
 + *          to figure out what the 32bit base address is.
 + *
 + *    X32 - has TIF_X32 set, but is running in x86_64
 + *
 + * X86_64 - CS,DS,SS,ES are all zero based.
 + */
 +static unsigned long code_segment_base(struct pt_regs *regs)
 +{
 +      /*
 +       * For IA32 we look at the GDT/LDT segment base to convert the
 +       * effective IP to a linear address.
 +       */
 +
 +#ifdef CONFIG_X86_32
 +      /*
 +       * If we are in VM86 mode, add the segment offset to convert to a
 +       * linear address.
 +       */
 +      if (regs->flags & X86_VM_MASK)
 +              return 0x10 * regs->cs;
 +
 +      if (user_mode(regs) && regs->cs != __USER_CS)
 +              return get_segment_base(regs->cs);
 +#else
 +      if (user_mode(regs) && !user_64bit_mode(regs) &&
 +          regs->cs != __USER32_CS)
 +              return get_segment_base(regs->cs);
 +#endif
 +      return 0;
 +}
 +
 +unsigned long perf_instruction_pointer(struct pt_regs *regs)
 +{
 +      if (perf_guest_cbs && perf_guest_cbs->is_in_guest())
 +              return perf_guest_cbs->get_guest_ip();
 +
 +      return regs->ip + code_segment_base(regs);
 +}
 +
 +unsigned long perf_misc_flags(struct pt_regs *regs)
 +{
 +      int misc = 0;
 +
 +      if (perf_guest_cbs && perf_guest_cbs->is_in_guest()) {
 +              if (perf_guest_cbs->is_user_mode())
 +                      misc |= PERF_RECORD_MISC_GUEST_USER;
 +              else
 +                      misc |= PERF_RECORD_MISC_GUEST_KERNEL;
 +      } else {
 +              if (user_mode(regs))
 +                      misc |= PERF_RECORD_MISC_USER;
 +              else
 +                      misc |= PERF_RECORD_MISC_KERNEL;
 +      }
 +
 +      if (regs->flags & PERF_EFLAGS_EXACT)
 +              misc |= PERF_RECORD_MISC_EXACT_IP;
 +
 +      return misc;
 +}
 +
 +void perf_get_x86_pmu_capability(struct x86_pmu_capability *cap)
 +{
 +      cap->version            = x86_pmu.version;
 +      cap->num_counters_gp    = x86_pmu.num_counters;
 +      cap->num_counters_fixed = x86_pmu.num_counters_fixed;
 +      cap->bit_width_gp       = x86_pmu.cntval_bits;
 +      cap->bit_width_fixed    = x86_pmu.cntval_bits;
 +      cap->events_mask        = (unsigned int)x86_pmu.events_maskl;
 +      cap->events_mask_len    = x86_pmu.events_mask_len;
 +}
 +EXPORT_SYMBOL_GPL(perf_get_x86_pmu_capability);
index 32e5699eadfee52043a3028e2040a00ea2cc3375,0d1ff4b407d4e1f9c445cf7516495824f85f3cfe..8efa57a5f29ea58d119a0ab2ca130598d3144f0e
@@@ -135,7 -135,8 +135,8 @@@ print_context_stack_bp(struct thread_in
                if (!__kernel_text_address(addr))
                        break;
  
-               ops->address(data, addr, 1);
+               if (ops->address(data, addr, 1))
+                       break;
                frame = frame->next_frame;
                ret_addr = &frame->return_address;
                print_ftrace_graph_addr(addr, data, ops, tinfo, graph);
@@@ -154,10 -155,11 +155,11 @@@ static int print_trace_stack(void *data
  /*
   * Print one address/symbol entries per line.
   */
- static void print_trace_address(void *data, unsigned long addr, int reliable)
+ static int print_trace_address(void *data, unsigned long addr, int reliable)
  {
        touch_nmi_watchdog();
        printk_stack_address(addr, reliable, data);
+       return 0;
  }
  
  static const struct stacktrace_ops print_trace_ops = {
@@@ -265,8 -267,9 +267,8 @@@ int __die(const char *str, struct pt_re
  #ifdef CONFIG_SMP
        printk("SMP ");
  #endif
 -#ifdef CONFIG_DEBUG_PAGEALLOC
 -      printk("DEBUG_PAGEALLOC ");
 -#endif
 +      if (debug_pagealloc_enabled())
 +              printk("DEBUG_PAGEALLOC ");
  #ifdef CONFIG_KASAN
        printk("KASAN");
  #endif
index 914bc98e753f5da03f4a216e8d1cea6b311ddd3a,a15a7b37d3862358eab3a5744c94d194224b7d71..f014eaf5969be32c38ea59028d8ab321a2fd5cd2
@@@ -41,6 -41,7 +41,7 @@@
  #include <linux/if_vlan.h>
  #include <net/ipv6.h>
  #include <net/addrconf.h>
+ #include <net/devlink.h>
  
  #include <rdma/ib_smi.h>
  #include <rdma/ib_user_verbs.h>
@@@ -1643,56 -1644,6 +1644,56 @@@ static int mlx4_ib_tunnel_steer_add(str
        return err;
  }
  
 +static int mlx4_ib_add_dont_trap_rule(struct mlx4_dev *dev,
 +                                    struct ib_flow_attr *flow_attr,
 +                                    enum mlx4_net_trans_promisc_mode *type)
 +{
 +      int err = 0;
 +
 +      if (!(dev->caps.flags2 & MLX4_DEV_CAP_FLAG2_DMFS_UC_MC_SNIFFER) ||
 +          (dev->caps.dmfs_high_steer_mode == MLX4_STEERING_DMFS_A0_STATIC) ||
 +          (flow_attr->num_of_specs > 1) || (flow_attr->priority != 0)) {
 +              return -EOPNOTSUPP;
 +      }
 +
 +      if (flow_attr->num_of_specs == 0) {
 +              type[0] = MLX4_FS_MC_SNIFFER;
 +              type[1] = MLX4_FS_UC_SNIFFER;
 +      } else {
 +              union ib_flow_spec *ib_spec;
 +
 +              ib_spec = (union ib_flow_spec *)(flow_attr + 1);
 +              if (ib_spec->type !=  IB_FLOW_SPEC_ETH)
 +                      return -EINVAL;
 +
 +              /* if the mask is all zero then match both MC and UC */
 +              if (is_zero_ether_addr(ib_spec->eth.mask.dst_mac)) {
 +                      type[0] = MLX4_FS_MC_SNIFFER;
 +                      type[1] = MLX4_FS_UC_SNIFFER;
 +              } else {
 +                      u8 mac[ETH_ALEN] = {ib_spec->eth.mask.dst_mac[0] ^ 0x01,
 +                                          ib_spec->eth.mask.dst_mac[1],
 +                                          ib_spec->eth.mask.dst_mac[2],
 +                                          ib_spec->eth.mask.dst_mac[3],
 +                                          ib_spec->eth.mask.dst_mac[4],
 +                                          ib_spec->eth.mask.dst_mac[5]};
 +
 +                      /* The XOR above flipped only the MC bit; a non-empty
 +                       * mask is valid only if that bit is set and the rest
 +                       * are zero.
 +                       */
 +                      if (!is_zero_ether_addr(&mac[0]))
 +                              return -EINVAL;
 +
 +                      if (is_multicast_ether_addr(ib_spec->eth.val.dst_mac))
 +                              type[0] = MLX4_FS_MC_SNIFFER;
 +                      else
 +                              type[0] = MLX4_FS_UC_SNIFFER;
 +              }
 +      }
 +
 +      return err;
 +}
 +
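
A stand-alone illustration of the mask check above: XOR the multicast bit (bit 0 of the first mask byte) and the result must be all zero, i.e. the mask may cover nothing but the MC bit. The helpers below are local stand-ins for the kernel's ether-address helpers.

/* Sketch of the dst_mac mask validation; not driver code. */
#include <stdio.h>
#include <string.h>

#define ETH_ALEN 6

static int is_zero_mac(const unsigned char *mac)
{
        static const unsigned char zero[ETH_ALEN];

        return memcmp(mac, zero, ETH_ALEN) == 0;
}

/* Valid only when the mask covers nothing but the multicast bit. */
static int mask_is_mc_bit_only(const unsigned char *mask)
{
        unsigned char flipped[ETH_ALEN];

        memcpy(flipped, mask, ETH_ALEN);
        flipped[0] ^= 0x01;             /* flip the MC bit, as above */
        return is_zero_mac(flipped);
}

int main(void)
{
        unsigned char ok[ETH_ALEN]  = { 0x01, 0, 0, 0, 0, 0 };   /* MC bit only */
        unsigned char bad[ETH_ALEN] = { 0x01, 0xff, 0, 0, 0, 0 };

        printf("mask 01:00:00:00:00:00 -> %s\n", mask_is_mc_bit_only(ok)  ? "valid" : "invalid");
        printf("mask 01:ff:00:00:00:00 -> %s\n", mask_is_mc_bit_only(bad) ? "valid" : "invalid");
        return 0;
}
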
  static struct ib_flow *mlx4_ib_create_flow(struct ib_qp *qp,
                                    struct ib_flow_attr *flow_attr,
                                    int domain)
        struct mlx4_dev *dev = (to_mdev(qp->device))->dev;
        int is_bonded = mlx4_is_bonded(dev);
  
 +      if ((flow_attr->flags & IB_FLOW_ATTR_FLAGS_DONT_TRAP) &&
 +          (flow_attr->type != IB_FLOW_ATTR_NORMAL))
 +              return ERR_PTR(-EOPNOTSUPP);
 +
        memset(type, 0, sizeof(type));
  
        mflow = kzalloc(sizeof(*mflow), GFP_KERNEL);
  
        switch (flow_attr->type) {
        case IB_FLOW_ATTR_NORMAL:
 -              type[0] = MLX4_FS_REGULAR;
 +              /* If the don't-trap flag (continue match) is set, then under
 +               * specific conditions traffic is replicated to the given QP
 +               * without being stolen from it.
 +               */
 +              if (unlikely(flow_attr->flags & IB_FLOW_ATTR_FLAGS_DONT_TRAP)) {
 +                      err = mlx4_ib_add_dont_trap_rule(dev,
 +                                                       flow_attr,
 +                                                       type);
 +                      if (err)
 +                              goto err_free;
 +              } else {
 +                      type[0] = MLX4_FS_REGULAR;
 +              }
                break;
  
        case IB_FLOW_ATTR_ALL_DEFAULT:
                break;
  
        case IB_FLOW_ATTR_SNIFFER:
 -              type[0] = MLX4_FS_UC_SNIFFER;
 -              type[1] = MLX4_FS_MC_SNIFFER;
 +              type[0] = MLX4_FS_MIRROR_RX_PORT;
 +              type[1] = MLX4_FS_MIRROR_SX_PORT;
                break;
  
        default:
@@@ -2585,6 -2520,9 +2586,9 @@@ static void *mlx4_ib_add(struct mlx4_de
        }
  
        ibdev->ib_active = true;
+       mlx4_foreach_port(i, dev, MLX4_PORT_TYPE_IB)
+               devlink_port_type_ib_set(mlx4_get_devlink_port(dev, i),
+                                        &ibdev->ib_dev);
  
        if (mlx4_is_mfunc(ibdev->dev))
                init_pkeys(ibdev);
@@@ -2709,7 -2647,10 +2713,10 @@@ static void mlx4_ib_remove(struct mlx4_
  {
        struct mlx4_ib_dev *ibdev = ibdev_ptr;
        int p;
+       int i;
  
+       mlx4_foreach_port(i, dev, MLX4_PORT_TYPE_IB)
+               devlink_port_type_clear(mlx4_get_devlink_port(dev, i));
        ibdev->ib_active = false;
        flush_workqueue(wq);
  
index 5afbb697e691df8af9102ece37d76e326b9702a1,e1cea4415704d5249d4c53e25416323b9d30d9b4..edd8b87418466a7b3717856915087b41122ff8cb
@@@ -42,6 -42,7 +42,7 @@@
  #include <rdma/ib_user_verbs.h>
  #include <rdma/ib_addr.h>
  #include <rdma/ib_cache.h>
+ #include <linux/mlx5/port.h>
  #include <linux/mlx5/vport.h>
  #include <rdma/ib_smi.h>
  #include <rdma/ib_umem.h>
@@@ -487,13 -488,6 +488,13 @@@ static int mlx5_ib_query_device(struct 
                props->device_cap_flags |= IB_DEVICE_AUTO_PATH_MIG;
        if (MLX5_CAP_GEN(mdev, xrc))
                props->device_cap_flags |= IB_DEVICE_XRC;
 +      if (MLX5_CAP_GEN(mdev, imaicl)) {
 +              props->device_cap_flags |= IB_DEVICE_MEM_WINDOW |
 +                                         IB_DEVICE_MEM_WINDOW_TYPE_2B;
 +              props->max_mw = 1 << MLX5_CAP_GEN(mdev, log_max_mkey);
 +              /* We support 'Gappy' memory registration too */
 +              props->device_cap_flags |= IB_DEVICE_SG_GAPS_REG;
 +      }
        props->device_cap_flags |= IB_DEVICE_MEM_MGT_EXTENSIONS;
        if (MLX5_CAP_GEN(mdev, sho)) {
                props->device_cap_flags |= IB_DEVICE_SIGNATURE_HANDOVER;
            (MLX5_CAP_ETH(dev->mdev, csum_cap)))
                        props->device_cap_flags |= IB_DEVICE_RAW_IP_CSUM;
  
 +      if (MLX5_CAP_GEN(mdev, ipoib_basic_offloads)) {
 +              props->device_cap_flags |= IB_DEVICE_UD_IP_CSUM;
 +              props->device_cap_flags |= IB_DEVICE_UD_TSO;
 +      }
 +
        props->vendor_part_id      = mdev->pdev->device;
        props->hw_ver              = mdev->pdev->revision;
  
        props->local_ca_ack_delay  = MLX5_CAP_GEN(mdev, local_ca_ack_delay);
        props->max_res_rd_atom     = props->max_qp_rd_atom * props->max_qp;
        props->max_srq_sge         = max_rq_sg - 1;
 -      props->max_fast_reg_page_list_len = (unsigned int)-1;
 +      props->max_fast_reg_page_list_len =
 +              1 << MLX5_CAP_GEN(mdev, log_max_klm_list_size);
        get_atomic_caps(dev, props);
        props->masked_atomic_cap   = IB_ATOMIC_NONE;
        props->max_mcast_grp       = 1 << MLX5_CAP_GEN(mdev, log_max_mcg);
@@@ -1382,20 -1370,11 +1383,20 @@@ static int mlx5_ib_destroy_flow(struct 
        return 0;
  }
  
 +static int ib_prio_to_core_prio(unsigned int priority, bool dont_trap)
 +{
 +      priority *= 2;
 +      if (!dont_trap)
 +              priority++;
 +      return priority;
 +}
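
The helper above interleaves don't-trap and regular rules in the core priority space: each user priority occupies two core priorities, and the don't-trap rule takes the even slot, so it is ordered ahead of the corresponding regular rule. A quick stand-alone table of the first few values, computed with the same arithmetic:

/* Prints the user-priority -> core-priority mapping implemented above. */
#include <stdio.h>

static int ib_prio_to_core_prio(unsigned int priority, int dont_trap)
{
        priority *= 2;
        if (!dont_trap)
                priority++;
        return priority;
}

int main(void)
{
        unsigned int p;

        for (p = 0; p < 4; p++)
                printf("user prio %u: dont_trap -> %d, regular -> %d\n",
                       p, ib_prio_to_core_prio(p, 1), ib_prio_to_core_prio(p, 0));
        return 0;
}
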
 +
  #define MLX5_FS_MAX_TYPES      10
  #define MLX5_FS_MAX_ENTRIES    32000UL
  static struct mlx5_ib_flow_prio *get_flow_table(struct mlx5_ib_dev *dev,
                                                struct ib_flow_attr *flow_attr)
  {
 +      bool dont_trap = flow_attr->flags & IB_FLOW_ATTR_FLAGS_DONT_TRAP;
        struct mlx5_flow_namespace *ns = NULL;
        struct mlx5_ib_flow_prio *prio;
        struct mlx5_flow_table *ft;
        int err = 0;
  
        if (flow_attr->type == IB_FLOW_ATTR_NORMAL) {
 -              if (flow_is_multicast_only(flow_attr))
 +              if (flow_is_multicast_only(flow_attr) &&
 +                  !dont_trap)
                        priority = MLX5_IB_FLOW_MCAST_PRIO;
                else
 -                      priority = flow_attr->priority;
 +                      priority = ib_prio_to_core_prio(flow_attr->priority,
 +                                                      dont_trap);
                ns = mlx5_get_flow_namespace(dev->mdev,
                                             MLX5_FLOW_NAMESPACE_BYPASS);
                num_entries = MLX5_FS_MAX_ENTRIES;
@@@ -1458,7 -1435,6 +1459,7 @@@ static struct mlx5_ib_flow_handler *cre
        unsigned int spec_index;
        u32 *match_c;
        u32 *match_v;
 +      u32 action;
        int err = 0;
  
        if (!is_valid_attr(flow_attr))
  
        /* Outer header support only */
        match_criteria_enable = (!outer_header_zero(match_c)) << 0;
 +      action = dst ? MLX5_FLOW_CONTEXT_ACTION_FWD_DEST :
 +              MLX5_FLOW_CONTEXT_ACTION_FWD_NEXT_PRIO;
        handler->rule = mlx5_add_flow_rule(ft, match_criteria_enable,
                                           match_c, match_v,
 -                                         MLX5_FLOW_CONTEXT_ACTION_FWD_DEST,
 +                                         action,
                                           MLX5_FS_DEFAULT_FLOW_TAG,
                                           dst);
  
@@@ -1508,29 -1482,6 +1509,29 @@@ free
        return err ? ERR_PTR(err) : handler;
  }
  
 +static struct mlx5_ib_flow_handler *create_dont_trap_rule(struct mlx5_ib_dev *dev,
 +                                                        struct mlx5_ib_flow_prio *ft_prio,
 +                                                        struct ib_flow_attr *flow_attr,
 +                                                        struct mlx5_flow_destination *dst)
 +{
 +      struct mlx5_ib_flow_handler *handler_dst = NULL;
 +      struct mlx5_ib_flow_handler *handler = NULL;
 +
 +      handler = create_flow_rule(dev, ft_prio, flow_attr, NULL);
 +      if (!IS_ERR(handler)) {
 +              handler_dst = create_flow_rule(dev, ft_prio,
 +                                             flow_attr, dst);
 +              if (IS_ERR(handler_dst)) {
 +                      mlx5_del_flow_rule(handler->rule);
 +                      kfree(handler);
 +                      handler = handler_dst;
 +              } else {
 +                      list_add(&handler_dst->list, &handler->list);
 +              }
 +      }
 +
 +      return handler;
 +}
  enum {
        LEFTOVERS_MC,
        LEFTOVERS_UC,
@@@ -1608,7 -1559,7 +1609,7 @@@ static struct ib_flow *mlx5_ib_create_f
  
        if (domain != IB_FLOW_DOMAIN_USER ||
            flow_attr->port > MLX5_CAP_GEN(dev->mdev, num_ports) ||
 -          flow_attr->flags)
 +          (flow_attr->flags & ~IB_FLOW_ATTR_FLAGS_DONT_TRAP))
                return ERR_PTR(-EINVAL);
  
        dst = kzalloc(sizeof(*dst), GFP_KERNEL);
        dst->tir_num = to_mqp(qp)->raw_packet_qp.rq.tirn;
  
        if (flow_attr->type == IB_FLOW_ATTR_NORMAL) {
 -              handler = create_flow_rule(dev, ft_prio, flow_attr,
 -                                         dst);
 +              if (flow_attr->flags & IB_FLOW_ATTR_FLAGS_DONT_TRAP)  {
 +                      handler = create_dont_trap_rule(dev, ft_prio,
 +                                                      flow_attr, dst);
 +              } else {
 +                      handler = create_flow_rule(dev, ft_prio, flow_attr,
 +                                                 dst);
 +              }
        } else if (flow_attr->type == IB_FLOW_ATTR_ALL_DEFAULT ||
                   flow_attr->type == IB_FLOW_ATTR_MC_DEFAULT) {
                handler = create_leftovers_rule(dev, ft_prio, flow_attr,
@@@ -1771,17 -1717,6 +1772,17 @@@ static struct device_attribute *mlx5_cl
        &dev_attr_reg_pages,
  };
  
 +static void pkey_change_handler(struct work_struct *work)
 +{
 +      struct mlx5_ib_port_resources *ports =
 +              container_of(work, struct mlx5_ib_port_resources,
 +                           pkey_change_work);
 +
 +      mutex_lock(&ports->devr->mutex);
 +      mlx5_ib_gsi_pkey_change(ports->gsi);
 +      mutex_unlock(&ports->devr->mutex);
 +}
 +
  static void mlx5_ib_event(struct mlx5_core_dev *dev, void *context,
                          enum mlx5_dev_event event, unsigned long param)
  {
        case MLX5_DEV_EVENT_PKEY_CHANGE:
                ibev.event = IB_EVENT_PKEY_CHANGE;
                port = (u8)param;
 +
 +              schedule_work(&ibdev->devr.ports[port - 1].pkey_change_work);
                break;
  
        case MLX5_DEV_EVENT_GUID_CHANGE:
@@@ -1906,7 -1839,7 +1907,7 @@@ static void destroy_umrc_res(struct mlx
                mlx5_ib_warn(dev, "mr cache cleanup failed\n");
  
        mlx5_ib_destroy_qp(dev->umrc.qp);
 -      ib_destroy_cq(dev->umrc.cq);
 +      ib_free_cq(dev->umrc.cq);
        ib_dealloc_pd(dev->umrc.pd);
  }
  
@@@ -1921,6 -1854,7 +1922,6 @@@ static int create_umr_res(struct mlx5_i
        struct ib_pd *pd;
        struct ib_cq *cq;
        struct ib_qp *qp;
 -      struct ib_cq_init_attr cq_attr = {};
        int ret;
  
        attr = kzalloc(sizeof(*attr), GFP_KERNEL);
                goto error_0;
        }
  
 -      cq_attr.cqe = 128;
 -      cq = ib_create_cq(&dev->ib_dev, mlx5_umr_cq_handler, NULL, NULL,
 -                        &cq_attr);
 +      cq = ib_alloc_cq(&dev->ib_dev, NULL, 128, 0, IB_POLL_SOFTIRQ);
        if (IS_ERR(cq)) {
                mlx5_ib_dbg(dev, "Couldn't create CQ for sync UMR QP\n");
                ret = PTR_ERR(cq);
                goto error_2;
        }
 -      ib_req_notify_cq(cq, IB_CQ_NEXT_COMP);
  
        init_attr->send_cq = cq;
        init_attr->recv_cq = cq;
@@@ -2009,7 -1946,7 +2010,7 @@@ error_4
        mlx5_ib_destroy_qp(qp);
  
  error_3:
 -      ib_destroy_cq(cq);
 +      ib_free_cq(cq);
  
  error_2:
        ib_dealloc_pd(pd);
@@@ -2025,13 -1962,10 +2026,13 @@@ static int create_dev_resources(struct 
        struct ib_srq_init_attr attr;
        struct mlx5_ib_dev *dev;
        struct ib_cq_init_attr cq_attr = {.cqe = 1};
 +      int port;
        int ret = 0;
  
        dev = container_of(devr, struct mlx5_ib_dev, devr);
  
 +      mutex_init(&devr->mutex);
 +
        devr->p0 = mlx5_ib_alloc_pd(&dev->ib_dev, NULL, NULL);
        if (IS_ERR(devr->p0)) {
                ret = PTR_ERR(devr->p0);
        atomic_inc(&devr->p0->usecnt);
        atomic_set(&devr->s0->usecnt, 0);
  
 +      for (port = 0; port < ARRAY_SIZE(devr->ports); ++port) {
 +              INIT_WORK(&devr->ports[port].pkey_change_work,
 +                        pkey_change_handler);
 +              devr->ports[port].devr = devr;
 +      }
 +
        return 0;
  
  error5:
@@@ -2143,20 -2071,12 +2144,20 @@@ error0
  
  static void destroy_dev_resources(struct mlx5_ib_resources *devr)
  {
 +      struct mlx5_ib_dev *dev =
 +              container_of(devr, struct mlx5_ib_dev, devr);
 +      int port;
 +
        mlx5_ib_destroy_srq(devr->s1);
        mlx5_ib_destroy_srq(devr->s0);
        mlx5_ib_dealloc_xrcd(devr->x0);
        mlx5_ib_dealloc_xrcd(devr->x1);
        mlx5_ib_destroy_cq(devr->c0);
        mlx5_ib_dealloc_pd(devr->p0);
 +
 +      /* Make sure no P_Key change work items are still executing */
 +      for (port = 0; port < dev->num_ports; ++port)
 +              cancel_work_sync(&devr->ports[port].pkey_change_work);
  }
  
  static u32 get_core_cap_flags(struct ib_device *ibdev)
@@@ -2279,7 -2199,6 +2280,7 @@@ static void *mlx5_ib_add(struct mlx5_co
                (1ull << IB_USER_VERBS_CMD_ALLOC_PD)            |
                (1ull << IB_USER_VERBS_CMD_DEALLOC_PD)          |
                (1ull << IB_USER_VERBS_CMD_REG_MR)              |
 +              (1ull << IB_USER_VERBS_CMD_REREG_MR)            |
                (1ull << IB_USER_VERBS_CMD_DEREG_MR)            |
                (1ull << IB_USER_VERBS_CMD_CREATE_COMP_CHANNEL) |
                (1ull << IB_USER_VERBS_CMD_CREATE_CQ)           |
        dev->ib_dev.req_notify_cq       = mlx5_ib_arm_cq;
        dev->ib_dev.get_dma_mr          = mlx5_ib_get_dma_mr;
        dev->ib_dev.reg_user_mr         = mlx5_ib_reg_user_mr;
 +      dev->ib_dev.rereg_user_mr       = mlx5_ib_rereg_user_mr;
        dev->ib_dev.dereg_mr            = mlx5_ib_dereg_mr;
        dev->ib_dev.attach_mcast        = mlx5_ib_mcg_attach;
        dev->ib_dev.detach_mcast        = mlx5_ib_mcg_detach;
  
        mlx5_ib_internal_fill_odp_caps(dev);
  
 +      if (MLX5_CAP_GEN(mdev, imaicl)) {
 +              dev->ib_dev.alloc_mw            = mlx5_ib_alloc_mw;
 +              dev->ib_dev.dealloc_mw          = mlx5_ib_dealloc_mw;
 +              dev->ib_dev.uverbs_cmd_mask |=
 +                      (1ull << IB_USER_VERBS_CMD_ALLOC_MW)    |
 +                      (1ull << IB_USER_VERBS_CMD_DEALLOC_MW);
 +      }
 +
        if (MLX5_CAP_GEN(mdev, xrc)) {
                dev->ib_dev.alloc_xrcd = mlx5_ib_alloc_xrcd;
                dev->ib_dev.dealloc_xrcd = mlx5_ib_dealloc_xrcd;
index f21b2c47978028f3ce9fc5f66c94f2b1158d993e,08c0415483002e27a2c3a7584fccb412fd5085f7..d2f917af539f35b8f994507c5c3758944b1d9d0d
@@@ -2324,6 -2324,7 +2324,7 @@@ static int gfar_start_xmit(struct sk_bu
        struct txfcb *fcb = NULL;
        struct txbd8 *txbdp, *txbdp_start, *base, *txbdp_tstamp = NULL;
        u32 lstatus;
+       skb_frag_t *frag;
        int i, rq = 0;
        int do_tstamp, do_csum, do_vlan;
        u32 bufaddr;
        txbdp = txbdp_start = tx_queue->cur_tx;
        lstatus = be32_to_cpu(txbdp->lstatus);
  
-       /* Time stamp insertion requires one additional TxBD */
-       if (unlikely(do_tstamp))
-               txbdp_tstamp = txbdp = next_txbd(txbdp, base,
-                                                tx_queue->tx_ring_size);
-       if (nr_frags == 0) {
-               if (unlikely(do_tstamp)) {
-                       u32 lstatus_ts = be32_to_cpu(txbdp_tstamp->lstatus);
-                       lstatus_ts |= BD_LFLAG(TXBD_LAST | TXBD_INTERRUPT);
-                       txbdp_tstamp->lstatus = cpu_to_be32(lstatus_ts);
-               } else {
-                       lstatus |= BD_LFLAG(TXBD_LAST | TXBD_INTERRUPT);
-               }
-       } else {
-               /* Place the fragment addresses and lengths into the TxBDs */
-               for (i = 0; i < nr_frags; i++) {
-                       unsigned int frag_len;
-                       /* Point at the next BD, wrapping as needed */
-                       txbdp = next_txbd(txbdp, base, tx_queue->tx_ring_size);
-                       frag_len = skb_shinfo(skb)->frags[i].size;
-                       lstatus = be32_to_cpu(txbdp->lstatus) | frag_len |
-                                 BD_LFLAG(TXBD_READY);
-                       /* Handle the last BD specially */
-                       if (i == nr_frags - 1)
-                               lstatus |= BD_LFLAG(TXBD_LAST | TXBD_INTERRUPT);
-                       bufaddr = skb_frag_dma_map(priv->dev,
-                                                  &skb_shinfo(skb)->frags[i],
-                                                  0,
-                                                  frag_len,
-                                                  DMA_TO_DEVICE);
-                       if (unlikely(dma_mapping_error(priv->dev, bufaddr)))
-                               goto dma_map_err;
-                       /* set the TxBD length and buffer pointer */
-                       txbdp->bufPtr = cpu_to_be32(bufaddr);
-                       txbdp->lstatus = cpu_to_be32(lstatus);
-               }
-               lstatus = be32_to_cpu(txbdp_start->lstatus);
-       }
        /* Add TxPAL between FCB and frame if required */
        if (unlikely(do_tstamp)) {
                skb_push(skb, GMAC_TXPAL_LEN);
        if (do_vlan)
                gfar_tx_vlan(skb, fcb);
  
-       /* Setup tx hardware time stamping if requested */
-       if (unlikely(do_tstamp)) {
-               skb_shinfo(skb)->tx_flags |= SKBTX_IN_PROGRESS;
-               fcb->ptp = 1;
-       }
        bufaddr = dma_map_single(priv->dev, skb->data, skb_headlen(skb),
                                 DMA_TO_DEVICE);
        if (unlikely(dma_mapping_error(priv->dev, bufaddr)))
  
        txbdp_start->bufPtr = cpu_to_be32(bufaddr);
  
+       /* Time stamp insertion requires one additional TxBD */
+       if (unlikely(do_tstamp))
+               txbdp_tstamp = txbdp = next_txbd(txbdp, base,
+                                                tx_queue->tx_ring_size);
+       if (likely(!nr_frags)) {
+               lstatus |= BD_LFLAG(TXBD_LAST | TXBD_INTERRUPT);
+       } else {
+               u32 lstatus_start = lstatus;
+               /* Place the fragment addresses and lengths into the TxBDs */
+               frag = &skb_shinfo(skb)->frags[0];
+               for (i = 0; i < nr_frags; i++, frag++) {
+                       unsigned int size;
+                       /* Point at the next BD, wrapping as needed */
+                       txbdp = next_txbd(txbdp, base, tx_queue->tx_ring_size);
+                       size = skb_frag_size(frag);
+                       lstatus = be32_to_cpu(txbdp->lstatus) | size |
+                                 BD_LFLAG(TXBD_READY);
+                       /* Handle the last BD specially */
+                       if (i == nr_frags - 1)
+                               lstatus |= BD_LFLAG(TXBD_LAST | TXBD_INTERRUPT);
+                       bufaddr = skb_frag_dma_map(priv->dev, frag, 0,
+                                                  size, DMA_TO_DEVICE);
+                       if (unlikely(dma_mapping_error(priv->dev, bufaddr)))
+                               goto dma_map_err;
+                       /* set the TxBD length and buffer pointer */
+                       txbdp->bufPtr = cpu_to_be32(bufaddr);
+                       txbdp->lstatus = cpu_to_be32(lstatus);
+               }
+               lstatus = lstatus_start;
+       }
        /* If time stamping is requested one additional TxBD must be set up. The
         * first TxBD points to the FCB and must have a data length of
         * GMAC_FCB_LEN. The second TxBD points to the actual frame data with
  
                bufaddr = be32_to_cpu(txbdp_start->bufPtr);
                bufaddr += fcb_len;
                lstatus_ts |= BD_LFLAG(TXBD_READY) |
                              (skb_headlen(skb) - fcb_len);
+               if (!nr_frags)
+                       lstatus_ts |= BD_LFLAG(TXBD_LAST | TXBD_INTERRUPT);
  
                txbdp_tstamp->bufPtr = cpu_to_be32(bufaddr);
                txbdp_tstamp->lstatus = cpu_to_be32(lstatus_ts);
                lstatus |= BD_LFLAG(TXBD_CRC | TXBD_READY) | GMAC_FCB_LEN;
+               /* Setup tx hardware time stamping */
+               skb_shinfo(skb)->tx_flags |= SKBTX_IN_PROGRESS;
+               fcb->ptp = 1;
        } else {
                lstatus |= BD_LFLAG(TXBD_CRC | TXBD_READY) | skb_headlen(skb);
        }
@@@ -2712,7 -2708,7 +2708,7 @@@ static void gfar_clean_tx_ring(struct g
                                          ~0x7UL);
  
                        memset(&shhwtstamps, 0, sizeof(shhwtstamps));
-                       shhwtstamps.hwtstamp = ns_to_ktime(*ns);
+                       shhwtstamps.hwtstamp = ns_to_ktime(be64_to_cpu(*ns));
                        skb_pull(skb, GMAC_FCB_LEN + GMAC_TXPAL_LEN);
                        skb_tstamp_tx(skb, &shhwtstamps);
                        gfar_clear_txbd_status(bdp);
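
Both hwtstamp fixes in this file exist because the hardware writes the nanosecond timestamp in big-endian byte order, so the raw load has to go through be64_to_cpu() before ns_to_ktime(). A stand-alone illustration with the glibc endian helpers (the raw value is made up):

/* Shows why the byte swap matters on a little-endian CPU; not driver code. */
#include <stdio.h>
#include <stdint.h>
#include <endian.h>

int main(void)
{
        /* Hypothetical 8-byte timestamp exactly as the hardware stored it. */
        uint64_t raw_be = htobe64(123456789ULL);

        printf("raw load        : %llu\n", (unsigned long long)raw_be);
        printf("after be64toh() : %llu ns\n", (unsigned long long)be64toh(raw_be));
        return 0;
}
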
@@@ -2944,7 -2940,7 +2940,7 @@@ static bool gfar_add_rx_frag(struct gfa
        /* change offset to the other half */
        rxb->page_offset ^= GFAR_RXB_TRUESIZE;
  
 -      atomic_inc(&page->_count);
 +      page_ref_inc(page);
  
        return true;
  }
@@@ -3041,7 -3037,7 +3037,7 @@@ static void gfar_process_frame(struct n
                u64 *ns = (u64 *) skb->data;
  
                memset(shhwtstamps, 0, sizeof(*shhwtstamps));
-               shhwtstamps->hwtstamp = ns_to_ktime(*ns);
+               shhwtstamps->hwtstamp = ns_to_ktime(be64_to_cpu(*ns));
        }
  
        if (priv->padding)
index b4547ebed77499133ac62eae09dace7ffe270b0f,38f558e0bb6298461828f7a5fb3b2c0ba776a666..4de17db3808ce71b964fc505fe04c69f32743632
@@@ -243,7 -243,7 +243,7 @@@ static bool fm10k_can_reuse_rx_page(str
        /* Even if we own the page, we are not allowed to use atomic_set()
         * This would break get_page_unless_zero() users.
         */
 -      atomic_inc(&page->_count);
 +      page_ref_inc(page);
  
        return true;
  }
@@@ -1937,8 -1937,10 +1937,10 @@@ static void fm10k_init_reta(struct fm10
        u16 i, rss_i = interface->ring_feature[RING_F_RSS].indices;
        u32 reta, base;
  
-       /* If the netdev is initialized we have to maintain table if possible */
-       if (interface->netdev->reg_state != NETREG_UNINITIALIZED) {
+       /* If the Rx flow indirection table has been configured manually, we
+        * need to maintain it when possible.
+        */
+       if (netif_is_rxfh_configured(interface->netdev)) {
                for (i = FM10K_RETA_SIZE; i--;) {
                        reta = interface->reta[i];
                        if ((((reta << 24) >> 24) < rss_i) &&
                            (((reta <<  8) >> 24) < rss_i) &&
                            (((reta)       >> 24) < rss_i))
                                continue;
+                       /* this should never happen */
+                       dev_err(&interface->pdev->dev,
+                               "RSS indirection table assigned flows out of queue bounds. Reconfiguring.\n");
                        goto repopulate_reta;
                }
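
Each 32-bit RETA entry packs four queue indices, one per byte, and the shift pairs above isolate one byte at a time so it can be bounds-checked against the RSS queue count. A stand-alone sketch of the same extraction (entry and queue count are hypothetical):

/* Per-byte RETA bounds check, written out for all four bytes; not driver code. */
#include <stdio.h>
#include <stdint.h>

int main(void)
{
        uint32_t reta = 0x03020100;     /* hypothetical entry: queues 0,1,2,3 */
        uint16_t rss_i = 4;             /* hypothetical number of RSS queues */
        int ok;

        ok = (((reta << 24) >> 24) < rss_i) &&  /* byte 0 */
             (((reta << 16) >> 24) < rss_i) &&  /* byte 1 */
             (((reta <<  8) >> 24) < rss_i) &&  /* byte 2 */
             (((reta)       >> 24) < rss_i);    /* byte 3 */

        printf("entry %#010x is %s for %u queues\n",
               reta, ok ? "in bounds" : "out of bounds", rss_i);
        return 0;
}
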
  
index 5b4ad1ad4d5f962f445dfebcd744c6dbb62f722a,834b1b6a9277b461f0969044b318f2d1046c9291..55a1405cb2a143409f85e969182f612e8a626879
@@@ -122,8 -122,8 +122,8 @@@ static void igb_setup_mrqc(struct igb_a
  static int igb_probe(struct pci_dev *, const struct pci_device_id *);
  static void igb_remove(struct pci_dev *pdev);
  static int igb_sw_init(struct igb_adapter *);
static int igb_open(struct net_device *);
static int igb_close(struct net_device *);
+ int igb_open(struct net_device *);
+ int igb_close(struct net_device *);
  static void igb_configure(struct igb_adapter *);
  static void igb_configure_tx(struct igb_adapter *);
  static void igb_configure_rx(struct igb_adapter *);
@@@ -140,7 -140,7 +140,7 @@@ static struct rtnl_link_stats64 *igb_ge
                                          struct rtnl_link_stats64 *stats);
  static int igb_change_mtu(struct net_device *, int);
  static int igb_set_mac(struct net_device *, void *);
- static void igb_set_uta(struct igb_adapter *adapter);
+ static void igb_set_uta(struct igb_adapter *adapter, bool set);
  static irqreturn_t igb_intr(int irq, void *);
  static irqreturn_t igb_intr_msi(int irq, void *);
  static irqreturn_t igb_msix_other(int irq, void *);
@@@ -1534,12 -1534,13 +1534,13 @@@ static void igb_irq_enable(struct igb_a
  static void igb_update_mng_vlan(struct igb_adapter *adapter)
  {
        struct e1000_hw *hw = &adapter->hw;
+       u16 pf_id = adapter->vfs_allocated_count;
        u16 vid = adapter->hw.mng_cookie.vlan_id;
        u16 old_vid = adapter->mng_vlan_id;
  
        if (hw->mng_cookie.status & E1000_MNG_DHCP_COOKIE_STATUS_VLAN) {
                /* add VID to filter table */
-               igb_vfta_set(hw, vid, true);
+               igb_vfta_set(hw, vid, pf_id, true, true);
                adapter->mng_vlan_id = vid;
        } else {
                adapter->mng_vlan_id = IGB_MNG_VLAN_NONE;
            (vid != old_vid) &&
            !test_bit(old_vid, adapter->active_vlans)) {
                /* remove VID from filter table */
-               igb_vfta_set(hw, old_vid, false);
+               igb_vfta_set(hw, vid, pf_id, false, true);
        }
  }
  
@@@ -1818,6 -1819,10 +1819,10 @@@ void igb_down(struct igb_adapter *adapt
  
        if (!pci_channel_offline(adapter->pdev))
                igb_reset(adapter);
+       /* clear VLAN promisc flag so VFTA will be updated if necessary */
+       adapter->flags &= ~IGB_FLAG_VLAN_PROMISC;
        igb_clean_all_tx_rings(adapter);
        igb_clean_all_rx_rings(adapter);
  #ifdef CONFIG_IGB_DCA
@@@ -1862,7 -1867,7 +1867,7 @@@ void igb_reset(struct igb_adapter *adap
        struct e1000_hw *hw = &adapter->hw;
        struct e1000_mac_info *mac = &hw->mac;
        struct e1000_fc_info *fc = &hw->fc;
-       u32 pba = 0, tx_space, min_tx_space, min_rx_space, hwm;
+       u32 pba, hwm;
  
        /* Repartition Pba for greater than 9k mtu
         * To take effect CTRL.RST is required.
                break;
        }
  
-       if ((adapter->max_frame_size > ETH_FRAME_LEN + ETH_FCS_LEN) &&
-           (mac->type < e1000_82576)) {
-               /* adjust PBA for jumbo frames */
+       if (mac->type == e1000_82575) {
+               u32 min_rx_space, min_tx_space, needed_tx_space;
+               /* write Rx PBA so that hardware can report correct Tx PBA */
                wr32(E1000_PBA, pba);
  
                /* To maintain wire speed transmits, the Tx FIFO should be
                 * one full receive packet and is similarly rounded up and
                 * expressed in KB.
                 */
-               pba = rd32(E1000_PBA);
-               /* upper 16 bits has Tx packet buffer allocation size in KB */
-               tx_space = pba >> 16;
-               /* lower 16 bits has Rx packet buffer allocation size in KB */
-               pba &= 0xffff;
-               /* the Tx fifo also stores 16 bytes of information about the Tx
-                * but don't include ethernet FCS because hardware appends it
+               min_rx_space = DIV_ROUND_UP(MAX_JUMBO_FRAME_SIZE, 1024);
+               /* The Tx FIFO also stores 16 bytes of information about the Tx,
+                * but we don't include the Ethernet FCS because hardware appends it.
+                * We only need to round down to the nearest 512 byte block
+                * count since the value we care about is 2 frames, not 1.
                 */
-               min_tx_space = (adapter->max_frame_size +
-                               sizeof(union e1000_adv_tx_desc) -
-                               ETH_FCS_LEN) * 2;
-               min_tx_space = ALIGN(min_tx_space, 1024);
-               min_tx_space >>= 10;
-               /* software strips receive CRC, so leave room for it */
-               min_rx_space = adapter->max_frame_size;
-               min_rx_space = ALIGN(min_rx_space, 1024);
-               min_rx_space >>= 10;
+               min_tx_space = adapter->max_frame_size;
+               min_tx_space += sizeof(union e1000_adv_tx_desc) - ETH_FCS_LEN;
+               min_tx_space = DIV_ROUND_UP(min_tx_space, 512);
+               /* upper 16 bits has Tx packet buffer allocation size in KB */
+               needed_tx_space = min_tx_space - (rd32(E1000_PBA) >> 16);
  
                /* If current Tx allocation is less than the min Tx FIFO size,
                 * and the min Tx FIFO size is less than the current Rx FIFO
-                * allocation, take space away from current Rx allocation
+                * allocation, take space away from current Rx allocation.
                 */
-               if (tx_space < min_tx_space &&
-                   ((min_tx_space - tx_space) < pba)) {
-                       pba = pba - (min_tx_space - tx_space);
+               if (needed_tx_space < pba) {
+                       pba -= needed_tx_space;
  
                        /* if short on Rx space, Rx wins and must trump Tx
                         * adjustment
                        if (pba < min_rx_space)
                                pba = min_rx_space;
                }
+               /* adjust PBA for jumbo frames */
                wr32(E1000_PBA, pba);
        }
  
-       /* flow control settings */
-       /* The high water mark must be low enough to fit one full frame
-        * (or the size used for early receive) above it in the Rx FIFO.
-        * Set it to the lower of:
-        * - 90% of the Rx FIFO size, or
-        * - the full Rx FIFO size minus one full frame
+       /* flow control settings
+        * The high water mark must be low enough to fit one full frame
+        * after transmitting the pause frame.  As such we must have enough
+        * space to allow for us to complete our current transmit and then
+        * receive the frame that is in progress from the link partner.
+        * Set it to:
+        * - the full Rx FIFO size minus one full Tx plus one full Rx frame
         */
-       hwm = min(((pba << 10) * 9 / 10),
-                       ((pba << 10) - 2 * adapter->max_frame_size));
+       hwm = (pba << 10) - (adapter->max_frame_size + MAX_JUMBO_FRAME_SIZE);
  
        fc->high_water = hwm & 0xFFFFFFF0;      /* 16-byte granularity */
        fc->low_water = fc->high_water - 16;
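
A quick numeric check of the new watermark formula: pba is expressed in KB, so it is shifted up to bytes before one maximum Tx frame plus one maximum Rx frame is subtracted. The pba, frame size and MAX_JUMBO_FRAME_SIZE values below are stand-ins for illustration only.

/* Flow-control watermark arithmetic sketch; not driver code. */
#include <stdio.h>

#define MAX_JUMBO_FRAME_SIZE    0x3F00  /* stand-in value for illustration */

int main(void)
{
        unsigned int pba = 34;                  /* hypothetical Rx buffer, in KB */
        unsigned int max_frame_size = 1522;     /* hypothetical max Tx frame */
        unsigned int hwm, high_water, low_water;

        hwm = (pba << 10) - (max_frame_size + MAX_JUMBO_FRAME_SIZE);
        high_water = hwm & 0xFFFFFFF0;          /* 16-byte granularity */
        low_water = high_water - 16;

        printf("hwm=%u high_water=%u low_water=%u\n", hwm, high_water, low_water);
        return 0;
}
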
@@@ -2051,7 -2054,7 +2054,7 @@@ static int igb_set_features(struct net_
        if (changed & NETIF_F_HW_VLAN_CTAG_RX)
                igb_vlan_mode(netdev, features);
  
-       if (!(changed & NETIF_F_RXALL))
+       if (!(changed & (NETIF_F_RXALL | NETIF_F_NTUPLE)))
                return 0;
  
        netdev->features = features;
        return 0;
  }
  
+ static int igb_ndo_fdb_add(struct ndmsg *ndm, struct nlattr *tb[],
+                          struct net_device *dev,
+                          const unsigned char *addr, u16 vid,
+                          u16 flags)
+ {
+       /* guarantee we can provide a unique filter for the unicast address */
+       if (is_unicast_ether_addr(addr) || is_link_local_ether_addr(addr)) {
+               struct igb_adapter *adapter = netdev_priv(dev);
+               struct e1000_hw *hw = &adapter->hw;
+               int vfn = adapter->vfs_allocated_count;
+               int rar_entries = hw->mac.rar_entry_count - (vfn + 1);
+               if (netdev_uc_count(dev) >= rar_entries)
+                       return -ENOMEM;
+       }
+       return ndo_dflt_fdb_add(ndm, tb, dev, addr, vid, flags);
+ }
  static const struct net_device_ops igb_netdev_ops = {
        .ndo_open               = igb_open,
        .ndo_stop               = igb_close,
  #endif
        .ndo_fix_features       = igb_fix_features,
        .ndo_set_features       = igb_set_features,
+       .ndo_fdb_add            = igb_ndo_fdb_add,
        .ndo_features_check     = passthru_features_check,
  };
  
@@@ -2349,27 -2372,35 +2372,35 @@@ static int igb_probe(struct pci_dev *pd
         * assignment.
         */
        netdev->features |= NETIF_F_SG |
-                           NETIF_F_IP_CSUM |
-                           NETIF_F_IPV6_CSUM |
                            NETIF_F_TSO |
                            NETIF_F_TSO6 |
                            NETIF_F_RXHASH |
                            NETIF_F_RXCSUM |
+                           NETIF_F_HW_CSUM |
                            NETIF_F_HW_VLAN_CTAG_RX |
                            NETIF_F_HW_VLAN_CTAG_TX;
  
+       if (hw->mac.type >= e1000_82576)
+               netdev->features |= NETIF_F_SCTP_CRC;
        /* copy netdev features into list of user selectable features */
        netdev->hw_features |= netdev->features;
        netdev->hw_features |= NETIF_F_RXALL;
  
+       if (hw->mac.type >= e1000_i350)
+               netdev->hw_features |= NETIF_F_NTUPLE;
        /* set this bit last since it cannot be part of hw_features */
        netdev->features |= NETIF_F_HW_VLAN_CTAG_FILTER;
  
-       netdev->vlan_features |= NETIF_F_TSO |
+       netdev->vlan_features |= NETIF_F_SG |
+                                NETIF_F_TSO |
                                 NETIF_F_TSO6 |
-                                NETIF_F_IP_CSUM |
-                                NETIF_F_IPV6_CSUM |
-                                NETIF_F_SG;
+                                NETIF_F_HW_CSUM |
+                                NETIF_F_SCTP_CRC;
+       netdev->mpls_features |= NETIF_F_HW_CSUM;
+       netdev->hw_enc_features |= NETIF_F_HW_CSUM;
  
        netdev->priv_flags |= IFF_SUPP_NOFCS;
  
                netdev->vlan_features |= NETIF_F_HIGHDMA;
        }
  
-       if (hw->mac.type >= e1000_82576) {
-               netdev->hw_features |= NETIF_F_SCTP_CRC;
-               netdev->features |= NETIF_F_SCTP_CRC;
-       }
        netdev->priv_flags |= IFF_UNICAST_FLT;
  
        adapter->en_mng_pt = igb_enable_mng_pass_thru(hw);
                adapter->wol = 0;
        }
  
+       /* Some vendors want the ability to use the EEPROM setting as
+        * an enable/disable switch only, not as a capability indicator.
+        */
+       if (((hw->mac.type == e1000_i350) ||
+            (hw->mac.type == e1000_i354)) &&
+           (pdev->subsystem_vendor == PCI_VENDOR_ID_DELL)) {
+               adapter->flags |= IGB_FLAG_WOL_SUPPORTED;
+               adapter->wol = 0;
+       }
+       if (hw->mac.type == e1000_i350) {
+               if (((pdev->subsystem_device == 0x5001) ||
+                    (pdev->subsystem_device == 0x5002)) &&
+                               (hw->bus.func == 0)) {
+                       adapter->flags |= IGB_FLAG_WOL_SUPPORTED;
+                       adapter->wol = 0;
+               }
+               if (pdev->subsystem_device == 0x1F52)
+                       adapter->flags |= IGB_FLAG_WOL_SUPPORTED;
+       }
        device_set_wakeup_enable(&adapter->pdev->dev,
                                 adapter->flags & IGB_FLAG_WOL_SUPPORTED);
  
@@@ -2921,14 -2967,6 +2967,6 @@@ void igb_set_flag_queue_pairs(struct ig
                /* Device supports enough interrupts without queue pairing. */
                break;
        case e1000_82576:
-               /* If VFs are going to be allocated with RSS queues then we
-                * should pair the queues in order to conserve interrupts due
-                * to limited supply.
-                */
-               if ((adapter->rss_queues > 1) &&
-                   (adapter->vfs_allocated_count > 6))
-                       adapter->flags |= IGB_FLAG_QUEUE_PAIRS;
-               /* fall through */
        case e1000_82580:
        case e1000_i350:
        case e1000_i354:
                 */
                if (adapter->rss_queues > (max_rss_queues / 2))
                        adapter->flags |= IGB_FLAG_QUEUE_PAIRS;
+               else
+                       adapter->flags &= ~IGB_FLAG_QUEUE_PAIRS;
                break;
        }
  }
@@@ -3132,7 -3172,7 +3172,7 @@@ err_setup_tx
        return err;
  }
  
static int igb_open(struct net_device *netdev)
+ int igb_open(struct net_device *netdev)
  {
        return __igb_open(netdev, false);
  }
@@@ -3169,7 -3209,7 +3209,7 @@@ static int __igb_close(struct net_devic
        return 0;
  }
  
static int igb_close(struct net_device *netdev)
+ int igb_close(struct net_device *netdev)
  {
        return __igb_close(netdev, false);
  }
@@@ -3460,12 -3500,12 +3500,12 @@@ static void igb_setup_mrqc(struct igb_a
                        wr32(E1000_VT_CTL, vtctl);
                }
                if (adapter->rss_queues > 1)
-                       mrqc |= E1000_MRQC_ENABLE_VMDQ_RSS_2Q;
+                       mrqc |= E1000_MRQC_ENABLE_VMDQ_RSS_MQ;
                else
                        mrqc |= E1000_MRQC_ENABLE_VMDQ;
        } else {
                if (hw->mac.type != e1000_i211)
-                       mrqc |= E1000_MRQC_ENABLE_RSS_4Q;
+                       mrqc |= E1000_MRQC_ENABLE_RSS_MQ;
        }
        igb_vmm_control(adapter);
  
@@@ -3498,7 -3538,7 +3538,7 @@@ void igb_setup_rctl(struct igb_adapter 
        /* disable store bad packets and clear size bits. */
        rctl &= ~(E1000_RCTL_SBP | E1000_RCTL_SZ_256);
  
-       /* enable LPE to prevent packets larger than max_frame_size */
+       /* enable LPE to allow for reception of jumbo frames */
        rctl |= E1000_RCTL_LPE;
  
        /* disable queue 0 to prevent tail write w/o re-config */
                         E1000_RCTL_BAM | /* RX All Bcast Pkts */
                         E1000_RCTL_PMCF); /* RX All MAC Ctrl Pkts */
  
-               rctl &= ~(E1000_RCTL_VFE | /* Disable VLAN filter */
-                         E1000_RCTL_DPF | /* Allow filtered pause */
+               rctl &= ~(E1000_RCTL_DPF | /* Allow filtered pause */
                          E1000_RCTL_CFIEN); /* Dis VLAN CFIEN Filter */
                /* Do not mess with E1000_CTRL_VME, it affects transmit as well,
                 * and that breaks VLANs.
@@@ -3539,12 -3578,8 +3578,8 @@@ static inline int igb_set_vf_rlpml(stru
        struct e1000_hw *hw = &adapter->hw;
        u32 vmolr;
  
-       /* if it isn't the PF check to see if VFs are enabled and
-        * increase the size to support vlan tags
-        */
-       if (vfn < adapter->vfs_allocated_count &&
-           adapter->vf_data[vfn].vlans_enabled)
-               size += VLAN_TAG_SIZE;
+       if (size > MAX_JUMBO_FRAME_SIZE)
+               size = MAX_JUMBO_FRAME_SIZE;
  
        vmolr = rd32(E1000_VMOLR(vfn));
        vmolr &= ~E1000_VMOLR_RLPML_MASK;
        return 0;
  }
  
- /**
-  *  igb_rlpml_set - set maximum receive packet size
-  *  @adapter: board private structure
-  *
-  *  Configure maximum receivable packet size.
-  **/
- static void igb_rlpml_set(struct igb_adapter *adapter)
+ static inline void igb_set_vf_vlan_strip(struct igb_adapter *adapter,
+                                        int vfn, bool enable)
  {
-       u32 max_frame_size = adapter->max_frame_size;
        struct e1000_hw *hw = &adapter->hw;
-       u16 pf_id = adapter->vfs_allocated_count;
+       u32 val, reg;
  
-       if (pf_id) {
-               igb_set_vf_rlpml(adapter, max_frame_size, pf_id);
-               /* If we're in VMDQ or SR-IOV mode, then set global RLPML
-                * to our max jumbo frame size, in case we need to enable
-                * jumbo frames on one of the rings later.
-                * This will not pass over-length frames into the default
-                * queue because it's gated by the VMOLR.RLPML.
-                */
-               max_frame_size = MAX_JUMBO_FRAME_SIZE;
-       }
+       if (hw->mac.type < e1000_82576)
+               return;
  
-       wr32(E1000_RLPML, max_frame_size);
+       if (hw->mac.type == e1000_i350)
+               reg = E1000_DVMOLR(vfn);
+       else
+               reg = E1000_VMOLR(vfn);
+       val = rd32(reg);
+       if (enable)
+               val |= E1000_VMOLR_STRVLAN;
+       else
+               val &= ~(E1000_VMOLR_STRVLAN);
+       wr32(reg, val);
  }
  
  static inline void igb_set_vmolr(struct igb_adapter *adapter,
                return;
  
        vmolr = rd32(E1000_VMOLR(vfn));
-       vmolr |= E1000_VMOLR_STRVLAN; /* Strip vlan tags */
-       if (hw->mac.type == e1000_i350) {
-               u32 dvmolr;
-               dvmolr = rd32(E1000_DVMOLR(vfn));
-               dvmolr |= E1000_DVMOLR_STRVLAN;
-               wr32(E1000_DVMOLR(vfn), dvmolr);
-       }
        if (aupe)
                vmolr |= E1000_VMOLR_AUPE; /* Accept untagged packets */
        else
@@@ -3684,9 -3707,6 +3707,6 @@@ static void igb_configure_rx(struct igb
  {
        int i;
  
-       /* set UTA to appropriate mode */
-       igb_set_uta(adapter);
        /* set the correct pool for the PF default MAC address in entry 0 */
        igb_rar_set_qsel(adapter, adapter->hw.mac.addr, 0,
                         adapter->vfs_allocated_count);
@@@ -4004,6 -4024,130 +4024,130 @@@ static int igb_write_uc_addr_list(struc
        return count;
  }
  
+ static int igb_vlan_promisc_enable(struct igb_adapter *adapter)
+ {
+       struct e1000_hw *hw = &adapter->hw;
+       u32 i, pf_id;
+       switch (hw->mac.type) {
+       case e1000_i210:
+       case e1000_i211:
+       case e1000_i350:
+               /* VLAN filtering needed for VLAN prio filter */
+               if (adapter->netdev->features & NETIF_F_NTUPLE)
+                       break;
+               /* fall through */
+       case e1000_82576:
+       case e1000_82580:
+       case e1000_i354:
+               /* VLAN filtering needed for pool filtering */
+               if (adapter->vfs_allocated_count)
+                       break;
+               /* fall through */
+       default:
+               return 1;
+       }
+       /* We are already in VLAN promisc, nothing to do */
+       if (adapter->flags & IGB_FLAG_VLAN_PROMISC)
+               return 0;
+       if (!adapter->vfs_allocated_count)
+               goto set_vfta;
+       /* Add PF to all active pools */
+       pf_id = adapter->vfs_allocated_count + E1000_VLVF_POOLSEL_SHIFT;
+       for (i = E1000_VLVF_ARRAY_SIZE; --i;) {
+               u32 vlvf = rd32(E1000_VLVF(i));
+               vlvf |= 1 << pf_id;
+               wr32(E1000_VLVF(i), vlvf);
+       }
+ set_vfta:
+       /* Set all bits in the VLAN filter table array */
+       for (i = E1000_VLAN_FILTER_TBL_SIZE; i--;)
+               hw->mac.ops.write_vfta(hw, i, ~0U);
+       /* Set flag so we don't redo unnecessary work */
+       adapter->flags |= IGB_FLAG_VLAN_PROMISC;
+       return 0;
+ }
+ #define VFTA_BLOCK_SIZE 8
+ static void igb_scrub_vfta(struct igb_adapter *adapter, u32 vfta_offset)
+ {
+       struct e1000_hw *hw = &adapter->hw;
+       u32 vfta[VFTA_BLOCK_SIZE] = { 0 };
+       u32 vid_start = vfta_offset * 32;
+       u32 vid_end = vid_start + (VFTA_BLOCK_SIZE * 32);
+       u32 i, vid, word, bits, pf_id;
+       /* guarantee that we don't scrub out management VLAN */
+       vid = adapter->mng_vlan_id;
+       if (vid >= vid_start && vid < vid_end)
+               vfta[(vid - vid_start) / 32] |= 1 << (vid % 32);
+       if (!adapter->vfs_allocated_count)
+               goto set_vfta;
+       pf_id = adapter->vfs_allocated_count + E1000_VLVF_POOLSEL_SHIFT;
+       for (i = E1000_VLVF_ARRAY_SIZE; --i;) {
+               u32 vlvf = rd32(E1000_VLVF(i));
+               /* pull VLAN ID from VLVF */
+               vid = vlvf & VLAN_VID_MASK;
+               /* only concern ourselves with a certain range */
+               if (vid < vid_start || vid >= vid_end)
+                       continue;
+               if (vlvf & E1000_VLVF_VLANID_ENABLE) {
+                       /* record VLAN ID in VFTA */
+                       vfta[(vid - vid_start) / 32] |= 1 << (vid % 32);
+                       /* if PF is part of this then continue */
+                       if (test_bit(vid, adapter->active_vlans))
+                               continue;
+               }
+               /* remove PF from the pool */
+               bits = ~(1 << pf_id);
+               bits &= rd32(E1000_VLVF(i));
+               wr32(E1000_VLVF(i), bits);
+       }
+ set_vfta:
+       /* extract values from active_vlans and write back to VFTA */
+       for (i = VFTA_BLOCK_SIZE; i--;) {
+               vid = (vfta_offset + i) * 32;
+               word = vid / BITS_PER_LONG;
+               bits = vid % BITS_PER_LONG;
+               vfta[i] |= adapter->active_vlans[word] >> bits;
+               hw->mac.ops.write_vfta(hw, vfta_offset + i, vfta[i]);
+       }
+ }
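
The VFTA is a 4096-bit VLAN bitmap spread across 32-bit registers, so a VLAN ID maps to word vid / 32 and bit vid % 32; the scrub above rebuilds those words from active_vlans. A stand-alone illustration of the index math (the VLAN ID is arbitrary):

/* VLAN-ID -> VFTA word/bit mapping sketch; not driver code. */
#include <stdio.h>
#include <stdint.h>

int main(void)
{
        unsigned int vid = 100;                    /* arbitrary VLAN ID */
        unsigned int word = vid / 32;              /* VFTA register index */
        uint32_t bit = UINT32_C(1) << (vid % 32);  /* bit within that register */

        printf("VLAN %u -> VFTA[%u] |= %#010x\n", vid, word, (unsigned int)bit);
        return 0;
}
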
+ static void igb_vlan_promisc_disable(struct igb_adapter *adapter)
+ {
+       u32 i;
+       /* We are not in VLAN promisc, nothing to do */
+       if (!(adapter->flags & IGB_FLAG_VLAN_PROMISC))
+               return;
+       /* Set flag so we don't redo unnecessary work */
+       adapter->flags &= ~IGB_FLAG_VLAN_PROMISC;
+       for (i = 0; i < E1000_VLAN_FILTER_TBL_SIZE; i += VFTA_BLOCK_SIZE)
+               igb_scrub_vfta(adapter, i);
+ }
  /**
   *  igb_set_rx_mode - Secondary Unicast, Multicast and Promiscuous mode set
   *  @netdev: network interface device structure
@@@ -4018,21 -4162,17 +4162,17 @@@ static void igb_set_rx_mode(struct net_
        struct igb_adapter *adapter = netdev_priv(netdev);
        struct e1000_hw *hw = &adapter->hw;
        unsigned int vfn = adapter->vfs_allocated_count;
-       u32 rctl, vmolr = 0;
+       u32 rctl = 0, vmolr = 0;
        int count;
  
        /* Check for Promiscuous and All Multicast modes */
-       rctl = rd32(E1000_RCTL);
-       /* clear the effected bits */
-       rctl &= ~(E1000_RCTL_UPE | E1000_RCTL_MPE | E1000_RCTL_VFE);
        if (netdev->flags & IFF_PROMISC) {
-               /* retain VLAN HW filtering if in VT mode */
-               if (adapter->vfs_allocated_count)
-                       rctl |= E1000_RCTL_VFE;
-               rctl |= (E1000_RCTL_UPE | E1000_RCTL_MPE);
-               vmolr |= (E1000_VMOLR_ROPE | E1000_VMOLR_MPME);
+               rctl |= E1000_RCTL_UPE | E1000_RCTL_MPE;
+               vmolr |= E1000_VMOLR_MPME;
+               /* enable use of UTA filter to force packets to default pool */
+               if (hw->mac.type == e1000_82576)
+                       vmolr |= E1000_VMOLR_ROPE;
        } else {
                if (netdev->flags & IFF_ALLMULTI) {
                        rctl |= E1000_RCTL_MPE;
                                vmolr |= E1000_VMOLR_ROMPE;
                        }
                }
-               /* Write addresses to available RAR registers, if there is not
-                * sufficient space to store all the addresses then enable
-                * unicast promiscuous mode
-                */
-               count = igb_write_uc_addr_list(netdev);
-               if (count < 0) {
-                       rctl |= E1000_RCTL_UPE;
-                       vmolr |= E1000_VMOLR_ROPE;
-               }
-               rctl |= E1000_RCTL_VFE;
        }
+       /* Write addresses to available RAR registers, if there is not
+        * sufficient space to store all the addresses then enable
+        * unicast promiscuous mode
+        */
+       count = igb_write_uc_addr_list(netdev);
+       if (count < 0) {
+               rctl |= E1000_RCTL_UPE;
+               vmolr |= E1000_VMOLR_ROPE;
+       }
+       /* enable VLAN filtering by default */
+       rctl |= E1000_RCTL_VFE;
+       /* disable VLAN filtering for modes that require it */
+       if ((netdev->flags & IFF_PROMISC) ||
+           (netdev->features & NETIF_F_RXALL)) {
+               /* if we fail to set all rules then just clear VFE */
+               if (igb_vlan_promisc_enable(adapter))
+                       rctl &= ~E1000_RCTL_VFE;
+       } else {
+               igb_vlan_promisc_disable(adapter);
+       }
+       /* update state of unicast, multicast, and VLAN filtering modes */
+       rctl |= rd32(E1000_RCTL) & ~(E1000_RCTL_UPE | E1000_RCTL_MPE |
+                                    E1000_RCTL_VFE);
        wr32(E1000_RCTL, rctl);
  
        /* In order to support SR-IOV and eventually VMDq it is necessary to set
        if ((hw->mac.type < e1000_82576) || (hw->mac.type > e1000_i350))
                return;
  
+       /* set UTA to appropriate mode */
+       igb_set_uta(adapter, !!(vmolr & E1000_VMOLR_ROPE));
        vmolr |= rd32(E1000_VMOLR(vfn)) &
                 ~(E1000_VMOLR_ROPE | E1000_VMOLR_MPME | E1000_VMOLR_ROMPE);
+       /* enable Rx jumbo frames, no need for restriction */
+       vmolr &= ~E1000_VMOLR_RLPML_MASK;
+       vmolr |= MAX_JUMBO_FRAME_SIZE | E1000_VMOLR_LPE;
        wr32(E1000_VMOLR(vfn), vmolr);
+       wr32(E1000_RLPML, MAX_JUMBO_FRAME_SIZE);
        igb_restore_vf_multicasts(adapter);
  }
  
@@@ -4227,6 -4394,7 +4394,7 @@@ static void igb_watchdog_task(struct wo
        u32 link;
        int i;
        u32 connsw;
+       u16 phy_data, retry_count = 20;
  
        link = igb_has_link(adapter);
  
                                break;
                        }
  
+                       if (adapter->link_speed != SPEED_1000)
+                               goto no_wait;
+                       /* wait for Remote receiver status OK */
+ retry_read_status:
+                       if (!igb_read_phy_reg(hw, PHY_1000T_STATUS,
+                                             &phy_data)) {
+                               if (!(phy_data & SR_1000T_REMOTE_RX_STATUS) &&
+                                   retry_count) {
+                                       msleep(100);
+                                       retry_count--;
+                                       goto retry_read_status;
+                               } else if (!retry_count) {
+                                       dev_err(&adapter->pdev->dev, "exceed max 2 second\n");
+                               }
+                       } else {
+                               dev_err(&adapter->pdev->dev, "read 1000Base-T Status Reg\n");
+                       }
+ no_wait:
                        netif_carrier_on(netdev);
  
                        igb_ping_all_vfs(adapter);
@@@ -4713,70 -4900,57 +4900,57 @@@ static int igb_tso(struct igb_ring *tx_
        return 1;
  }
  
+ static inline bool igb_ipv6_csum_is_sctp(struct sk_buff *skb)
+ {
+       unsigned int offset = 0;
+       ipv6_find_hdr(skb, &offset, IPPROTO_SCTP, NULL, NULL);
+       return offset == skb_checksum_start_offset(skb);
+ }
  static void igb_tx_csum(struct igb_ring *tx_ring, struct igb_tx_buffer *first)
  {
        struct sk_buff *skb = first->skb;
        u32 vlan_macip_lens = 0;
-       u32 mss_l4len_idx = 0;
        u32 type_tucmd = 0;
  
        if (skb->ip_summed != CHECKSUM_PARTIAL) {
+ csum_failed:
                if (!(first->tx_flags & IGB_TX_FLAGS_VLAN))
                        return;
-       } else {
-               u8 l4_hdr = 0;
-               switch (first->protocol) {
-               case htons(ETH_P_IP):
-                       vlan_macip_lens |= skb_network_header_len(skb);
-                       type_tucmd |= E1000_ADVTXD_TUCMD_IPV4;
-                       l4_hdr = ip_hdr(skb)->protocol;
-                       break;
-               case htons(ETH_P_IPV6):
-                       vlan_macip_lens |= skb_network_header_len(skb);
-                       l4_hdr = ipv6_hdr(skb)->nexthdr;
-                       break;
-               default:
-                       if (unlikely(net_ratelimit())) {
-                               dev_warn(tx_ring->dev,
-                                        "partial checksum but proto=%x!\n",
-                                        first->protocol);
-                       }
-                       break;
-               }
+               goto no_csum;
+       }
  
-               switch (l4_hdr) {
-               case IPPROTO_TCP:
-                       type_tucmd |= E1000_ADVTXD_TUCMD_L4T_TCP;
-                       mss_l4len_idx = tcp_hdrlen(skb) <<
-                                       E1000_ADVTXD_L4LEN_SHIFT;
-                       break;
-               case IPPROTO_SCTP:
-                       type_tucmd |= E1000_ADVTXD_TUCMD_L4T_SCTP;
-                       mss_l4len_idx = sizeof(struct sctphdr) <<
-                                       E1000_ADVTXD_L4LEN_SHIFT;
-                       break;
-               case IPPROTO_UDP:
-                       mss_l4len_idx = sizeof(struct udphdr) <<
-                                       E1000_ADVTXD_L4LEN_SHIFT;
-                       break;
-               default:
-                       if (unlikely(net_ratelimit())) {
-                               dev_warn(tx_ring->dev,
-                                        "partial checksum but l4 proto=%x!\n",
-                                        l4_hdr);
-                       }
+       switch (skb->csum_offset) {
+       case offsetof(struct tcphdr, check):
+               type_tucmd = E1000_ADVTXD_TUCMD_L4T_TCP;
+               /* fall through */
+       case offsetof(struct udphdr, check):
+               break;
+       case offsetof(struct sctphdr, checksum):
+               /* validate that this is actually an SCTP request */
+               if (((first->protocol == htons(ETH_P_IP)) &&
+                    (ip_hdr(skb)->protocol == IPPROTO_SCTP)) ||
+                   ((first->protocol == htons(ETH_P_IPV6)) &&
+                    igb_ipv6_csum_is_sctp(skb))) {
+                       type_tucmd = E1000_ADVTXD_TUCMD_L4T_SCTP;
                        break;
                }
-               /* update TX checksum flag */
-               first->tx_flags |= IGB_TX_FLAGS_CSUM;
+       default:
+               skb_checksum_help(skb);
+               goto csum_failed;
        }
  
+       /* update TX checksum flag */
+       first->tx_flags |= IGB_TX_FLAGS_CSUM;
+       vlan_macip_lens = skb_checksum_start_offset(skb) -
+                         skb_network_offset(skb);
+ no_csum:
        vlan_macip_lens |= skb_network_offset(skb) << E1000_ADVTXD_MACLEN_SHIFT;
        vlan_macip_lens |= first->tx_flags & IGB_TX_FLAGS_VLAN_MASK;
  
-       igb_tx_ctxtdesc(tx_ring, vlan_macip_lens, type_tucmd, mss_l4len_idx);
+       igb_tx_ctxtdesc(tx_ring, vlan_macip_lens, type_tucmd, 0);
  }
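
The rewrite above keys the checksum context off skb->csum_offset instead of re-parsing the headers: the offset of the checksum field within its L4 header is enough to distinguish TCP, UDP and SCTP. A stand-alone sketch of those offsets, using simplified local header layouts rather than the kernel's struct definitions:

/* csum_offset -> L4 protocol dispatch sketch; simplified headers, not kernel code. */
#include <stdio.h>
#include <stddef.h>
#include <stdint.h>

struct tcp_hdr  { uint16_t source, dest; uint32_t seq, ack_seq;
                  uint16_t flags, window, check, urg_ptr; };
struct udp_hdr  { uint16_t source, dest, len, check; };
struct sctp_hdr { uint16_t source, dest; uint32_t vtag, checksum; };

static const char *l4_from_csum_offset(size_t csum_offset)
{
        switch (csum_offset) {
        case offsetof(struct tcp_hdr, check):           /* 16 */
                return "TCP";
        case offsetof(struct udp_hdr, check):           /* 6 */
                return "UDP";
        case offsetof(struct sctp_hdr, checksum):       /* 8, after checking the IP proto */
                return "SCTP";
        default:
                return "unknown - fall back to software checksum";
        }
}

int main(void)
{
        printf("csum_offset 16 -> %s\n", l4_from_csum_offset(16));
        printf("csum_offset  6 -> %s\n", l4_from_csum_offset(6));
        printf("csum_offset  8 -> %s\n", l4_from_csum_offset(8));
        printf("csum_offset 10 -> %s\n", l4_from_csum_offset(10));
        return 0;
}
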
  
  #define IGB_SET_FLAG(_input, _flag, _result) \
@@@ -5088,16 -5262,6 +5262,6 @@@ static netdev_tx_t igb_xmit_frame(struc
  {
        struct igb_adapter *adapter = netdev_priv(netdev);
  
-       if (test_bit(__IGB_DOWN, &adapter->state)) {
-               dev_kfree_skb_any(skb);
-               return NETDEV_TX_OK;
-       }
-       if (skb->len <= 0) {
-               dev_kfree_skb_any(skb);
-               return NETDEV_TX_OK;
-       }
        /* The minimum packet size with TCTL.PSP set is 17 so pad the skb
         * in order to meet this minimum size requirement.
         */
@@@ -5792,125 -5956,132 +5956,132 @@@ static void igb_restore_vf_multicasts(s
  static void igb_clear_vf_vfta(struct igb_adapter *adapter, u32 vf)
  {
        struct e1000_hw *hw = &adapter->hw;
-       u32 pool_mask, reg, vid;
-       int i;
+       u32 pool_mask, vlvf_mask, i;
  
-       pool_mask = 1 << (E1000_VLVF_POOLSEL_SHIFT + vf);
+       /* create mask for VF and other pools */
+       pool_mask = E1000_VLVF_POOLSEL_MASK;
+       vlvf_mask = 1 << (E1000_VLVF_POOLSEL_SHIFT + vf);
+       /* drop PF from pool bits */
+       pool_mask &= ~(1 << (E1000_VLVF_POOLSEL_SHIFT +
+                            adapter->vfs_allocated_count));
  
        /* Find the vlan filter for this id */
-       for (i = 0; i < E1000_VLVF_ARRAY_SIZE; i++) {
-               reg = rd32(E1000_VLVF(i));
+       for (i = E1000_VLVF_ARRAY_SIZE; i--;) {
+               u32 vlvf = rd32(E1000_VLVF(i));
+               u32 vfta_mask, vid, vfta;
  
                /* remove the vf from the pool */
-               reg &= ~pool_mask;
-               /* if pool is empty then remove entry from vfta */
-               if (!(reg & E1000_VLVF_POOLSEL_MASK) &&
-                   (reg & E1000_VLVF_VLANID_ENABLE)) {
-                       reg = 0;
-                       vid = reg & E1000_VLVF_VLANID_MASK;
-                       igb_vfta_set(hw, vid, false);
-               }
+               if (!(vlvf & vlvf_mask))
+                       continue;
+               /* clear out bit from VLVF */
+               vlvf ^= vlvf_mask;
+               /* if other pools are present, just remove ourselves */
+               if (vlvf & pool_mask)
+                       goto update_vlvfb;
  
-               wr32(E1000_VLVF(i), reg);
+               /* if PF is present, leave VFTA */
+               if (vlvf & E1000_VLVF_POOLSEL_MASK)
+                       goto update_vlvf;
+               vid = vlvf & E1000_VLVF_VLANID_MASK;
+               vfta_mask = 1 << (vid % 32);
+               /* clear bit from VFTA */
+               vfta = adapter->shadow_vfta[vid / 32];
+               if (vfta & vfta_mask)
+                       hw->mac.ops.write_vfta(hw, vid / 32, vfta ^ vfta_mask);
+ update_vlvf:
+               /* clear pool selection enable */
+               if (adapter->flags & IGB_FLAG_VLAN_PROMISC)
+                       vlvf &= E1000_VLVF_POOLSEL_MASK;
+               else
+                       vlvf = 0;
+ update_vlvfb:
+               /* clear pool bits */
+               wr32(E1000_VLVF(i), vlvf);
        }
+ }
  
-       adapter->vf_data[vf].vlans_enabled = 0;
+ static int igb_find_vlvf_entry(struct e1000_hw *hw, u32 vlan)
+ {
+       u32 vlvf;
+       int idx;
+       /* short cut the special case */
+       if (vlan == 0)
+               return 0;
+       /* Search for the VLAN id in the VLVF entries */
+       for (idx = E1000_VLVF_ARRAY_SIZE; --idx;) {
+               vlvf = rd32(E1000_VLVF(idx));
+               if ((vlvf & VLAN_VID_MASK) == vlan)
+                       break;
+       }
+       return idx;
  }
  
static s32 igb_vlvf_set(struct igb_adapter *adapter, u32 vid, bool add, u32 vf)
void igb_update_pf_vlvf(struct igb_adapter *adapter, u32 vid)
  {
        struct e1000_hw *hw = &adapter->hw;
-       u32 reg, i;
-       /* The vlvf table only exists on 82576 hardware and newer */
-       if (hw->mac.type < e1000_82576)
-               return -1;
+       u32 bits, pf_id;
+       int idx;
  
-       /* we only need to do this if VMDq is enabled */
-       if (!adapter->vfs_allocated_count)
-               return -1;
+       idx = igb_find_vlvf_entry(hw, vid);
+       if (!idx)
+               return;
  
-       /* Find the vlan filter for this id */
-       for (i = 0; i < E1000_VLVF_ARRAY_SIZE; i++) {
-               reg = rd32(E1000_VLVF(i));
-               if ((reg & E1000_VLVF_VLANID_ENABLE) &&
-                   vid == (reg & E1000_VLVF_VLANID_MASK))
-                       break;
+       /* See if any other pools are set for this VLAN filter
+        * entry other than the PF.
+        */
+       pf_id = adapter->vfs_allocated_count + E1000_VLVF_POOLSEL_SHIFT;
+       bits = ~(1 << pf_id) & E1000_VLVF_POOLSEL_MASK;
+       bits &= rd32(E1000_VLVF(idx));
+       /* Disable the filter so this falls into the default pool. */
+       if (!bits) {
+               if (adapter->flags & IGB_FLAG_VLAN_PROMISC)
+                       wr32(E1000_VLVF(idx), 1 << pf_id);
+               else
+                       wr32(E1000_VLVF(idx), 0);
        }
+ }
  
-       if (add) {
-               if (i == E1000_VLVF_ARRAY_SIZE) {
-                       /* Did not find a matching VLAN ID entry that was
-                        * enabled.  Search for a free filter entry, i.e.
-                        * one without the enable bit set
-                        */
-                       for (i = 0; i < E1000_VLVF_ARRAY_SIZE; i++) {
-                               reg = rd32(E1000_VLVF(i));
-                               if (!(reg & E1000_VLVF_VLANID_ENABLE))
-                                       break;
-                       }
-               }
-               if (i < E1000_VLVF_ARRAY_SIZE) {
-                       /* Found an enabled/available entry */
-                       reg |= 1 << (E1000_VLVF_POOLSEL_SHIFT + vf);
-                       /* if !enabled we need to set this up in vfta */
-                       if (!(reg & E1000_VLVF_VLANID_ENABLE)) {
-                               /* add VID to filter table */
-                               igb_vfta_set(hw, vid, true);
-                               reg |= E1000_VLVF_VLANID_ENABLE;
-                       }
-                       reg &= ~E1000_VLVF_VLANID_MASK;
-                       reg |= vid;
-                       wr32(E1000_VLVF(i), reg);
-                       /* do not modify RLPML for PF devices */
-                       if (vf >= adapter->vfs_allocated_count)
-                               return 0;
-                       if (!adapter->vf_data[vf].vlans_enabled) {
-                               u32 size;
-                               reg = rd32(E1000_VMOLR(vf));
-                               size = reg & E1000_VMOLR_RLPML_MASK;
-                               size += 4;
-                               reg &= ~E1000_VMOLR_RLPML_MASK;
-                               reg |= size;
-                               wr32(E1000_VMOLR(vf), reg);
-                       }
+ static s32 igb_set_vf_vlan(struct igb_adapter *adapter, u32 vid,
+                          bool add, u32 vf)
+ {
+       int pf_id = adapter->vfs_allocated_count;
+       struct e1000_hw *hw = &adapter->hw;
+       int err;
  
-                       adapter->vf_data[vf].vlans_enabled++;
-               }
-       } else {
-               if (i < E1000_VLVF_ARRAY_SIZE) {
-                       /* remove vf from the pool */
-                       reg &= ~(1 << (E1000_VLVF_POOLSEL_SHIFT + vf));
-                       /* if pool is empty then remove entry from vfta */
-                       if (!(reg & E1000_VLVF_POOLSEL_MASK)) {
-                               reg = 0;
-                               igb_vfta_set(hw, vid, false);
-                       }
-                       wr32(E1000_VLVF(i), reg);
-                       /* do not modify RLPML for PF devices */
-                       if (vf >= adapter->vfs_allocated_count)
-                               return 0;
-                       adapter->vf_data[vf].vlans_enabled--;
-                       if (!adapter->vf_data[vf].vlans_enabled) {
-                               u32 size;
-                               reg = rd32(E1000_VMOLR(vf));
-                               size = reg & E1000_VMOLR_RLPML_MASK;
-                               size -= 4;
-                               reg &= ~E1000_VMOLR_RLPML_MASK;
-                               reg |= size;
-                               wr32(E1000_VMOLR(vf), reg);
-                       }
-               }
+       /* If the VLAN overlaps with one the PF is currently monitoring, make
+        * sure that we are able to allocate a VLVF entry.  This may be
+        * redundant, but it guarantees that the PF will maintain visibility
+        * to the VLAN.
+        */
+       if (add && test_bit(vid, adapter->active_vlans)) {
+               err = igb_vfta_set(hw, vid, pf_id, true, false);
+               if (err)
+                       return err;
        }
-       return 0;
+       err = igb_vfta_set(hw, vid, vf, add, false);
+       if (add && !err)
+               return err;
+       /* If we failed to add the VF VLAN or we are removing the VF VLAN
+        * we may need to drop the PF pool bit in order to allow us to free
+        * up the VLVF resources.
+        */
+       if (test_bit(vid, adapter->active_vlans) ||
+           (adapter->flags & IGB_FLAG_VLAN_PROMISC))
+               igb_update_pf_vlvf(adapter, vid);
+       return err;
  }
  
  static void igb_set_vmvir(struct igb_adapter *adapter, u32 vid, u32 vf)
                wr32(E1000_VMVIR(vf), 0);
  }
  
- static int igb_ndo_set_vf_vlan(struct net_device *netdev,
-                              int vf, u16 vlan, u8 qos)
+ static int igb_enable_port_vlan(struct igb_adapter *adapter, int vf,
+                               u16 vlan, u8 qos)
  {
-       int err = 0;
-       struct igb_adapter *adapter = netdev_priv(netdev);
+       int err;
  
-       if ((vf >= adapter->vfs_allocated_count) || (vlan > 4095) || (qos > 7))
-               return -EINVAL;
-       if (vlan || qos) {
-               err = igb_vlvf_set(adapter, vlan, !!vlan, vf);
-               if (err)
-                       goto out;
-               igb_set_vmvir(adapter, vlan | (qos << VLAN_PRIO_SHIFT), vf);
-               igb_set_vmolr(adapter, vf, !vlan);
-               adapter->vf_data[vf].pf_vlan = vlan;
-               adapter->vf_data[vf].pf_qos = qos;
-               dev_info(&adapter->pdev->dev,
-                        "Setting VLAN %d, QOS 0x%x on VF %d\n", vlan, qos, vf);
-               if (test_bit(__IGB_DOWN, &adapter->state)) {
-                       dev_warn(&adapter->pdev->dev,
-                                "The VF VLAN has been set, but the PF device is not up.\n");
-                       dev_warn(&adapter->pdev->dev,
-                                "Bring the PF device up before attempting to use the VF device.\n");
-               }
-       } else {
-               igb_vlvf_set(adapter, adapter->vf_data[vf].pf_vlan,
-                            false, vf);
-               igb_set_vmvir(adapter, vlan, vf);
-               igb_set_vmolr(adapter, vf, true);
-               adapter->vf_data[vf].pf_vlan = 0;
-               adapter->vf_data[vf].pf_qos = 0;
+       err = igb_set_vf_vlan(adapter, vlan, true, vf);
+       if (err)
+               return err;
+       igb_set_vmvir(adapter, vlan | (qos << VLAN_PRIO_SHIFT), vf);
+       igb_set_vmolr(adapter, vf, !vlan);
+       /* revoke access to previous VLAN */
+       if (vlan != adapter->vf_data[vf].pf_vlan)
+               igb_set_vf_vlan(adapter, adapter->vf_data[vf].pf_vlan,
+                               false, vf);
+       adapter->vf_data[vf].pf_vlan = vlan;
+       adapter->vf_data[vf].pf_qos = qos;
+       igb_set_vf_vlan_strip(adapter, vf, true);
+       dev_info(&adapter->pdev->dev,
+                "Setting VLAN %d, QOS 0x%x on VF %d\n", vlan, qos, vf);
+       if (test_bit(__IGB_DOWN, &adapter->state)) {
+               dev_warn(&adapter->pdev->dev,
+                        "The VF VLAN has been set, but the PF device is not up.\n");
+               dev_warn(&adapter->pdev->dev,
+                        "Bring the PF device up before attempting to use the VF device.\n");
        }
- out:
        return err;
  }
  
- static int igb_find_vlvf_entry(struct igb_adapter *adapter, int vid)
+ static int igb_disable_port_vlan(struct igb_adapter *adapter, int vf)
  {
-       struct e1000_hw *hw = &adapter->hw;
-       int i;
-       u32 reg;
+       /* Restore tagless access via VLAN 0 */
+       igb_set_vf_vlan(adapter, 0, true, vf);
  
-       /* Find the vlan filter for this id */
-       for (i = 0; i < E1000_VLVF_ARRAY_SIZE; i++) {
-               reg = rd32(E1000_VLVF(i));
-               if ((reg & E1000_VLVF_VLANID_ENABLE) &&
-                   vid == (reg & E1000_VLVF_VLANID_MASK))
-                       break;
-       }
+       igb_set_vmvir(adapter, 0, vf);
+       igb_set_vmolr(adapter, vf, true);
+       /* Remove any PF assigned VLAN */
+       if (adapter->vf_data[vf].pf_vlan)
+               igb_set_vf_vlan(adapter, adapter->vf_data[vf].pf_vlan,
+                               false, vf);
  
-       if (i >= E1000_VLVF_ARRAY_SIZE)
-               i = -1;
+       adapter->vf_data[vf].pf_vlan = 0;
+       adapter->vf_data[vf].pf_qos = 0;
+       igb_set_vf_vlan_strip(adapter, vf, false);
  
-       return i;
+       return 0;
  }
  
- static int igb_set_vf_vlan(struct igb_adapter *adapter, u32 *msgbuf, u32 vf)
+ static int igb_ndo_set_vf_vlan(struct net_device *netdev,
+                              int vf, u16 vlan, u8 qos)
  {
-       struct e1000_hw *hw = &adapter->hw;
-       int add = (msgbuf[0] & E1000_VT_MSGINFO_MASK) >> E1000_VT_MSGINFO_SHIFT;
-       int vid = (msgbuf[1] & E1000_VLVF_VLANID_MASK);
-       int err = 0;
+       struct igb_adapter *adapter = netdev_priv(netdev);
  
-       /* If in promiscuous mode we need to make sure the PF also has
-        * the VLAN filter set.
-        */
-       if (add && (adapter->netdev->flags & IFF_PROMISC))
-               err = igb_vlvf_set(adapter, vid, add,
-                                  adapter->vfs_allocated_count);
-       if (err)
-               goto out;
+       if ((vf >= adapter->vfs_allocated_count) || (vlan > 4095) || (qos > 7))
+               return -EINVAL;
  
-       err = igb_vlvf_set(adapter, vid, add, vf);
+       return (vlan || qos) ? igb_enable_port_vlan(adapter, vf, vlan, qos) :
+                              igb_disable_port_vlan(adapter, vf);
+ }
  
-       if (err)
-               goto out;
+ static int igb_set_vf_vlan_msg(struct igb_adapter *adapter, u32 *msgbuf, u32 vf)
+ {
+       int add = (msgbuf[0] & E1000_VT_MSGINFO_MASK) >> E1000_VT_MSGINFO_SHIFT;
+       int vid = (msgbuf[1] & E1000_VLVF_VLANID_MASK);
+       int ret;
  
-       /* Go through all the checks to see if the VLAN filter should
-        * be wiped completely.
-        */
-       if (!add && (adapter->netdev->flags & IFF_PROMISC)) {
-               u32 vlvf, bits;
-               int regndx = igb_find_vlvf_entry(adapter, vid);
-               if (regndx < 0)
-                       goto out;
-               /* See if any other pools are set for this VLAN filter
-                * entry other than the PF.
-                */
-               vlvf = bits = rd32(E1000_VLVF(regndx));
-               bits &= 1 << (E1000_VLVF_POOLSEL_SHIFT +
-                             adapter->vfs_allocated_count);
-               /* If the filter was removed then ensure PF pool bit
-                * is cleared if the PF only added itself to the pool
-                * because the PF is in promiscuous mode.
-                */
-               if ((vlvf & VLAN_VID_MASK) == vid &&
-                   !test_bit(vid, adapter->active_vlans) &&
-                   !bits)
-                       igb_vlvf_set(adapter, vid, add,
-                                    adapter->vfs_allocated_count);
-       }
+       if (adapter->vf_data[vf].pf_vlan)
+               return -1;
  
- out:
-       return err;
+       /* VLAN 0 is a special case, don't allow it to be removed */
+       if (!vid && !add)
+               return 0;
+       ret = igb_set_vf_vlan(adapter, vid, !!add, vf);
+       if (!ret)
+               igb_set_vf_vlan_strip(adapter, vf, !!vid);
+       return ret;
  }
  
  static inline void igb_vf_reset(struct igb_adapter *adapter, u32 vf)
  {
-       /* clear flags - except flag that indicates PF has set the MAC */
-       adapter->vf_data[vf].flags &= IGB_VF_FLAG_PF_SET_MAC;
-       adapter->vf_data[vf].last_nack = jiffies;
+       struct vf_data_storage *vf_data = &adapter->vf_data[vf];
  
-       /* reset offloads to defaults */
-       igb_set_vmolr(adapter, vf, true);
+       /* clear flags - except flag that indicates PF has set the MAC */
+       vf_data->flags &= IGB_VF_FLAG_PF_SET_MAC;
+       vf_data->last_nack = jiffies;
  
        /* reset vlans for device */
        igb_clear_vf_vfta(adapter, vf);
-       if (adapter->vf_data[vf].pf_vlan)
-               igb_ndo_set_vf_vlan(adapter->netdev, vf,
-                                   adapter->vf_data[vf].pf_vlan,
-                                   adapter->vf_data[vf].pf_qos);
-       else
-               igb_clear_vf_vfta(adapter, vf);
+       igb_set_vf_vlan(adapter, vf_data->pf_vlan, true, vf);
+       igb_set_vmvir(adapter, vf_data->pf_vlan |
+                              (vf_data->pf_qos << VLAN_PRIO_SHIFT), vf);
+       igb_set_vmolr(adapter, vf, !vf_data->pf_vlan);
+       igb_set_vf_vlan_strip(adapter, vf, !!(vf_data->pf_vlan));
  
        /* reset multicast table array for vf */
        adapter->vf_data[vf].num_vf_mc_hashes = 0;
@@@ -6191,7 -6336,7 +6336,7 @@@ static void igb_rcv_msg_from_vf(struct 
                                 "VF %d attempted to override administratively set VLAN tag\nReload the VF driver to resume operations\n",
                                 vf);
                else
-                       retval = igb_set_vf_vlan(adapter, msgbuf, vf);
+                       retval = igb_set_vf_vlan_msg(adapter, msgbuf, vf);
                break;
        default:
                dev_err(&pdev->dev, "Unhandled Msg %08x\n", msgbuf[0]);
@@@ -6233,6 -6378,7 +6378,7 @@@ static void igb_msg_task(struct igb_ada
  /**
   *  igb_set_uta - Set unicast filter table address
   *  @adapter: board private structure
+  *  @set: boolean indicating if we are setting or clearing bits
   *
   *  The unicast table address is a register array of 32-bit registers.
   *  The table is meant to be used in a way similar to how the MTA is used
   *  set all the hash bits to 1 and use the VMOLR ROPE bit as a promiscuous
   *  enable bit to allow vlan tag stripping when promiscuous mode is enabled
   **/
- static void igb_set_uta(struct igb_adapter *adapter)
+ static void igb_set_uta(struct igb_adapter *adapter, bool set)
  {
        struct e1000_hw *hw = &adapter->hw;
+       u32 uta = set ? ~0 : 0;
        int i;
  
-       /* The UTA table only exists on 82576 hardware and newer */
-       if (hw->mac.type < e1000_82576)
-               return;
        /* we only need to do this if VMDq is enabled */
        if (!adapter->vfs_allocated_count)
                return;
  
-       for (i = 0; i < hw->mac.uta_reg_count; i++)
-               array_wr32(E1000_UTA, i, ~0);
+       for (i = hw->mac.uta_reg_count; i--;)
+               array_wr32(E1000_UTA, i, uta);
  }
  
  /**
@@@ -6630,7 -6773,7 +6773,7 @@@ static bool igb_can_reuse_rx_page(struc
        /* Even if we own the page, we are not allowed to use atomic_set()
         * This would break get_page_unless_zero() users.
         */
 -      atomic_inc(&page->_count);
 +      page_ref_inc(page);
  
        return true;
  }
@@@ -7202,7 -7345,7 +7345,7 @@@ static void igb_vlan_mode(struct net_de
                wr32(E1000_CTRL, ctrl);
        }
  
-       igb_rlpml_set(adapter);
+       igb_set_vf_vlan_strip(adapter, adapter->vfs_allocated_count, enable);
  }
  
  static int igb_vlan_rx_add_vid(struct net_device *netdev,
        struct e1000_hw *hw = &adapter->hw;
        int pf_id = adapter->vfs_allocated_count;
  
-       /* attempt to add filter to vlvf array */
-       igb_vlvf_set(adapter, vid, true, pf_id);
        /* add the filter since PF can receive vlans w/o entry in vlvf */
-       igb_vfta_set(hw, vid, true);
+       if (!vid || !(adapter->flags & IGB_FLAG_VLAN_PROMISC))
+               igb_vfta_set(hw, vid, pf_id, true, !!vid);
  
        set_bit(vid, adapter->active_vlans);
  
@@@ -7227,16 -7368,12 +7368,12 @@@ static int igb_vlan_rx_kill_vid(struct 
                                __be16 proto, u16 vid)
  {
        struct igb_adapter *adapter = netdev_priv(netdev);
-       struct e1000_hw *hw = &adapter->hw;
        int pf_id = adapter->vfs_allocated_count;
-       s32 err;
-       /* remove vlan from VLVF table array */
-       err = igb_vlvf_set(adapter, vid, false, pf_id);
+       struct e1000_hw *hw = &adapter->hw;
  
-       /* if vid was not present in VLVF just remove it from table */
-       if (err)
-               igb_vfta_set(hw, vid, false);
+       /* remove VID from filter table */
+       if (vid && !(adapter->flags & IGB_FLAG_VLAN_PROMISC))
+               igb_vfta_set(hw, vid, pf_id, false, true);
  
        clear_bit(vid, adapter->active_vlans);
  
  
  static void igb_restore_vlan(struct igb_adapter *adapter)
  {
-       u16 vid;
+       u16 vid = 1;
  
        igb_vlan_mode(adapter->netdev, adapter->netdev->features);
+       igb_vlan_rx_add_vid(adapter->netdev, htons(ETH_P_8021Q), 0);
  
-       for_each_set_bit(vid, adapter->active_vlans, VLAN_N_VID)
+       for_each_set_bit_from(vid, adapter->active_vlans, VLAN_N_VID)
                igb_vlan_rx_add_vid(adapter->netdev, htons(ETH_P_8021Q), vid);
  }
  
@@@ -7704,15 -7842,14 +7842,14 @@@ static void igb_io_resume(struct pci_de
  static void igb_rar_set_qsel(struct igb_adapter *adapter, u8 *addr, u32 index,
                             u8 qsel)
  {
-       u32 rar_low, rar_high;
        struct e1000_hw *hw = &adapter->hw;
+       u32 rar_low, rar_high;
  
        /* HW expects these in little endian so we reverse the byte order
-        * from network order (big endian) to little endian
+        * from network order (big endian) to CPU endian
         */
-       rar_low = ((u32) addr[0] | ((u32) addr[1] << 8) |
-                  ((u32) addr[2] << 16) | ((u32) addr[3] << 24));
-       rar_high = ((u32) addr[4] | ((u32) addr[5] << 8));
+       rar_low = le32_to_cpup((__be32 *)(addr));
+       rar_high = le16_to_cpup((__be16 *)(addr + 4));
  
        /* Indicate to hardware the Address is Valid. */
        rar_high |= E1000_RAH_AV;
@@@ -7959,9 -8096,7 +8096,7 @@@ static void igb_init_dmac(struct igb_ad
                         * than the Rx threshold. Set hwm to PBA - max frame
                         * size in 16B units, capping it at PBA - 6KB.
                         */
-                       hwm = 64 * pba - adapter->max_frame_size / 16;
-                       if (hwm < 64 * (pba - 6))
-                               hwm = 64 * (pba - 6);
+                       hwm = 64 * (pba - 6);
                        reg = rd32(E1000_FCRTC);
                        reg &= ~E1000_FCRTC_RTH_COAL_MASK;
                        reg |= ((hwm << E1000_FCRTC_RTH_COAL_SHIFT)
                        /* Set the DMA Coalescing Rx threshold to PBA - 2 * max
                         * frame size, capping it at PBA - 10KB.
                         */
-                       dmac_thr = pba - adapter->max_frame_size / 512;
-                       if (dmac_thr < pba - 10)
-                               dmac_thr = pba - 10;
+                       dmac_thr = pba - 10;
                        reg = rd32(E1000_DMACR);
                        reg &= ~E1000_DMACR_DMACTHR_MASK;
                        reg |= ((dmac_thr << E1000_DMACR_DMACTHR_SHIFT)
index e6035ff6b86163faeeedbc4b6b484af52f98a986,4d6223da4a19b7ad337714760552bf226b00f23f..569cb0757c93b02f35f5da97f0781cd0fa40953c
@@@ -51,6 -51,8 +51,8 @@@
  #include <linux/prefetch.h>
  #include <scsi/fc/fc_fcoe.h>
  #include <net/vxlan.h>
+ #include <net/pkt_cls.h>
+ #include <net/tc_act/tc_gact.h>
  
  #ifdef CONFIG_OF
  #include <linux/of_net.h>
@@@ -65,6 -67,7 +67,7 @@@
  #include "ixgbe_common.h"
  #include "ixgbe_dcb_82599.h"
  #include "ixgbe_sriov.h"
+ #include "ixgbe_model.h"
  
  char ixgbe_driver_name[] = "ixgbe";
  static const char ixgbe_driver_string[] =
@@@ -1089,7 -1092,7 +1092,7 @@@ static void ixgbe_tx_timeout_reset(stru
   * @tx_ring: tx ring to clean
   **/
  static bool ixgbe_clean_tx_irq(struct ixgbe_q_vector *q_vector,
-                              struct ixgbe_ring *tx_ring)
+                              struct ixgbe_ring *tx_ring, int napi_budget)
  {
        struct ixgbe_adapter *adapter = q_vector->adapter;
        struct ixgbe_tx_buffer *tx_buffer;
                total_packets += tx_buffer->gso_segs;
  
                /* free the skb */
-               dev_consume_skb_any(tx_buffer->skb);
+               napi_consume_skb(tx_buffer->skb, napi_budget);
  
                /* unmap skb header data */
                dma_unmap_single(tx_ring->dev,
@@@ -1942,7 -1945,7 +1945,7 @@@ static bool ixgbe_add_rx_frag(struct ix
        /* Even if we own the page, we are not allowed to use atomic_set()
         * This would break get_page_unless_zero() users.
         */
 -      atomic_inc(&page->_count);
 +      page_ref_inc(page);
  
        return true;
  }
@@@ -2784,7 -2787,7 +2787,7 @@@ int ixgbe_poll(struct napi_struct *napi
  #endif
  
        ixgbe_for_each_ring(ring, q_vector->tx)
-               clean_complete &= !!ixgbe_clean_tx_irq(q_vector, ring);
+               clean_complete &= !!ixgbe_clean_tx_irq(q_vector, ring, budget);
  
        /* Exit if we are called by netpoll or busy polling is active */
        if ((budget <= 0) || !ixgbe_qv_lock_napi(q_vector))
@@@ -5545,6 -5548,9 +5548,9 @@@ static int ixgbe_sw_init(struct ixgbe_a
  #endif /* CONFIG_IXGBE_DCB */
  #endif /* IXGBE_FCOE */
  
+       /* initialize static ixgbe jump table entries */
+       adapter->jump_tables[0] = ixgbe_ipv4_fields;
        adapter->mac_table = kzalloc(sizeof(struct ixgbe_mac_addr) *
                                     hw->mac.num_rar_entries,
                                     GFP_ATOMIC);
@@@ -8200,6 -8206,225 +8206,225 @@@ int ixgbe_setup_tc(struct net_device *d
        return 0;
  }
  
+ static int ixgbe_delete_clsu32(struct ixgbe_adapter *adapter,
+                              struct tc_cls_u32_offload *cls)
+ {
+       int err;
+       spin_lock(&adapter->fdir_perfect_lock);
+       err = ixgbe_update_ethtool_fdir_entry(adapter, NULL, cls->knode.handle);
+       spin_unlock(&adapter->fdir_perfect_lock);
+       return err;
+ }
+
+ static int ixgbe_configure_clsu32_add_hnode(struct ixgbe_adapter *adapter,
+                                           __be16 protocol,
+                                           struct tc_cls_u32_offload *cls)
+ {
+       /* These ixgbe devices do not support hash tables at the moment,
+        * so abort when given hash tables.
+        */
+       if (cls->hnode.divisor > 0)
+               return -EINVAL;
+       set_bit(TC_U32_USERHTID(cls->hnode.handle), &adapter->tables);
+       return 0;
+ }
+
+ static int ixgbe_configure_clsu32_del_hnode(struct ixgbe_adapter *adapter,
+                                           struct tc_cls_u32_offload *cls)
+ {
+       clear_bit(TC_U32_USERHTID(cls->hnode.handle), &adapter->tables);
+       return 0;
+ }
+
+ static int ixgbe_configure_clsu32(struct ixgbe_adapter *adapter,
+                                 __be16 protocol,
+                                 struct tc_cls_u32_offload *cls)
+ {
+       u32 loc = cls->knode.handle & 0xfffff;
+       struct ixgbe_hw *hw = &adapter->hw;
+       struct ixgbe_mat_field *field_ptr;
+       struct ixgbe_fdir_filter *input;
+       union ixgbe_atr_input mask;
+ #ifdef CONFIG_NET_CLS_ACT
+       const struct tc_action *a;
+ #endif
+       int i, err = 0;
+       u8 queue;
+       u32 handle;
+       memset(&mask, 0, sizeof(union ixgbe_atr_input));
+       handle = cls->knode.handle;
+       /* At the moment cls_u32 jumps to transport layer and skips past
+        * L2 headers. The canonical method to match L2 frames is to use
+        * negative values. However, this is error-prone at best and really
+        * just broken, because there is no way to "know" what sort of header
+        * is in front of the transport layer. Fix cls_u32 to support L2
+        * headers when needed.
+        */
+       if (protocol != htons(ETH_P_IP))
+               return -EINVAL;
+       if (cls->knode.link_handle ||
+           cls->knode.link_handle >= IXGBE_MAX_LINK_HANDLE) {
+               struct ixgbe_nexthdr *nexthdr = ixgbe_ipv4_jumps;
+               u32 uhtid = TC_U32_USERHTID(cls->knode.link_handle);
+               if (!test_bit(uhtid, &adapter->tables))
+                       return -EINVAL;
+               for (i = 0; nexthdr[i].jump; i++) {
+                       if (nexthdr->o != cls->knode.sel->offoff ||
+                           nexthdr->s != cls->knode.sel->offshift ||
+                           nexthdr->m != cls->knode.sel->offmask ||
+                           /* do not support multiple key jumps; it's just mad */
+                           cls->knode.sel->nkeys > 1)
+                               return -EINVAL;
+                       if (nexthdr->off != cls->knode.sel->keys[0].off ||
+                           nexthdr->val != cls->knode.sel->keys[0].val ||
+                           nexthdr->mask != cls->knode.sel->keys[0].mask)
+                               return -EINVAL;
+                       if (uhtid >= IXGBE_MAX_LINK_HANDLE)
+                               return -EINVAL;
+                       adapter->jump_tables[uhtid] = nexthdr->jump;
+               }
+               return 0;
+       }
+       if (loc >= ((1024 << adapter->fdir_pballoc) - 2)) {
+               e_err(drv, "Location out of range\n");
+               return -EINVAL;
+       }
+       /* cls u32 is a graph starting at root node 0x800. The driver tracks
+        * links and also the fields used to advance the parser across each
+        * link (e.g. nexthdr/eat parameters from 'tc'). This way we can map
+        * the u32 graph onto the hardware parse graph denoted in ixgbe_model.h.
+        * To add support for new nodes, update the ixgbe_model.h parse structures;
+        * this function _should_ be generic, so try not to hardcode values here.
+        */
+       if (TC_U32_USERHTID(handle) == 0x800) {
+               field_ptr = adapter->jump_tables[0];
+       } else {
+               if (TC_U32_USERHTID(handle) >= ARRAY_SIZE(adapter->jump_tables))
+                       return -EINVAL;
+               field_ptr = adapter->jump_tables[TC_U32_USERHTID(handle)];
+       }
+       if (!field_ptr)
+               return -EINVAL;
+       input = kzalloc(sizeof(*input), GFP_KERNEL);
+       if (!input)
+               return -ENOMEM;
+       for (i = 0; i < cls->knode.sel->nkeys; i++) {
+               int off = cls->knode.sel->keys[i].off;
+               __be32 val = cls->knode.sel->keys[i].val;
+               __be32 m = cls->knode.sel->keys[i].mask;
+               bool found_entry = false;
+               int j;
+               for (j = 0; field_ptr[j].val; j++) {
+                       if (field_ptr[j].off == off &&
+                           field_ptr[j].mask == m) {
+                               field_ptr[j].val(input, &mask, val, m);
+                               input->filter.formatted.flow_type |=
+                                       field_ptr[j].type;
+                               found_entry = true;
+                               break;
+                       }
+               }
+               if (!found_entry)
+                       goto err_out;
+       }
+       mask.formatted.flow_type = IXGBE_ATR_L4TYPE_IPV6_MASK |
+                                  IXGBE_ATR_L4TYPE_MASK;
+       if (input->filter.formatted.flow_type == IXGBE_ATR_FLOW_TYPE_IPV4)
+               mask.formatted.flow_type &= IXGBE_ATR_L4TYPE_IPV6_MASK;
+ #ifdef CONFIG_NET_CLS_ACT
+       if (list_empty(&cls->knode.exts->actions))
+               goto err_out;
+       list_for_each_entry(a, &cls->knode.exts->actions, list) {
+               if (!is_tcf_gact_shot(a))
+                       goto err_out;
+       }
+ #endif
+       input->action = IXGBE_FDIR_DROP_QUEUE;
+       queue = IXGBE_FDIR_DROP_QUEUE;
+       input->sw_idx = loc;
+       spin_lock(&adapter->fdir_perfect_lock);
+       if (hlist_empty(&adapter->fdir_filter_list)) {
+               memcpy(&adapter->fdir_mask, &mask, sizeof(mask));
+               err = ixgbe_fdir_set_input_mask_82599(hw, &mask);
+               if (err)
+                       goto err_out_w_lock;
+       } else if (memcmp(&adapter->fdir_mask, &mask, sizeof(mask))) {
+               err = -EINVAL;
+               goto err_out_w_lock;
+       }
+       ixgbe_atr_compute_perfect_hash_82599(&input->filter, &mask);
+       err = ixgbe_fdir_write_perfect_filter_82599(hw, &input->filter,
+                                                   input->sw_idx, queue);
+       if (!err)
+               ixgbe_update_ethtool_fdir_entry(adapter, input, input->sw_idx);
+       spin_unlock(&adapter->fdir_perfect_lock);
+       return err;
+ err_out_w_lock:
+       spin_unlock(&adapter->fdir_perfect_lock);
+ err_out:
+       kfree(input);
+       return -EINVAL;
+ }
+
+ int __ixgbe_setup_tc(struct net_device *dev, u32 handle, __be16 proto,
+                    struct tc_to_netdev *tc)
+ {
+       struct ixgbe_adapter *adapter = netdev_priv(dev);
+       if (TC_H_MAJ(handle) == TC_H_MAJ(TC_H_INGRESS) &&
+           tc->type == TC_SETUP_CLSU32) {
+               switch (tc->cls_u32->command) {
+               case TC_CLSU32_NEW_KNODE:
+               case TC_CLSU32_REPLACE_KNODE:
+                       return ixgbe_configure_clsu32(adapter,
+                                                     proto, tc->cls_u32);
+               case TC_CLSU32_DELETE_KNODE:
+                       return ixgbe_delete_clsu32(adapter, tc->cls_u32);
+               case TC_CLSU32_NEW_HNODE:
+               case TC_CLSU32_REPLACE_HNODE:
+                       return ixgbe_configure_clsu32_add_hnode(adapter, proto,
+                                                               tc->cls_u32);
+               case TC_CLSU32_DELETE_HNODE:
+                       return ixgbe_configure_clsu32_del_hnode(adapter,
+                                                               tc->cls_u32);
+               default:
+                       return -EINVAL;
+               }
+       }
+       if (tc->type != TC_SETUP_MQPRIO)
+               return -EINVAL;
+       return ixgbe_setup_tc(dev, tc->tc);
+ }
+
  #ifdef CONFIG_PCI_IOV
  void ixgbe_sriov_reinit(struct ixgbe_adapter *adapter)
  {
@@@ -8262,19 -8487,17 +8487,17 @@@ static int ixgbe_set_features(struct ne
        }
  
        /*
-        * Check if Flow Director n-tuple support was enabled or disabled.  If
-        * the state changed, we need to reset.
+        * Check if Flow Director n-tuple support or hw_tc support was
+        * enabled or disabled.  If the state changed, we need to reset.
         */
-       switch (features & NETIF_F_NTUPLE) {
-       case NETIF_F_NTUPLE:
+       if ((features & NETIF_F_NTUPLE) || (features & NETIF_F_HW_TC)) {
                /* turn off ATR, enable perfect filters and reset */
                if (!(adapter->flags & IXGBE_FLAG_FDIR_PERFECT_CAPABLE))
                        need_reset = true;
  
                adapter->flags &= ~IXGBE_FLAG_FDIR_HASH_CAPABLE;
                adapter->flags |= IXGBE_FLAG_FDIR_PERFECT_CAPABLE;
-               break;
-       default:
+       } else {
                /* turn off perfect filters, enable ATR and reset */
                if (adapter->flags & IXGBE_FLAG_FDIR_PERFECT_CAPABLE)
                        need_reset = true;
                adapter->flags &= ~IXGBE_FLAG_FDIR_PERFECT_CAPABLE;
  
                /* We cannot enable ATR if SR-IOV is enabled */
-               if (adapter->flags & IXGBE_FLAG_SRIOV_ENABLED)
-                       break;
-               /* We cannot enable ATR if we have 2 or more traffic classes */
-               if (netdev_get_num_tc(netdev) > 1)
-                       break;
-               /* We cannot enable ATR if RSS is disabled */
-               if (adapter->ring_feature[RING_F_RSS].limit <= 1)
-                       break;
-               /* A sample rate of 0 indicates ATR disabled */
-               if (!adapter->atr_sample_rate)
-                       break;
-               adapter->flags |= IXGBE_FLAG_FDIR_HASH_CAPABLE;
-               break;
+               if (adapter->flags & IXGBE_FLAG_SRIOV_ENABLED ||
+                   /* We cannot enable ATR if we have 2 or more tcs */
+                   (netdev_get_num_tc(netdev) > 1) ||
+                   /* We cannot enable ATR if RSS is disabled */
+                   (adapter->ring_feature[RING_F_RSS].limit <= 1) ||
+                   /* A sample rate of 0 indicates ATR disabled */
+                   (!adapter->atr_sample_rate))
+                       ; /* do nothing, not supported */
+               else /* otherwise supported and set the flag */
+                       adapter->flags |= IXGBE_FLAG_FDIR_HASH_CAPABLE;
        }
  
        if (features & NETIF_F_HW_VLAN_CTAG_RX)
@@@ -8657,9 -8873,7 +8873,7 @@@ static const struct net_device_ops ixgb
        .ndo_set_vf_trust       = ixgbe_ndo_set_vf_trust,
        .ndo_get_vf_config      = ixgbe_ndo_get_vf_config,
        .ndo_get_stats64        = ixgbe_get_stats64,
- #ifdef CONFIG_IXGBE_DCB
-       .ndo_setup_tc           = ixgbe_setup_tc,
- #endif
+       .ndo_setup_tc           = __ixgbe_setup_tc,
  #ifdef CONFIG_NET_POLL_CONTROLLER
        .ndo_poll_controller    = ixgbe_netpoll,
  #endif
@@@ -9030,7 -9244,8 +9244,8 @@@ skip_sriov
        case ixgbe_mac_X550EM_x:
                netdev->features |= NETIF_F_SCTP_CRC;
                netdev->hw_features |= NETIF_F_SCTP_CRC |
-                                      NETIF_F_NTUPLE;
+                                      NETIF_F_NTUPLE |
+                                      NETIF_F_HW_TC;
                break;
        default:
                break;
index 81b2013ef968a7edc5e45d313f293f964b9feb92,0cb4a093958b5c7434b212f4c7b484c888671c32..879e6276c4736cb6f14b49db220e102c66636092
@@@ -1,5 -1,5 +1,5 @@@
  /*
-  * Copyright (c) 2015, Mellanox Technologies. All rights reserved.
+  * Copyright (c) 2015-2016, Mellanox Technologies. All rights reserved.
   *
   * This software is available to you under a choice of one of two
   * licenses.  You may choose to be licensed under the terms of the GNU
@@@ -29,6 -29,8 +29,8 @@@
   * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
   * SOFTWARE.
   */
+ #ifndef __MLX5_EN_H__
+ #define __MLX5_EN_H__
  
  #include <linux/if_vlan.h>
  #include <linux/etherdevice.h>
  #include <linux/mlx5/driver.h>
  #include <linux/mlx5/qp.h>
  #include <linux/mlx5/cq.h>
+ #include <linux/mlx5/port.h>
  #include <linux/mlx5/vport.h>
  #include <linux/mlx5/transobj.h>
+ #include <linux/rhashtable.h>
  #include "wq.h"
  #include "mlx5_core.h"
  
  
  #define MLX5E_NUM_MAIN_GROUPS 9
  
+ #ifdef CONFIG_MLX5_CORE_EN_DCB
+ #define MLX5E_MAX_BW_ALLOC 100 /* Max percentage of BW allocation */
+ #define MLX5E_MIN_BW_ALLOC 1   /* Min percentage of BW allocation */
+ #endif
  static const char vport_strings[][ETH_GSTRING_LEN] = {
        /* vport statistics */
        "rx_packets",
        /* SW counters */
        "tso_packets",
        "tso_bytes",
+       "tso_inner_packets",
+       "tso_inner_bytes",
        "lro_packets",
        "lro_bytes",
        "rx_csum_good",
        "rx_csum_none",
        "rx_csum_sw",
        "tx_csum_offload",
+       "tx_csum_inner",
        "tx_queue_stopped",
        "tx_queue_wake",
        "tx_queue_dropped",
@@@ -133,18 -145,21 +145,21 @@@ struct mlx5e_vport_stats 
        /* SW counters */
        u64 tso_packets;
        u64 tso_bytes;
+       u64 tso_inner_packets;
+       u64 tso_inner_bytes;
        u64 lro_packets;
        u64 lro_bytes;
        u64 rx_csum_good;
        u64 rx_csum_none;
        u64 rx_csum_sw;
        u64 tx_csum_offload;
+       u64 tx_csum_inner;
        u64 tx_queue_stopped;
        u64 tx_queue_wake;
        u64 tx_queue_dropped;
        u64 rx_wqe_err;
  
- #define NUM_VPORT_COUNTERS     32
+ #define NUM_VPORT_COUNTERS     35
  };
  
  static const char pport_strings[][ETH_GSTRING_LEN] = {
@@@ -247,24 -262,32 +262,32 @@@ static const char sq_stats_strings[][ET
        "bytes",
        "tso_packets",
        "tso_bytes",
+       "tso_inner_packets",
+       "tso_inner_bytes",
+       "csum_offload_inner",
+       "nop",
        "csum_offload_none",
        "stopped",
        "wake",
        "dropped",
-       "nop"
  };
  
  struct mlx5e_sq_stats {
+       /* commonly accessed in data path */
        u64 packets;
        u64 bytes;
        u64 tso_packets;
        u64 tso_bytes;
+       u64 tso_inner_packets;
+       u64 tso_inner_bytes;
+       u64 csum_offload_inner;
+       u64 nop;
+       /* less likely accessed in data path */
        u64 csum_offload_none;
        u64 stopped;
        u64 wake;
        u64 dropped;
-       u64 nop;
- #define NUM_SQ_STATS 9
+ #define NUM_SQ_STATS 12
  };
  
  struct mlx5e_stats {
@@@ -276,7 -299,6 +299,6 @@@ struct mlx5e_params 
        u8  log_sq_size;
        u8  log_rq_size;
        u16 num_channels;
-       u8  default_vlan_prio;
        u8  num_tc;
        u16 rx_cq_moderation_usec;
        u16 rx_cq_moderation_pkts;
        u8  rss_hfunc;
        u8  toeplitz_hash_key[40];
        u32 indirection_rqt[MLX5E_INDIR_RQT_SIZE];
+ #ifdef CONFIG_MLX5_CORE_EN_DCB
+       struct ieee_ets ets;
+ #endif
  };
  
  struct mlx5e_tstamp {
@@@ -363,6 -388,7 +388,7 @@@ struct mlx5e_sq_dma 
  
  enum {
        MLX5E_SQ_STATE_WAKE_TXQ_ENABLE,
+       MLX5E_SQ_STATE_BF_ENABLE,
  };
  
  struct mlx5e_sq {
        struct mlx5_wq_cyc         wq;
        u32                        dma_fifo_mask;
        void __iomem              *uar_map;
-       void __iomem              *uar_bf_map;
        struct netdev_queue       *txq;
        u32                        sqn;
        u16                        bf_buf_size;
@@@ -492,21 -517,33 +517,33 @@@ struct mlx5e_vlan_db 
        bool          filter_disabled;
  };
  
+ struct mlx5e_vxlan_db {
+       spinlock_t                      lock; /* protect vxlan table */
+       struct radix_tree_root          tree;
+ };
+
  struct mlx5e_flow_table {
        int num_groups;
        struct mlx5_flow_table          *t;
        struct mlx5_flow_group          **g;
  };
  
+ struct mlx5e_tc_flow_table {
+       struct mlx5_flow_table          *t;
+       struct rhashtable_params        ht_params;
+       struct rhashtable               ht;
+ };
+
  struct mlx5e_flow_tables {
        struct mlx5_flow_namespace      *ns;
+       struct mlx5e_tc_flow_table      tc;
        struct mlx5e_flow_table         vlan;
        struct mlx5e_flow_table         main;
  };
  
  struct mlx5e_priv {
        /* priv data path fields - start */
-       int                        default_vlan_prio;
        struct mlx5e_sq            **txq_to_sq_map;
        int channeltc_to_txq_map[MLX5E_MAX_NUM_CHANNELS][MLX5E_MAX_NUM_TC];
        /* priv data path fields - end */
        struct mlx5_uar            cq_uar;
        u32                        pdn;
        u32                        tdn;
 -      struct mlx5_core_mr        mr;
 +      struct mlx5_core_mkey      mkey;
        struct mlx5e_rq            drop_rq;
  
        struct mlx5e_channel     **channel;
        struct mlx5e_flow_tables   fts;
        struct mlx5e_eth_addr_db   eth_addr;
        struct mlx5e_vlan_db       vlan;
+       struct mlx5e_vxlan_db      vxlan;
  
        struct mlx5e_params        params;
-       spinlock_t                 async_events_spinlock; /* sync hw events */
        struct work_struct         update_carrier_work;
        struct work_struct         set_rx_mode_work;
        struct delayed_work        update_stats_work;
@@@ -592,7 -629,7 +629,7 @@@ netdev_tx_t mlx5e_xmit(struct sk_buff *
  void mlx5e_completion_event(struct mlx5_core_cq *mcq);
  void mlx5e_cq_error_event(struct mlx5_core_cq *mcq, enum mlx5_event event);
  int mlx5e_napi_poll(struct napi_struct *napi, int budget);
- bool mlx5e_poll_tx_cq(struct mlx5e_cq *cq);
+ bool mlx5e_poll_tx_cq(struct mlx5e_cq *cq, int napi_budget);
  int mlx5e_poll_rx_cq(struct mlx5e_cq *cq, int budget);
  bool mlx5e_post_rx_wqes(struct mlx5e_rq *rq);
  struct mlx5_cqe64 *mlx5e_get_cqe(struct mlx5e_cq *cq);
@@@ -640,16 -677,12 +677,12 @@@ static inline void mlx5e_tx_notify_hw(s
         * doorbell
         */
        wmb();
-       if (bf_sz) {
-               __iowrite64_copy(sq->uar_bf_map + ofst, &wqe->ctrl, bf_sz);
-               /* flush the write-combining mapped buffer */
-               wmb();
-       } else {
+       if (bf_sz)
+               __iowrite64_copy(sq->uar_map + ofst, &wqe->ctrl, bf_sz);
+       else
                mlx5_write64((__be32 *)&wqe->ctrl, sq->uar_map + ofst, NULL);
-       }
+       /* flush the write-combining mapped buffer */
+       wmb();
  
        sq->bf_offset ^= sq->bf_buf_size;
  }
@@@ -669,4 -702,11 +702,11 @@@ static inline int mlx5e_get_max_num_cha
  }
  
  extern const struct ethtool_ops mlx5e_ethtool_ops;
+ #ifdef CONFIG_MLX5_CORE_EN_DCB
+ extern const struct dcbnl_rtnl_ops mlx5e_dcbnl_ops;
+ int mlx5e_dcbnl_ieee_setets_core(struct mlx5e_priv *priv, struct ieee_ets *ets);
+ #endif
  u16 mlx5e_get_max_inline_cap(struct mlx5_core_dev *mdev);
+ #endif /* __MLX5_EN_H__ */
index 0c49951606b6a664029b44becd8f454c1abc8885,ac5807803c8471d7b7015fb3c992e737e3b83f98..e0adb604f461e00e64952f70a661b62e1af787cc
@@@ -1,5 -1,5 +1,5 @@@
  /*
-  * Copyright (c) 2015, Mellanox Technologies. All rights reserved.
+  * Copyright (c) 2015-2016, Mellanox Technologies. All rights reserved.
   *
   * This software is available to you under a choice of one of two
   * licenses.  You may choose to be licensed under the terms of the GNU
   * SOFTWARE.
   */
  
+ #include <net/tc_act/tc_gact.h>
+ #include <net/pkt_cls.h>
  #include <linux/mlx5/fs.h>
+ #include <net/vxlan.h>
  #include "en.h"
+ #include "en_tc.h"
  #include "eswitch.h"
+ #include "vxlan.h"
  
  struct mlx5e_rq_param {
        u32                        rqc[MLX5_ST_SZ_DW(rqc)];
@@@ -147,9 -152,12 +152,12 @@@ void mlx5e_update_stats(struct mlx5e_pr
        s->tx_bytes             = 0;
        s->tso_packets          = 0;
        s->tso_bytes            = 0;
+       s->tso_inner_packets    = 0;
+       s->tso_inner_bytes      = 0;
        s->tx_queue_stopped     = 0;
        s->tx_queue_wake        = 0;
        s->tx_queue_dropped     = 0;
+       s->tx_csum_inner        = 0;
        tx_offload_none         = 0;
        s->lro_packets          = 0;
        s->lro_bytes            = 0;
                        s->tx_bytes             += sq_stats->bytes;
                        s->tso_packets          += sq_stats->tso_packets;
                        s->tso_bytes            += sq_stats->tso_bytes;
+                       s->tso_inner_packets    += sq_stats->tso_inner_packets;
+                       s->tso_inner_bytes      += sq_stats->tso_inner_bytes;
                        s->tx_queue_stopped     += sq_stats->stopped;
                        s->tx_queue_wake        += sq_stats->wake;
                        s->tx_queue_dropped     += sq_stats->dropped;
+                       s->tx_csum_inner        += sq_stats->csum_offload_inner;
                        tx_offload_none         += sq_stats->csum_offload_none;
                }
        }
                MLX5_GET_CTR(out, transmitted_eth_broadcast.octets);
  
        /* Update calculated offload counters */
-       s->tx_csum_offload = s->tx_packets - tx_offload_none;
+       s->tx_csum_offload = s->tx_packets - tx_offload_none - s->tx_csum_inner;
        s->rx_csum_good    = s->rx_packets - s->rx_csum_none -
                               s->rx_csum_sw;
  
@@@ -258,9 -269,14 +269,14 @@@ static void mlx5e_update_stats_work(str
        mutex_unlock(&priv->state_lock);
  }
  
- static void __mlx5e_async_event(struct mlx5e_priv *priv,
-                               enum mlx5_dev_event event)
+ static void mlx5e_async_event(struct mlx5_core_dev *mdev, void *vpriv,
+                             enum mlx5_dev_event event, unsigned long param)
  {
+       struct mlx5e_priv *priv = vpriv;
+       if (!test_bit(MLX5E_STATE_ASYNC_EVENTS_ENABLE, &priv->state))
+               return;
        switch (event) {
        case MLX5_DEV_EVENT_PORT_UP:
        case MLX5_DEV_EVENT_PORT_DOWN:
        }
  }
  
- static void mlx5e_async_event(struct mlx5_core_dev *mdev, void *vpriv,
-                             enum mlx5_dev_event event, unsigned long param)
- {
-       struct mlx5e_priv *priv = vpriv;
-       spin_lock(&priv->async_events_spinlock);
-       if (test_bit(MLX5E_STATE_ASYNC_EVENTS_ENABLE, &priv->state))
-               __mlx5e_async_event(priv, event);
-       spin_unlock(&priv->async_events_spinlock);
- }
  static void mlx5e_enable_async_events(struct mlx5e_priv *priv)
  {
        set_bit(MLX5E_STATE_ASYNC_EVENTS_ENABLE, &priv->state);
  
  static void mlx5e_disable_async_events(struct mlx5e_priv *priv)
  {
-       spin_lock_irq(&priv->async_events_spinlock);
        clear_bit(MLX5E_STATE_ASYNC_EVENTS_ENABLE, &priv->state);
-       spin_unlock_irq(&priv->async_events_spinlock);
+       synchronize_irq(mlx5_get_msix_vec(priv->mdev, MLX5_EQ_VEC_ASYNC));
  }
  
  #define MLX5E_HW2SW_MTU(hwmtu) (hwmtu - (ETH_HLEN + VLAN_HLEN + ETH_FCS_LEN))
@@@ -538,7 -542,7 +542,7 @@@ static int mlx5e_create_sq(struct mlx5e
        int txq_ix;
        int err;
  
-       err = mlx5_alloc_map_uar(mdev, &sq->uar);
+       err = mlx5_alloc_map_uar(mdev, &sq->uar, true);
        if (err)
                return err;
  
                goto err_unmap_free_uar;
  
        sq->wq.db       = &sq->wq.db[MLX5_SND_DBR];
-       sq->uar_map     = sq->uar.map;
-       sq->uar_bf_map  = sq->uar.bf_map;
+       if (sq->uar.bf_map) {
+               set_bit(MLX5E_SQ_STATE_BF_ENABLE, &sq->state);
+               sq->uar_map = sq->uar.bf_map;
+       } else {
+               sq->uar_map = sq->uar.map;
+       }
        sq->bf_buf_size = (1 << MLX5_CAP_GEN(mdev, log_bf_reg_size)) / 2;
        sq->max_inline  = param->max_inline;
  
@@@ -860,12 -868,10 +868,10 @@@ static int mlx5e_open_cq(struct mlx5e_c
        if (err)
                goto err_destroy_cq;
  
-       err = mlx5_core_modify_cq_moderation(mdev, &cq->mcq,
-                                            moderation_usecs,
-                                            moderation_frames);
-       if (err)
-               goto err_destroy_cq;
+       if (MLX5_CAP_GEN(mdev, cq_moderation))
+               mlx5_core_modify_cq_moderation(mdev, &cq->mcq,
+                                              moderation_usecs,
+                                              moderation_frames);
        return 0;
  
  err_destroy_cq:
@@@ -973,7 -979,7 +979,7 @@@ static int mlx5e_open_channel(struct ml
        c->cpu      = cpu;
        c->pdev     = &priv->mdev->pdev->dev;
        c->netdev   = priv->netdev;
 -      c->mkey_be  = cpu_to_be32(priv->mr.key);
 +      c->mkey_be  = cpu_to_be32(priv->mkey.key);
        c->num_tc   = priv->params.num_tc;
  
        mlx5e_build_channeltc_to_txq_map(priv, ix);
@@@ -1054,6 -1060,15 +1060,15 @@@ static void mlx5e_build_rq_param(struc
        param->wq.linear = 1;
  }
  
+ static void mlx5e_build_drop_rq_param(struct mlx5e_rq_param *param)
+ {
+       void *rqc = param->rqc;
+       void *wq = MLX5_ADDR_OF(rqc, rqc, wq);
+       MLX5_SET(wq, wq, wq_type, MLX5_WQ_TYPE_LINKED_LIST);
+       MLX5_SET(wq, wq, log_wq_stride,    ilog2(sizeof(struct mlx5e_rx_wqe)));
+ }
+
  static void mlx5e_build_sq_param(struct mlx5e_priv *priv,
                                 struct mlx5e_sq_param *param)
  {
@@@ -1410,6 -1425,24 +1425,24 @@@ static int mlx5e_set_dev_port_mtu(struc
        return 0;
  }
  
+ static void mlx5e_netdev_set_tcs(struct net_device *netdev)
+ {
+       struct mlx5e_priv *priv = netdev_priv(netdev);
+       int nch = priv->params.num_channels;
+       int ntc = priv->params.num_tc;
+       int tc;
+       netdev_reset_tc(netdev);
+       if (ntc == 1)
+               return;
+       netdev_set_num_tc(netdev, ntc);
+       for (tc = 0; tc < ntc; tc++)
+               netdev_set_tc_queue(netdev, tc, nch, tc * nch);
+ }
+
  int mlx5e_open_locked(struct net_device *netdev)
  {
        struct mlx5e_priv *priv = netdev_priv(netdev);
  
        set_bit(MLX5E_STATE_OPENED, &priv->state);
  
+       mlx5e_netdev_set_tcs(netdev);
        num_txqs = priv->params.num_channels * priv->params.num_tc;
        netif_set_real_num_tx_queues(netdev, num_txqs);
        netif_set_real_num_rx_queues(netdev, priv->params.num_channels);
                goto err_close_channels;
        }
  
-       mlx5e_update_carrier(priv);
        mlx5e_redirect_rqts(priv);
+       mlx5e_update_carrier(priv);
        mlx5e_timestamp_init(priv);
  
        schedule_delayed_work(&priv->update_stats_work, 0);
@@@ -1480,8 -1515,8 +1515,8 @@@ int mlx5e_close_locked(struct net_devic
        clear_bit(MLX5E_STATE_OPENED, &priv->state);
  
        mlx5e_timestamp_cleanup(priv);
-       mlx5e_redirect_rqts(priv);
        netif_carrier_off(priv->netdev);
+       mlx5e_redirect_rqts(priv);
        mlx5e_close_channels(priv);
  
        return 0;
@@@ -1563,8 -1598,7 +1598,7 @@@ static int mlx5e_open_drop_rq(struct ml
  
        memset(&cq_param, 0, sizeof(cq_param));
        memset(&rq_param, 0, sizeof(rq_param));
-       mlx5e_build_rx_cq_param(priv, &cq_param);
-       mlx5e_build_rq_param(priv, &rq_param);
+       mlx5e_build_drop_rq_param(&rq_param);
  
        err = mlx5e_create_drop_cq(priv, cq, &cq_param);
        if (err)
@@@ -1612,7 -1646,7 +1646,7 @@@ static int mlx5e_create_tis(struct mlx5
  
        memset(in, 0, sizeof(in));
  
-       MLX5_SET(tisc, tisc, prio,  tc);
+       MLX5_SET(tisc, tisc, prio, tc << 1);
        MLX5_SET(tisc, tisc, transport_domain, priv->tdn);
  
        return mlx5_core_create_tis(mdev, in, sizeof(in), &priv->tisn[tc]);
@@@ -1628,7 -1662,7 +1662,7 @@@ static int mlx5e_create_tises(struct ml
        int err;
        int tc;
  
-       for (tc = 0; tc < priv->params.num_tc; tc++) {
+       for (tc = 0; tc < MLX5E_MAX_NUM_TC; tc++) {
                err = mlx5e_create_tis(priv, tc);
                if (err)
                        goto err_close_tises;
@@@ -1647,7 -1681,7 +1681,7 @@@ static void mlx5e_destroy_tises(struct 
  {
        int tc;
  
-       for (tc = 0; tc < priv->params.num_tc; tc++)
+       for (tc = 0; tc < MLX5E_MAX_NUM_TC; tc++)
                mlx5e_destroy_tis(priv, tc);
  }
  
@@@ -1824,6 -1858,58 +1858,58 @@@ static void mlx5e_destroy_tirs(struct m
                mlx5e_destroy_tir(priv, i);
  }
  
+ static int mlx5e_setup_tc(struct net_device *netdev, u8 tc)
+ {
+       struct mlx5e_priv *priv = netdev_priv(netdev);
+       bool was_opened;
+       int err = 0;
+       if (tc && tc != MLX5E_MAX_NUM_TC)
+               return -EINVAL;
+       mutex_lock(&priv->state_lock);
+       was_opened = test_bit(MLX5E_STATE_OPENED, &priv->state);
+       if (was_opened)
+               mlx5e_close_locked(priv->netdev);
+       priv->params.num_tc = tc ? tc : 1;
+       if (was_opened)
+               err = mlx5e_open_locked(priv->netdev);
+       mutex_unlock(&priv->state_lock);
+       return err;
+ }
+
+ static int mlx5e_ndo_setup_tc(struct net_device *dev, u32 handle,
+                             __be16 proto, struct tc_to_netdev *tc)
+ {
+       struct mlx5e_priv *priv = netdev_priv(dev);
+       if (TC_H_MAJ(handle) != TC_H_MAJ(TC_H_INGRESS))
+               goto mqprio;
+       switch (tc->type) {
+       case TC_SETUP_CLSFLOWER:
+               switch (tc->cls_flower->command) {
+               case TC_CLSFLOWER_REPLACE:
+                       return mlx5e_configure_flower(priv, proto, tc->cls_flower);
+               case TC_CLSFLOWER_DESTROY:
+                       return mlx5e_delete_flower(priv, tc->cls_flower);
+               }
+       default:
+               return -EOPNOTSUPP;
+       }
+ mqprio:
+       if (tc->type != TC_SETUP_MQPRIO)
+               return -EINVAL;
+       return mlx5e_setup_tc(dev, tc->tc);
+ }
+
  static struct rtnl_link_stats64 *
  mlx5e_get_stats(struct net_device *dev, struct rtnl_link_stats64 *stats)
  {
@@@ -1903,6 -1989,13 +1989,13 @@@ static int mlx5e_set_features(struct ne
                        mlx5e_disable_vlan_filter(priv);
        }
  
+       if ((changes & NETIF_F_HW_TC) && !(features & NETIF_F_HW_TC) &&
+           mlx5e_tc_num_filters(priv)) {
+               netdev_err(netdev,
+                          "Active offloaded tc filters, can't turn hw_tc_offload off\n");
+               return -EINVAL;
+       }
        return err;
  }
  
@@@ -2026,10 -2119,84 +2119,84 @@@ static int mlx5e_get_vf_stats(struct ne
                                            vf_stats);
  }
  
+ static void mlx5e_add_vxlan_port(struct net_device *netdev,
+                                sa_family_t sa_family, __be16 port)
+ {
+       struct mlx5e_priv *priv = netdev_priv(netdev);
+       if (!mlx5e_vxlan_allowed(priv->mdev))
+               return;
+       mlx5e_vxlan_add_port(priv, be16_to_cpu(port));
+ }
+ static void mlx5e_del_vxlan_port(struct net_device *netdev,
+                                sa_family_t sa_family, __be16 port)
+ {
+       struct mlx5e_priv *priv = netdev_priv(netdev);
+       if (!mlx5e_vxlan_allowed(priv->mdev))
+               return;
+       mlx5e_vxlan_del_port(priv, be16_to_cpu(port));
+ }
+ static netdev_features_t mlx5e_vxlan_features_check(struct mlx5e_priv *priv,
+                                                   struct sk_buff *skb,
+                                                   netdev_features_t features)
+ {
+       struct udphdr *udph;
+       u16 proto;
+       u16 port = 0;
+       switch (vlan_get_protocol(skb)) {
+       case htons(ETH_P_IP):
+               proto = ip_hdr(skb)->protocol;
+               break;
+       case htons(ETH_P_IPV6):
+               proto = ipv6_hdr(skb)->nexthdr;
+               break;
+       default:
+               goto out;
+       }
+       if (proto == IPPROTO_UDP) {
+               udph = udp_hdr(skb);
+               port = be16_to_cpu(udph->dest);
+       }
+       /* Verify if UDP port is being offloaded by HW */
+       if (port && mlx5e_vxlan_lookup_port(priv, port))
+               return features;
+ out:
+       /* Disable CSUM and GSO if the udp dport is not offloaded by HW */
+       return features & ~(NETIF_F_CSUM_MASK | NETIF_F_GSO_MASK);
+ }
+ static netdev_features_t mlx5e_features_check(struct sk_buff *skb,
+                                             struct net_device *netdev,
+                                             netdev_features_t features)
+ {
+       struct mlx5e_priv *priv = netdev_priv(netdev);
+       features = vlan_features_check(skb, features);
+       features = vxlan_features_check(skb, features);
+       /* Validate if the tunneled packet is being offloaded by HW */
+       if (skb->encapsulation &&
+           (features & NETIF_F_CSUM_MASK || features & NETIF_F_GSO_MASK))
+               return mlx5e_vxlan_features_check(priv, skb, features);
+       return features;
+ }
  static const struct net_device_ops mlx5e_netdev_ops_basic = {
        .ndo_open                = mlx5e_open,
        .ndo_stop                = mlx5e_close,
        .ndo_start_xmit          = mlx5e_xmit,
+       .ndo_setup_tc            = mlx5e_ndo_setup_tc,
+       .ndo_select_queue        = mlx5e_select_queue,
        .ndo_get_stats64         = mlx5e_get_stats,
        .ndo_set_rx_mode         = mlx5e_set_rx_mode,
        .ndo_set_mac_address     = mlx5e_set_mac,
@@@ -2044,6 -2211,8 +2211,8 @@@ static const struct net_device_ops mlx5
        .ndo_open                = mlx5e_open,
        .ndo_stop                = mlx5e_close,
        .ndo_start_xmit          = mlx5e_xmit,
+       .ndo_setup_tc            = mlx5e_ndo_setup_tc,
+       .ndo_select_queue        = mlx5e_select_queue,
        .ndo_get_stats64         = mlx5e_get_stats,
        .ndo_set_rx_mode         = mlx5e_set_rx_mode,
        .ndo_set_mac_address     = mlx5e_set_mac,
        .ndo_set_features        = mlx5e_set_features,
        .ndo_change_mtu          = mlx5e_change_mtu,
        .ndo_do_ioctl            = mlx5e_ioctl,
+       .ndo_add_vxlan_port      = mlx5e_add_vxlan_port,
+       .ndo_del_vxlan_port      = mlx5e_del_vxlan_port,
+       .ndo_features_check      = mlx5e_features_check,
        .ndo_set_vf_mac          = mlx5e_set_vf_mac,
        .ndo_set_vf_vlan         = mlx5e_set_vf_vlan,
        .ndo_get_vf_config       = mlx5e_get_vf_config,
@@@ -2078,6 -2250,8 +2250,8 @@@ static int mlx5e_check_required_hca_cap
        }
        if (!MLX5_CAP_ETH(mdev, self_lb_en_modifiable))
                mlx5_core_warn(mdev, "Self loop back prevention is not supported\n");
+       if (!MLX5_CAP_GEN(mdev, cq_moderation))
+               mlx5_core_warn(mdev, "CQ modiration is not supported\n");
  
        return 0;
  }
@@@ -2091,6 -2265,24 +2265,24 @@@ u16 mlx5e_get_max_inline_cap(struct mlx
               2 /*sizeof(mlx5e_tx_wqe.inline_hdr_start)*/;
  }
  
+ #ifdef CONFIG_MLX5_CORE_EN_DCB
+ static void mlx5e_ets_init(struct mlx5e_priv *priv)
+ {
+       int i;
+       priv->params.ets.ets_cap = mlx5_max_tc(priv->mdev) + 1;
+       for (i = 0; i < priv->params.ets.ets_cap; i++) {
+               priv->params.ets.tc_tx_bw[i] = MLX5E_MAX_BW_ALLOC;
+               priv->params.ets.tc_tsa[i] = IEEE_8021QAZ_TSA_VENDOR;
+               priv->params.ets.prio_tc[i] = i;
+       }
+       /* tclass[prio=0]=1, tclass[prio=1]=0, tclass[prio=i]=i (for i>1) */
+       priv->params.ets.prio_tc[0] = 1;
+       priv->params.ets.prio_tc[1] = 0;
+ }
+ #endif
  void mlx5e_build_default_indir_rqt(u32 *indirection_rqt, int len,
                                   int num_channels)
  {
@@@ -2122,7 -2314,6 +2314,6 @@@ static void mlx5e_build_netdev_priv(str
        priv->params.min_rx_wqes           =
                MLX5E_PARAMS_DEFAULT_MIN_RX_WQES;
        priv->params.num_tc                = 1;
-       priv->params.default_vlan_prio     = 0;
        priv->params.rss_hfunc             = ETH_RSS_HASH_XOR;
  
        netdev_rss_key_fill(priv->params.toeplitz_hash_key,
        priv->mdev                         = mdev;
        priv->netdev                       = netdev;
        priv->params.num_channels          = num_channels;
-       priv->default_vlan_prio            = priv->params.default_vlan_prio;
  
-       spin_lock_init(&priv->async_events_spinlock);
+ #ifdef CONFIG_MLX5_CORE_EN_DCB
+       mlx5e_ets_init(priv);
+ #endif
        mutex_init(&priv->state_lock);
  
        INIT_WORK(&priv->update_carrier_work, mlx5e_update_carrier_work);
@@@ -2166,10 -2359,14 +2359,14 @@@ static void mlx5e_build_netdev(struct n
  
        SET_NETDEV_DEV(netdev, &mdev->pdev->dev);
  
-       if (MLX5_CAP_GEN(mdev, vport_group_manager))
+       if (MLX5_CAP_GEN(mdev, vport_group_manager)) {
                netdev->netdev_ops = &mlx5e_netdev_ops_sriov;
-       else
+ #ifdef CONFIG_MLX5_CORE_EN_DCB
+               netdev->dcbnl_ops = &mlx5e_dcbnl_ops;
+ #endif
+       } else {
                netdev->netdev_ops = &mlx5e_netdev_ops_basic;
+       }
  
        netdev->watchdog_timeo    = 15 * HZ;
  
        netdev->hw_features      |= NETIF_F_HW_VLAN_CTAG_RX;
        netdev->hw_features      |= NETIF_F_HW_VLAN_CTAG_FILTER;
  
+       if (mlx5e_vxlan_allowed(mdev)) {
+               netdev->hw_features     |= NETIF_F_GSO_UDP_TUNNEL;
+               netdev->hw_enc_features |= NETIF_F_IP_CSUM;
+               netdev->hw_enc_features |= NETIF_F_RXCSUM;
+               netdev->hw_enc_features |= NETIF_F_TSO;
+               netdev->hw_enc_features |= NETIF_F_TSO6;
+               netdev->hw_enc_features |= NETIF_F_RXHASH;
+               netdev->hw_enc_features |= NETIF_F_GSO_UDP_TUNNEL;
+       }
        netdev->features          = netdev->hw_features;
        if (!priv->params.lro_en)
                netdev->features  &= ~NETIF_F_LRO;
  
+ #define FT_CAP(f) MLX5_CAP_FLOWTABLE(mdev, flow_table_properties_nic_receive.f)
+       if (FT_CAP(flow_modify_en) &&
+           FT_CAP(modify_root) &&
+           FT_CAP(identified_miss_table_mode) &&
+           FT_CAP(flow_table_modify))
+               priv->netdev->hw_features      |= NETIF_F_HW_TC;
        netdev->features         |= NETIF_F_HIGHDMA;
  
        netdev->priv_flags       |= IFF_UNICAST_FLT;
  }
  
  static int mlx5e_create_mkey(struct mlx5e_priv *priv, u32 pdn,
 -                           struct mlx5_core_mr *mr)
 +                           struct mlx5_core_mkey *mkey)
  {
        struct mlx5_core_dev *mdev = priv->mdev;
        struct mlx5_create_mkey_mbox_in *in;
        in->seg.flags_pd = cpu_to_be32(pdn | MLX5_MKEY_LEN64);
        in->seg.qpn_mkey7_0 = cpu_to_be32(0xffffff << 8);
  
 -      err = mlx5_core_create_mkey(mdev, mr, in, sizeof(*in), NULL, NULL,
 +      err = mlx5_core_create_mkey(mdev, mkey, in, sizeof(*in), NULL, NULL,
                                    NULL);
  
        kvfree(in);
@@@ -2238,7 -2452,9 +2452,9 @@@ static void *mlx5e_create_netdev(struc
        if (mlx5e_check_required_hca_cap(mdev))
                return NULL;
  
-       netdev = alloc_etherdev_mqs(sizeof(struct mlx5e_priv), nch, nch);
+       netdev = alloc_etherdev_mqs(sizeof(struct mlx5e_priv),
+                                   nch * MLX5E_MAX_NUM_TC,
+                                   nch);
        if (!netdev) {
                mlx5_core_err(mdev, "alloc_etherdev_mqs() failed\n");
                return NULL;
  
        priv = netdev_priv(netdev);
  
-       err = mlx5_alloc_map_uar(mdev, &priv->cq_uar);
+       err = mlx5_alloc_map_uar(mdev, &priv->cq_uar, false);
        if (err) {
                mlx5_core_err(mdev, "alloc_map uar failed, %d\n", err);
                goto err_free_netdev;
                goto err_dealloc_pd;
        }
  
 -      err = mlx5e_create_mkey(priv, priv->pdn, &priv->mr);
 +      err = mlx5e_create_mkey(priv, priv->pdn, &priv->mkey);
        if (err) {
                mlx5_core_err(mdev, "create mkey failed, %d\n", err);
                goto err_dealloc_transport_domain;
  
        mlx5e_init_eth_addr(priv);
  
+       mlx5e_vxlan_init(priv);
+       err = mlx5e_tc_init(priv);
+       if (err)
+               goto err_destroy_flow_tables;
+ #ifdef CONFIG_MLX5_CORE_EN_DCB
+       mlx5e_dcbnl_ieee_setets_core(priv, &priv->params.ets);
+ #endif
        err = register_netdev(netdev);
        if (err) {
                mlx5_core_err(mdev, "register_netdev failed, %d\n", err);
-               goto err_destroy_flow_tables;
+               goto err_tc_cleanup;
        }
  
+       if (mlx5e_vxlan_allowed(mdev))
+               vxlan_get_rx_port(netdev);
        mlx5e_enable_async_events(priv);
        schedule_work(&priv->set_rx_mode_work);
  
        return priv;
  
+ err_tc_cleanup:
+       mlx5e_tc_cleanup(priv);
  err_destroy_flow_tables:
        mlx5e_destroy_flow_tables(priv);
  
@@@ -2343,7 -2575,7 +2575,7 @@@ err_destroy_tises
        mlx5e_destroy_tises(priv);
  
  err_destroy_mkey:
 -      mlx5_core_destroy_mkey(mdev, &priv->mr);
 +      mlx5_core_destroy_mkey(mdev, &priv->mkey);
  
  err_dealloc_transport_domain:
        mlx5_core_dealloc_transport_domain(mdev, priv->tdn);
@@@ -2371,13 -2603,15 +2603,15 @@@ static void mlx5e_destroy_netdev(struc
        mlx5e_disable_async_events(priv);
        flush_scheduled_work();
        unregister_netdev(netdev);
+       mlx5e_tc_cleanup(priv);
+       mlx5e_vxlan_cleanup(priv);
        mlx5e_destroy_flow_tables(priv);
        mlx5e_destroy_tirs(priv);
        mlx5e_destroy_rqt(priv, MLX5E_SINGLE_RQ_RQT);
        mlx5e_destroy_rqt(priv, MLX5E_INDIRECTION_RQT);
        mlx5e_close_drop_rq(priv);
        mlx5e_destroy_tises(priv);
 -      mlx5_core_destroy_mkey(priv->mdev, &priv->mr);
 +      mlx5_core_destroy_mkey(priv->mdev, &priv->mkey);
        mlx5_core_dealloc_transport_domain(priv->mdev, priv->tdn);
        mlx5_core_dealloc_pd(priv->mdev, priv->pdn);
        mlx5_unmap_free_uar(priv->mdev, &priv->cq_uar);
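A minimal userspace sketch of the traffic-class count rule that mlx5e_setup_tc() enforces above; MAX_NUM_TC here is an assumed stand-in for MLX5E_MAX_NUM_TC, and the helpers are illustrative, not driver code:

/* Standalone sketch, not driver code: mirrors the "0 or all TCs" check in
 * mlx5e_setup_tc() above.  -1 stands in for the driver's -EINVAL. */
#include <stdio.h>

#define MAX_NUM_TC 8	/* assumed value of MLX5E_MAX_NUM_TC */

static int setup_tc(unsigned int tc)
{
	if (tc && tc != MAX_NUM_TC)	/* only "off" or the full set is valid */
		return -1;
	printf("num_tc = %u\n", tc ? tc : 1);	/* 0 falls back to a single TC */
	return 0;
}

int main(void)
{
	setup_tc(0);			/* accepted: one traffic class */
	setup_tc(MAX_NUM_TC);		/* accepted: all traffic classes */
	return setup_tc(3) ? 1 : 0;	/* rejected, as in the driver */
}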
index bf3446794bd5f5d3eb05229d3e57fdb10ed3f046,e848d708d2b733caf9edc9a0f0f0f1cdd60ad688..5121be4675d14de3ea8f7722434945caeb307eb9
  #define BY_PASS_MIN_LEVEL (KENREL_MIN_LEVEL + MLX5_BY_PASS_NUM_PRIOS +\
                           LEFTOVERS_MAX_FT)
  
- #define KERNEL_MAX_FT 2
- #define KERNEL_NUM_PRIOS 1
+ #define KERNEL_MAX_FT 3
+ #define KERNEL_NUM_PRIOS 2
  #define KENREL_MIN_LEVEL 2
  
 +#define ANCHOR_MAX_FT 1
 +#define ANCHOR_NUM_PRIOS 1
 +#define ANCHOR_MIN_LEVEL (BY_PASS_MIN_LEVEL + 1)
  struct node_caps {
        size_t  arr_sz;
        long    *caps;
@@@ -95,7 -92,7 +95,7 @@@ static struct init_tree_node 
        int max_ft;
  } root_fs = {
        .type = FS_TYPE_NAMESPACE,
 -      .ar_size = 3,
 +      .ar_size = 4,
        .children = (struct init_tree_node[]) {
                ADD_PRIO(0, BY_PASS_MIN_LEVEL, 0,
                         FS_REQUIRED_CAPS(FS_CAP(flow_table_properties_nic_receive.flow_modify_en),
                                          FS_CAP(flow_table_properties_nic_receive.identified_miss_table_mode),
                                          FS_CAP(flow_table_properties_nic_receive.flow_table_modify)),
                         ADD_NS(ADD_MULTIPLE_PRIO(LEFTOVERS_NUM_PRIOS, LEFTOVERS_MAX_FT))),
 +              ADD_PRIO(0, ANCHOR_MIN_LEVEL, 0, {},
 +                       ADD_NS(ADD_MULTIPLE_PRIO(ANCHOR_NUM_PRIOS, ANCHOR_MAX_FT))),
        }
  };
  
@@@ -201,10 -196,8 +201,10 @@@ static void tree_put_node(struct fs_nod
  
  static int tree_remove_node(struct fs_node *node)
  {
 -      if (atomic_read(&node->refcount) > 1)
 -              return -EPERM;
 +      if (atomic_read(&node->refcount) > 1) {
 +              atomic_dec(&node->refcount);
 +              return -EEXIST;
 +      }
        tree_put_node(node);
        return 0;
  }
@@@ -367,13 -360,8 +367,13 @@@ static void del_rule(struct fs_node *no
        memcpy(match_value, fte->val, sizeof(fte->val));
        fs_get_obj(ft, fg->node.parent);
        list_del(&rule->node.list);
-       fte->dests_size--;
-       if (fte->dests_size) {
 +      if (rule->sw_action == MLX5_FLOW_CONTEXT_ACTION_FWD_NEXT_PRIO) {
 +              mutex_lock(&rule->dest_attr.ft->lock);
 +              list_del(&rule->next_ft);
 +              mutex_unlock(&rule->dest_attr.ft->lock);
 +      }
+       if ((fte->action & MLX5_FLOW_CONTEXT_ACTION_FWD_DEST) &&
+           --fte->dests_size) {
                err = mlx5_cmd_update_fte(dev, ft,
                                          fg->id, fte);
                if (err)
@@@ -477,8 -465,6 +477,8 @@@ static struct mlx5_flow_table *alloc_fl
        ft->node.type = FS_TYPE_FLOW_TABLE;
        ft->type = table_type;
        ft->max_fte = max_fte;
 +      INIT_LIST_HEAD(&ft->fwd_rules);
 +      mutex_init(&ft->lock);
  
        return ft;
  }
@@@ -615,63 -601,9 +615,63 @@@ static int update_root_ft_create(struc
        return err;
  }
  
 +static int mlx5_modify_rule_destination(struct mlx5_flow_rule *rule,
 +                                      struct mlx5_flow_destination *dest)
 +{
 +      struct mlx5_flow_table *ft;
 +      struct mlx5_flow_group *fg;
 +      struct fs_fte *fte;
 +      int err = 0;
 +
 +      fs_get_obj(fte, rule->node.parent);
 +      if (!(fte->action & MLX5_FLOW_CONTEXT_ACTION_FWD_DEST))
 +              return -EINVAL;
 +      lock_ref_node(&fte->node);
 +      fs_get_obj(fg, fte->node.parent);
 +      fs_get_obj(ft, fg->node.parent);
 +
 +      memcpy(&rule->dest_attr, dest, sizeof(*dest));
 +      err = mlx5_cmd_update_fte(get_dev(&ft->node),
 +                                ft, fg->id, fte);
 +      unlock_ref_node(&fte->node);
 +
 +      return err;
 +}
 +
 +/* Modify/set FWD rules that point to old_next_ft so that they point to new_next_ft */
 +static int connect_fwd_rules(struct mlx5_core_dev *dev,
 +                           struct mlx5_flow_table *new_next_ft,
 +                           struct mlx5_flow_table *old_next_ft)
 +{
 +      struct mlx5_flow_destination dest;
 +      struct mlx5_flow_rule *iter;
 +      int err = 0;
 +
 +      /* new_next_ft and old_next_ft could be NULL only
 +       * when we create/destroy the anchor flow table.
 +       */
 +      if (!new_next_ft || !old_next_ft)
 +              return 0;
 +
 +      dest.type = MLX5_FLOW_DESTINATION_TYPE_FLOW_TABLE;
 +      dest.ft = new_next_ft;
 +
 +      mutex_lock(&old_next_ft->lock);
 +      list_splice_init(&old_next_ft->fwd_rules, &new_next_ft->fwd_rules);
 +      mutex_unlock(&old_next_ft->lock);
 +      list_for_each_entry(iter, &new_next_ft->fwd_rules, next_ft) {
 +              err = mlx5_modify_rule_destination(iter, &dest);
 +              if (err)
 +                      pr_err("mlx5_core: failed to modify rule to point on flow table %d\n",
 +                             new_next_ft->id);
 +      }
 +      return 0;
 +}
 +
  static int connect_flow_table(struct mlx5_core_dev *dev, struct mlx5_flow_table *ft,
                              struct fs_prio *prio)
  {
 +      struct mlx5_flow_table *next_ft;
        int err = 0;
  
        /* Connect_prev_fts and update_root_ft_create are mutually exclusive */
                err = connect_prev_fts(dev, ft, prio);
                if (err)
                        return err;
 +
 +              next_ft = find_next_chained_ft(prio);
 +              err = connect_fwd_rules(dev, ft, next_ft);
 +              if (err)
 +                      return err;
        }
  
        if (MLX5_CAP_FLOWTABLE(dev,
@@@ -835,9 -762,9 +835,10 @@@ static struct mlx5_flow_rule *alloc_rul
        if (!rule)
                return NULL;
  
 +      INIT_LIST_HEAD(&rule->next_ft);
        rule->node.type = FS_TYPE_FLOW_DEST;
-       memcpy(&rule->dest_attr, dest, sizeof(*dest));
+       if (dest)
+               memcpy(&rule->dest_attr, dest, sizeof(*dest));
  
        return rule;
  }
@@@ -856,16 -783,12 +857,17 @@@ static struct mlx5_flow_rule *add_rule_
                return ERR_PTR(-ENOMEM);
  
        fs_get_obj(ft, fg->node.parent);
 -      /* Add dest to dests list- added as first element after the head */
 +      /* Add dest to the dests list - flow table destinations must stay at
 +       * the end of the list so that forward-to-next-prio rules keep working.
 +       */
        tree_init_node(&rule->node, 1, del_rule);
 -      list_add_tail(&rule->node.list, &fte->node.children);
 +      if (dest && dest->type != MLX5_FLOW_DESTINATION_TYPE_FLOW_TABLE)
 +              list_add(&rule->node.list, &fte->node.children);
 +      else
 +              list_add_tail(&rule->node.list, &fte->node.children);
-       fte->dests_size++;
-       if (fte->dests_size == 1)
+       if (dest)
+               fte->dests_size++;
+       if (fte->dests_size == 1 || !dest)
                err = mlx5_cmd_create_fte(get_dev(&ft->node),
                                          ft, fg->id, fte);
        else
  free_rule:
        list_del(&rule->node.list);
        kfree(rule);
-       fte->dests_size--;
+       if (dest)
+               fte->dests_size--;
        return ERR_PTR(err);
  }
  
@@@ -982,25 -906,6 +985,25 @@@ out
        return fg;
  }
  
 +static struct mlx5_flow_rule *find_flow_rule(struct fs_fte *fte,
 +                                           struct mlx5_flow_destination *dest)
 +{
 +      struct mlx5_flow_rule *rule;
 +
 +      list_for_each_entry(rule, &fte->node.children, node.list) {
 +              if (rule->dest_attr.type == dest->type) {
 +                      if ((dest->type == MLX5_FLOW_DESTINATION_TYPE_VPORT &&
 +                           dest->vport_num == rule->dest_attr.vport_num) ||
 +                          (dest->type == MLX5_FLOW_DESTINATION_TYPE_FLOW_TABLE &&
 +                           dest->ft == rule->dest_attr.ft) ||
 +                          (dest->type == MLX5_FLOW_DESTINATION_TYPE_TIR &&
 +                           dest->tir_num == rule->dest_attr.tir_num))
 +                              return rule;
 +              }
 +      }
 +      return NULL;
 +}
 +
  static struct mlx5_flow_rule *add_rule_fg(struct mlx5_flow_group *fg,
                                          u32 *match_value,
                                          u8 action,
                nested_lock_ref_node(&fte->node, FS_MUTEX_CHILD);
                if (compare_match_value(&fg->mask, match_value, &fte->val) &&
                    action == fte->action && flow_tag == fte->flow_tag) {
 +                      rule = find_flow_rule(fte, dest);
 +                      if (rule) {
 +                              atomic_inc(&rule->node.refcount);
 +                              unlock_ref_node(&fte->node);
 +                              unlock_ref_node(&fg->node);
 +                              return rule;
 +                      }
                        rule = add_rule_fte(fte, fg, dest);
                        unlock_ref_node(&fte->node);
                        if (IS_ERR(rule))
@@@ -1089,18 -987,21 +1092,21 @@@ static struct mlx5_flow_rule *add_rule_
        return rule;
  }
  
 -struct mlx5_flow_rule *
 -mlx5_add_flow_rule(struct mlx5_flow_table *ft,
 -                 u8 match_criteria_enable,
 -                 u32 *match_criteria,
 -                 u32 *match_value,
 -                 u32 action,
 -                 u32 flow_tag,
 -                 struct mlx5_flow_destination *dest)
 +static struct mlx5_flow_rule *
 +_mlx5_add_flow_rule(struct mlx5_flow_table *ft,
 +                  u8 match_criteria_enable,
 +                  u32 *match_criteria,
 +                  u32 *match_value,
 +                  u32 action,
 +                  u32 flow_tag,
 +                  struct mlx5_flow_destination *dest)
  {
        struct mlx5_flow_group *g;
        struct mlx5_flow_rule *rule;
  
+       if ((action & MLX5_FLOW_CONTEXT_ACTION_FWD_DEST) && !dest)
+               return ERR_PTR(-EINVAL);
        nested_lock_ref_node(&ft->node, FS_MUTEX_GRANDPARENT);
        fs_for_each_fg(g, ft)
                if (compare_match_criteria(g->mask.match_criteria_enable,
@@@ -1119,63 -1020,6 +1125,63 @@@ unlock
        unlock_ref_node(&ft->node);
        return rule;
  }
 +
 +static bool fwd_next_prio_supported(struct mlx5_flow_table *ft)
 +{
 +      return ((ft->type == FS_FT_NIC_RX) &&
 +              (MLX5_CAP_FLOWTABLE(get_dev(&ft->node), nic_rx_multi_path_tirs)));
 +}
 +
 +struct mlx5_flow_rule *
 +mlx5_add_flow_rule(struct mlx5_flow_table *ft,
 +                 u8 match_criteria_enable,
 +                 u32 *match_criteria,
 +                 u32 *match_value,
 +                 u32 action,
 +                 u32 flow_tag,
 +                 struct mlx5_flow_destination *dest)
 +{
 +      struct mlx5_flow_root_namespace *root = find_root(&ft->node);
 +      struct mlx5_flow_destination gen_dest;
 +      struct mlx5_flow_table *next_ft = NULL;
 +      struct mlx5_flow_rule *rule = NULL;
 +      u32 sw_action = action;
 +      struct fs_prio *prio;
 +
 +      fs_get_obj(prio, ft->node.parent);
 +      if (action == MLX5_FLOW_CONTEXT_ACTION_FWD_NEXT_PRIO) {
 +              if (!fwd_next_prio_supported(ft))
 +                      return ERR_PTR(-EOPNOTSUPP);
 +              if (dest)
 +                      return ERR_PTR(-EINVAL);
 +              mutex_lock(&root->chain_lock);
 +              next_ft = find_next_chained_ft(prio);
 +              if (next_ft) {
 +                      gen_dest.type = MLX5_FLOW_DESTINATION_TYPE_FLOW_TABLE;
 +                      gen_dest.ft = next_ft;
 +                      dest = &gen_dest;
 +                      action = MLX5_FLOW_CONTEXT_ACTION_FWD_DEST;
 +              } else {
 +                      mutex_unlock(&root->chain_lock);
 +                      return ERR_PTR(-EOPNOTSUPP);
 +              }
 +      }
 +
 +      rule =  _mlx5_add_flow_rule(ft, match_criteria_enable, match_criteria,
 +                                  match_value, action, flow_tag, dest);
 +
 +      if (sw_action == MLX5_FLOW_CONTEXT_ACTION_FWD_NEXT_PRIO) {
 +              if (!IS_ERR_OR_NULL(rule) &&
 +                  (list_empty(&rule->next_ft))) {
 +                      mutex_lock(&next_ft->lock);
 +                      list_add(&rule->next_ft, &next_ft->fwd_rules);
 +                      mutex_unlock(&next_ft->lock);
 +                      rule->sw_action = MLX5_FLOW_CONTEXT_ACTION_FWD_NEXT_PRIO;
 +              }
 +              mutex_unlock(&root->chain_lock);
 +      }
 +      return rule;
 +}
  EXPORT_SYMBOL(mlx5_add_flow_rule);
  
  void mlx5_del_flow_rule(struct mlx5_flow_rule *rule)
@@@ -1239,10 -1083,6 +1245,10 @@@ static int disconnect_flow_table(struc
                return 0;
  
        next_ft = find_next_chained_ft(prio);
 +      err = connect_fwd_rules(dev, next_ft, ft);
 +      if (err)
 +              return err;
 +
        err = connect_prev_fts(dev, next_ft, prio);
        if (err)
                mlx5_core_warn(dev, "Failed to disconnect flow table %d\n",
@@@ -1292,7 -1132,6 +1298,7 @@@ struct mlx5_flow_namespace *mlx5_get_fl
        case MLX5_FLOW_NAMESPACE_BYPASS:
        case MLX5_FLOW_NAMESPACE_KERNEL:
        case MLX5_FLOW_NAMESPACE_LEFTOVERS:
 +      case MLX5_FLOW_NAMESPACE_ANCHOR:
                prio = type;
                break;
        case MLX5_FLOW_NAMESPACE_FDB:
@@@ -1518,25 -1357,6 +1524,25 @@@ static void set_prio_attrs(struct mlx5_
        }
  }
  
 +#define ANCHOR_PRIO 0
 +#define ANCHOR_SIZE 1
 +static int create_anchor_flow_table(struct mlx5_core_dev
 +                                                      *dev)
 +{
 +      struct mlx5_flow_namespace *ns = NULL;
 +      struct mlx5_flow_table *ft;
 +
 +      ns = mlx5_get_flow_namespace(dev, MLX5_FLOW_NAMESPACE_ANCHOR);
 +      if (!ns)
 +              return -EINVAL;
 +      ft = mlx5_create_flow_table(ns, ANCHOR_PRIO, ANCHOR_SIZE);
 +      if (IS_ERR(ft)) {
 +              mlx5_core_err(dev, "Failed to create last anchor flow table");
 +              return PTR_ERR(ft);
 +      }
 +      return 0;
 +}
 +
  static int init_root_ns(struct mlx5_core_dev *dev)
  {
  
  
        set_prio_attrs(dev->priv.root_ns);
  
 +      if (create_anchor_flow_table(dev))
 +              goto cleanup;
 +
        return 0;
  
  cleanup:
@@@ -1581,15 -1398,6 +1587,15 @@@ static void cleanup_single_prio_root_ns
        root_ns = NULL;
  }
  
 +static void destroy_flow_tables(struct fs_prio *prio)
 +{
 +      struct mlx5_flow_table *iter;
 +      struct mlx5_flow_table *tmp;
 +
 +      fs_for_each_ft_safe(iter, tmp, prio)
 +              mlx5_destroy_flow_table(iter);
 +}
 +
  static void cleanup_root_ns(struct mlx5_core_dev *dev)
  {
        struct mlx5_flow_root_namespace *root_ns = dev->priv.root_ns;
                                                         list);
  
                                fs_get_obj(obj_iter_prio2, iter_prio2);
 +                              destroy_flow_tables(obj_iter_prio2);
                                if (tree_remove_node(iter_prio2)) {
                                        mlx5_core_warn(dev,
                                                       "Priority %d wasn't destroyed, refcount > 1\n",
index 0916bbc692690c81f13a2a30dfa0a87fd101a399,8b7133de498eedfa7cf5c46638bfd6b02a980309..72a94e72ee250ac2a24cf2223e21f999fd8e5185
@@@ -767,22 -767,6 +767,6 @@@ static int mlx5_core_set_issi(struct ml
        return -ENOTSUPP;
  }
  
- static int map_bf_area(struct mlx5_core_dev *dev)
- {
-       resource_size_t bf_start = pci_resource_start(dev->pdev, 0);
-       resource_size_t bf_len = pci_resource_len(dev->pdev, 0);
-       dev->priv.bf_mapping = io_mapping_create_wc(bf_start, bf_len);
-       return dev->priv.bf_mapping ? 0 : -ENOMEM;
- }
- static void unmap_bf_area(struct mlx5_core_dev *dev)
- {
-       if (dev->priv.bf_mapping)
-               io_mapping_free(dev->priv.bf_mapping);
- }
  static void mlx5_add_device(struct mlx5_interface *intf, struct mlx5_priv *priv)
  {
        struct mlx5_device_context *dev_ctx;
@@@ -1103,21 -1087,16 +1087,16 @@@ static int mlx5_load_one(struct mlx5_co
                goto err_stop_eqs;
        }
  
-       if (map_bf_area(dev))
-               dev_err(&pdev->dev, "Failed to map blue flame area\n");
        err = mlx5_irq_set_affinity_hints(dev);
-       if (err) {
+       if (err)
                dev_err(&pdev->dev, "Failed to alloc affinity hint cpumask\n");
-               goto err_unmap_bf_area;
-       }
  
        MLX5_INIT_DOORBELL_LOCK(&priv->cq_uar_lock);
  
        mlx5_init_cq_table(dev);
        mlx5_init_qp_table(dev);
        mlx5_init_srq_table(dev);
 -      mlx5_init_mr_table(dev);
 +      mlx5_init_mkey_table(dev);
  
        err = mlx5_init_fs(dev);
        if (err) {
@@@ -1164,15 -1143,11 +1143,11 @@@ err_sriov
  err_reg_dev:
        mlx5_cleanup_fs(dev);
  err_fs:
 -      mlx5_cleanup_mr_table(dev);
 +      mlx5_cleanup_mkey_table(dev);
        mlx5_cleanup_srq_table(dev);
        mlx5_cleanup_qp_table(dev);
        mlx5_cleanup_cq_table(dev);
        mlx5_irq_clear_affinity_hints(dev);
- err_unmap_bf_area:
-       unmap_bf_area(dev);
        free_comp_eqs(dev);
  
  err_stop_eqs:
@@@ -1237,12 -1212,11 +1212,11 @@@ static int mlx5_unload_one(struct mlx5_
  #endif
  
        mlx5_cleanup_fs(dev);
 -      mlx5_cleanup_mr_table(dev);
 +      mlx5_cleanup_mkey_table(dev);
        mlx5_cleanup_srq_table(dev);
        mlx5_cleanup_qp_table(dev);
        mlx5_cleanup_cq_table(dev);
        mlx5_irq_clear_affinity_hints(dev);
-       unmap_bf_area(dev);
        free_comp_eqs(dev);
        mlx5_stop_eqs(dev);
        mlx5_free_uuars(dev, &priv->uuari);
index 5635ce7ad693e7306ee2f121e5f8d7aa78412528,e1f2e1059cfd7948f83afdf6d9fa4d28e9d5c8c1..ae378c575deb28073063b7d216c39664df4c4ae5
@@@ -32,6 -32,7 +32,7 @@@
  
  #include <linux/module.h>
  #include <linux/mlx5/driver.h>
+ #include <linux/mlx5/port.h>
  #include <linux/mlx5/cmd.h>
  #include "mlx5_core.h"
  
@@@ -324,29 -325,6 +325,29 @@@ int mlx5_query_port_vl_hw_cap(struct ml
  }
  EXPORT_SYMBOL_GPL(mlx5_query_port_vl_hw_cap);
  
 +int mlx5_core_query_ib_ppcnt(struct mlx5_core_dev *dev,
 +                           u8 port_num, void *out, size_t sz)
 +{
 +      u32 *in;
 +      int err;
 +
 +      in  = mlx5_vzalloc(sz);
 +      if (!in) {
 +              err = -ENOMEM;
 +              return err;
 +      }
 +
 +      MLX5_SET(ppcnt_reg, in, local_port, port_num);
 +
 +      MLX5_SET(ppcnt_reg, in, grp, MLX5_INFINIBAND_PORT_COUNTERS_GROUP);
 +      err = mlx5_core_access_reg(dev, in, sz, out,
 +                                 sz, MLX5_REG_PPCNT, 0, 0);
 +
 +      kvfree(in);
 +      return err;
 +}
 +EXPORT_SYMBOL_GPL(mlx5_core_query_ib_ppcnt);
 +
  int mlx5_set_port_pause(struct mlx5_core_dev *dev, u32 rx_pause, u32 tx_pause)
  {
        u32 in[MLX5_ST_SZ_DW(pfcc_reg)];
@@@ -386,3 -364,223 +387,223 @@@ int mlx5_query_port_pause(struct mlx5_c
        return 0;
  }
  EXPORT_SYMBOL_GPL(mlx5_query_port_pause);
+ int mlx5_set_port_pfc(struct mlx5_core_dev *dev, u8 pfc_en_tx, u8 pfc_en_rx)
+ {
+       u32 in[MLX5_ST_SZ_DW(pfcc_reg)];
+       u32 out[MLX5_ST_SZ_DW(pfcc_reg)];
+       memset(in, 0, sizeof(in));
+       MLX5_SET(pfcc_reg, in, local_port, 1);
+       MLX5_SET(pfcc_reg, in, pfctx, pfc_en_tx);
+       MLX5_SET(pfcc_reg, in, pfcrx, pfc_en_rx);
+       MLX5_SET_TO_ONES(pfcc_reg, in, prio_mask_tx);
+       MLX5_SET_TO_ONES(pfcc_reg, in, prio_mask_rx);
+       return mlx5_core_access_reg(dev, in, sizeof(in), out,
+                                   sizeof(out), MLX5_REG_PFCC, 0, 1);
+ }
+ EXPORT_SYMBOL_GPL(mlx5_set_port_pfc);
+ int mlx5_query_port_pfc(struct mlx5_core_dev *dev, u8 *pfc_en_tx, u8 *pfc_en_rx)
+ {
+       u32 in[MLX5_ST_SZ_DW(pfcc_reg)];
+       u32 out[MLX5_ST_SZ_DW(pfcc_reg)];
+       int err;
+       memset(in, 0, sizeof(in));
+       MLX5_SET(pfcc_reg, in, local_port, 1);
+       err = mlx5_core_access_reg(dev, in, sizeof(in), out,
+                                  sizeof(out), MLX5_REG_PFCC, 0, 0);
+       if (err)
+               return err;
+       if (pfc_en_tx)
+               *pfc_en_tx = MLX5_GET(pfcc_reg, out, pfctx);
+       if (pfc_en_rx)
+               *pfc_en_rx = MLX5_GET(pfcc_reg, out, pfcrx);
+       return 0;
+ }
+ EXPORT_SYMBOL_GPL(mlx5_query_port_pfc);
+ int mlx5_max_tc(struct mlx5_core_dev *mdev)
+ {
+       u8 num_tc = MLX5_CAP_GEN(mdev, max_tc) ? : 8;
+       return num_tc - 1;
+ }
+ int mlx5_set_port_prio_tc(struct mlx5_core_dev *mdev, u8 *prio_tc)
+ {
+       u32 in[MLX5_ST_SZ_DW(qtct_reg)];
+       u32 out[MLX5_ST_SZ_DW(qtct_reg)];
+       int err;
+       int i;
+       memset(in, 0, sizeof(in));
+       for (i = 0; i < 8; i++) {
+               if (prio_tc[i] > mlx5_max_tc(mdev))
+                       return -EINVAL;
+               MLX5_SET(qtct_reg, in, prio, i);
+               MLX5_SET(qtct_reg, in, tclass, prio_tc[i]);
+               err = mlx5_core_access_reg(mdev, in, sizeof(in), out,
+                                          sizeof(out), MLX5_REG_QTCT, 0, 1);
+               if (err)
+                       return err;
+       }
+       return 0;
+ }
+ EXPORT_SYMBOL_GPL(mlx5_set_port_prio_tc);
+ static int mlx5_set_port_qetcr_reg(struct mlx5_core_dev *mdev, u32 *in,
+                                  int inlen)
+ {
+       u32 out[MLX5_ST_SZ_DW(qtct_reg)];
+       if (!MLX5_CAP_GEN(mdev, ets))
+               return -ENOTSUPP;
+       return mlx5_core_access_reg(mdev, in, inlen, out, sizeof(out),
+                                   MLX5_REG_QETCR, 0, 1);
+ }
+ static int mlx5_query_port_qetcr_reg(struct mlx5_core_dev *mdev, u32 *out,
+                                    int outlen)
+ {
+       u32 in[MLX5_ST_SZ_DW(qtct_reg)];
+       if (!MLX5_CAP_GEN(mdev, ets))
+               return -ENOTSUPP;
+       memset(in, 0, sizeof(in));
+       return mlx5_core_access_reg(mdev, in, sizeof(in), out, outlen,
+                                   MLX5_REG_QETCR, 0, 0);
+ }
+ int mlx5_set_port_tc_group(struct mlx5_core_dev *mdev, u8 *tc_group)
+ {
+       u32 in[MLX5_ST_SZ_DW(qetc_reg)];
+       int i;
+       memset(in, 0, sizeof(in));
+       for (i = 0; i <= mlx5_max_tc(mdev); i++) {
+               MLX5_SET(qetc_reg, in, tc_configuration[i].g, 1);
+               MLX5_SET(qetc_reg, in, tc_configuration[i].group, tc_group[i]);
+       }
+       return mlx5_set_port_qetcr_reg(mdev, in, sizeof(in));
+ }
+ EXPORT_SYMBOL_GPL(mlx5_set_port_tc_group);
+ int mlx5_set_port_tc_bw_alloc(struct mlx5_core_dev *mdev, u8 *tc_bw)
+ {
+       u32 in[MLX5_ST_SZ_DW(qetc_reg)];
+       int i;
+       memset(in, 0, sizeof(in));
+       for (i = 0; i <= mlx5_max_tc(mdev); i++) {
+               MLX5_SET(qetc_reg, in, tc_configuration[i].b, 1);
+               MLX5_SET(qetc_reg, in, tc_configuration[i].bw_allocation, tc_bw[i]);
+       }
+       return mlx5_set_port_qetcr_reg(mdev, in, sizeof(in));
+ }
+ EXPORT_SYMBOL_GPL(mlx5_set_port_tc_bw_alloc);
+ int mlx5_modify_port_ets_rate_limit(struct mlx5_core_dev *mdev,
+                                   u8 *max_bw_value,
+                                   u8 *max_bw_units)
+ {
+       u32 in[MLX5_ST_SZ_DW(qetc_reg)];
+       void *ets_tcn_conf;
+       int i;
+       memset(in, 0, sizeof(in));
+       MLX5_SET(qetc_reg, in, port_number, 1);
+       for (i = 0; i <= mlx5_max_tc(mdev); i++) {
+               ets_tcn_conf = MLX5_ADDR_OF(qetc_reg, in, tc_configuration[i]);
+               MLX5_SET(ets_tcn_config_reg, ets_tcn_conf, r, 1);
+               MLX5_SET(ets_tcn_config_reg, ets_tcn_conf, max_bw_units,
+                        max_bw_units[i]);
+               MLX5_SET(ets_tcn_config_reg, ets_tcn_conf, max_bw_value,
+                        max_bw_value[i]);
+       }
+       return mlx5_set_port_qetcr_reg(mdev, in, sizeof(in));
+ }
+ EXPORT_SYMBOL_GPL(mlx5_modify_port_ets_rate_limit);
+ int mlx5_query_port_ets_rate_limit(struct mlx5_core_dev *mdev,
+                                  u8 *max_bw_value,
+                                  u8 *max_bw_units)
+ {
+       u32 out[MLX5_ST_SZ_DW(qetc_reg)];
+       void *ets_tcn_conf;
+       int err;
+       int i;
+       err = mlx5_query_port_qetcr_reg(mdev, out, sizeof(out));
+       if (err)
+               return err;
+       for (i = 0; i <= mlx5_max_tc(mdev); i++) {
+               ets_tcn_conf = MLX5_ADDR_OF(qetc_reg, out, tc_configuration[i]);
+               max_bw_value[i] = MLX5_GET(ets_tcn_config_reg, ets_tcn_conf,
+                                          max_bw_value);
+               max_bw_units[i] = MLX5_GET(ets_tcn_config_reg, ets_tcn_conf,
+                                          max_bw_units);
+       }
+       return 0;
+ }
+ EXPORT_SYMBOL_GPL(mlx5_query_port_ets_rate_limit);
+ int mlx5_set_port_wol(struct mlx5_core_dev *mdev, u8 wol_mode)
+ {
+       u32 in[MLX5_ST_SZ_DW(set_wol_rol_in)];
+       u32 out[MLX5_ST_SZ_DW(set_wol_rol_out)];
+       memset(in, 0, sizeof(in));
+       memset(out, 0, sizeof(out));
+       MLX5_SET(set_wol_rol_in, in, opcode, MLX5_CMD_OP_SET_WOL_ROL);
+       MLX5_SET(set_wol_rol_in, in, wol_mode_valid, 1);
+       MLX5_SET(set_wol_rol_in, in, wol_mode, wol_mode);
+       return mlx5_cmd_exec_check_status(mdev, in, sizeof(in),
+                                         out, sizeof(out));
+ }
+ EXPORT_SYMBOL_GPL(mlx5_set_port_wol);
+ int mlx5_query_port_wol(struct mlx5_core_dev *mdev, u8 *wol_mode)
+ {
+       u32 in[MLX5_ST_SZ_DW(query_wol_rol_in)];
+       u32 out[MLX5_ST_SZ_DW(query_wol_rol_out)];
+       int err;
+       memset(in, 0, sizeof(in));
+       memset(out, 0, sizeof(out));
+       MLX5_SET(query_wol_rol_in, in, opcode, MLX5_CMD_OP_QUERY_WOL_ROL);
+       err = mlx5_cmd_exec_check_status(mdev, in, sizeof(in),
+                                        out, sizeof(out));
+       if (!err)
+               *wol_mode = MLX5_GET(query_wol_rol_out, out, wol_mode);
+       return err;
+ }
+ EXPORT_SYMBOL_GPL(mlx5_query_port_wol);
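mlx5_max_tc() above leans on the GCC/Clang binary ?: extension: a max_tc capability of zero falls back to 8 traffic classes, and the function returns the highest TC index (count minus one). A small sketch of that fallback, with made-up capability values as input:

/* Sketch of the max-TC fallback in mlx5_max_tc(); "cap" stands in for
 * MLX5_CAP_GEN(mdev, max_tc).  Needs GCC/Clang for the binary ?: form. */
#include <stdio.h>

static int max_tc(unsigned char cap)
{
	unsigned char num_tc = cap ? : 8;	/* 0 means "not reported": assume 8 */

	return num_tc - 1;			/* highest usable TC index */
}

int main(void)
{
	printf("%d %d\n", max_tc(0), max_tc(4));	/* prints "7 3" */
	return 0;
}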
index 2eff989c6d9f0d61137fa65dd1f8f7aba0bc5e1b,df28836a1d1100672050bf878a4dba70f40b87c6..bccf17ad588e6b83c028fb1a761da78ce6edc9ba
@@@ -95,8 -95,7 +95,7 @@@ mwifiex_info_read(struct file *file, ch
  
        mwifiex_drv_get_driver_version(priv->adapter, fmt, sizeof(fmt) - 1);
  
-       if (!priv->version_str[0])
-               mwifiex_get_ver_ext(priv);
+       mwifiex_get_ver_ext(priv, 0);
  
        p += sprintf(p, "driver_name = " "\"mwifiex\"\n");
        p += sprintf(p, "driver_version = %s", fmt);
@@@ -583,6 -582,52 +582,52 @@@ done
        return ret;
  }
  
+ /* debugfs verext file write handler.
+  * This function is called when the 'verext' file is opened for writing.
+  */
+ static ssize_t
+ mwifiex_verext_write(struct file *file, const char __user *ubuf,
+                    size_t count, loff_t *ppos)
+ {
+       int ret;
+       u32 versionstrsel;
+       struct mwifiex_private *priv = (void *)file->private_data;
+       char buf[16];
+       memset(buf, 0, sizeof(buf));
+       if (copy_from_user(&buf, ubuf, min_t(size_t, sizeof(buf) - 1, count)))
+               return -EFAULT;
+       ret = kstrtou32(buf, 10, &versionstrsel);
+       if (ret)
+               return ret;
+       priv->versionstrsel = versionstrsel;
+       return count;
+ }
+ /* Proc verext file read handler.
+  * This function is called when the 'verext' file is opened for reading.
+  * It can be used to read the driver's extended version string.
+  */
+ static ssize_t
+ mwifiex_verext_read(struct file *file, char __user *ubuf,
+                   size_t count, loff_t *ppos)
+ {
+       struct mwifiex_private *priv =
+               (struct mwifiex_private *)file->private_data;
+       char buf[256];
+       int ret;
+       mwifiex_get_ver_ext(priv, priv->versionstrsel);
+       ret = snprintf(buf, sizeof(buf), "version string: %s\n",
+                      priv->version_str);
+       return simple_read_from_buffer(ubuf, count, ppos, buf, ret);
+ }
  /* Proc memrw file write handler.
   * This function is called when the 'memrw' file is opened for writing
   * This function can be used to write to a memory location.
@@@ -880,12 -925,14 +925,12 @@@ mwifiex_reset_write(struct file *file
  {
        struct mwifiex_private *priv = file->private_data;
        struct mwifiex_adapter *adapter = priv->adapter;
 -      char cmd;
        bool result;
 +      int rc;
  
 -      if (copy_from_user(&cmd, ubuf, sizeof(cmd)))
 -              return -EFAULT;
 -
 -      if (strtobool(&cmd, &result))
 -              return -EINVAL;
 +      rc = kstrtobool_from_user(ubuf, count, &result);
 +      if (rc)
 +              return rc;
  
        if (!result)
                return -EINVAL;
@@@ -938,6 -985,7 +983,7 @@@ MWIFIEX_DFS_FILE_OPS(histogram)
  MWIFIEX_DFS_FILE_OPS(debug_mask);
  MWIFIEX_DFS_FILE_OPS(timeshare_coex);
  MWIFIEX_DFS_FILE_WRITE_OPS(reset);
+ MWIFIEX_DFS_FILE_OPS(verext);
  
  /*
   * This function creates the debug FS directory structure and the files.
@@@ -966,6 -1014,7 +1012,7 @@@ mwifiex_dev_debugfs_init(struct mwifiex
        MWIFIEX_DFS_ADD_FILE(debug_mask);
        MWIFIEX_DFS_ADD_FILE(timeshare_coex);
        MWIFIEX_DFS_ADD_FILE(reset);
+       MWIFIEX_DFS_ADD_FILE(verext);
  }
  
  /*
diff --combined include/linux/kernel.h
index f4fa2b29c38c23c05d88237347e1605b95978f36,ac19239572366e05fb6fb194f9bde779c9c43186..b82646ee70eb92b8580cf302dd84eafa60b73189
@@@ -64,7 -64,7 +64,7 @@@
  #define round_down(x, y) ((x) & ~__round_mask(x, y))
  
  #define FIELD_SIZEOF(t, f) (sizeof(((t*)0)->f))
- #define DIV_ROUND_UP(n,d) (((n) + (d) - 1) / (d))
+ #define DIV_ROUND_UP __KERNEL_DIV_ROUND_UP
  #define DIV_ROUND_UP_ULL(ll,d) \
        ({ unsigned long long _tmp = (ll)+(d)-1; do_div(_tmp, d); _tmp; })
  
@@@ -357,7 -357,6 +357,7 @@@ int __must_check kstrtou16(const char *
  int __must_check kstrtos16(const char *s, unsigned int base, s16 *res);
  int __must_check kstrtou8(const char *s, unsigned int base, u8 *res);
  int __must_check kstrtos8(const char *s, unsigned int base, s8 *res);
 +int __must_check kstrtobool(const char *s, bool *res);
  
  int __must_check kstrtoull_from_user(const char __user *s, size_t count, unsigned int base, unsigned long long *res);
  int __must_check kstrtoll_from_user(const char __user *s, size_t count, unsigned int base, long long *res);
@@@ -369,7 -368,6 +369,7 @@@ int __must_check kstrtou16_from_user(co
  int __must_check kstrtos16_from_user(const char __user *s, size_t count, unsigned int base, s16 *res);
  int __must_check kstrtou8_from_user(const char __user *s, size_t count, unsigned int base, u8 *res);
  int __must_check kstrtos8_from_user(const char __user *s, size_t count, unsigned int base, s8 *res);
 +int __must_check kstrtobool_from_user(const char __user *s, size_t count, bool *res);
  
  static inline int __must_check kstrtou64_from_user(const char __user *s, size_t count, unsigned int base, u64 *res)
  {
index 9566b3b3b2c53409fb117c97d24f7e7c57ae2974,68a56bc37df28783bdda30996c1b3f16ece01c49..02ac3000ee3c14e83550e2a9481f5bb82df91422
@@@ -105,29 -105,6 +105,29 @@@ __mlx5_mask(typ, fld)
        ___t; \
  })
  
 +/* Big endian getters */
 +#define MLX5_GET64_BE(typ, p, fld) (*((__be64 *)(p) +\
 +      __mlx5_64_off(typ, fld)))
 +
 +#define MLX5_GET_BE(type_t, typ, p, fld) ({                             \
 +              type_t tmp;                                               \
 +              switch (sizeof(tmp)) {                                    \
 +              case sizeof(u8):                                          \
 +                      tmp = (__force type_t)MLX5_GET(typ, p, fld);      \
 +                      break;                                            \
 +              case sizeof(u16):                                         \
 +                      tmp = (__force type_t)cpu_to_be16(MLX5_GET(typ, p, fld)); \
 +                      break;                                            \
 +              case sizeof(u32):                                         \
 +                      tmp = (__force type_t)cpu_to_be32(MLX5_GET(typ, p, fld)); \
 +                      break;                                            \
 +              case sizeof(u64):                                         \
 +                      tmp = (__force type_t)MLX5_GET64_BE(typ, p, fld); \
 +                      break;                                            \
 +                      }                                                 \
 +              tmp;                                                      \
 +              })
 +
  enum {
        MLX5_MAX_COMMANDS               = 32,
        MLX5_CMD_DATA_BLOCK_SIZE        = 512,
@@@ -373,6 -350,12 +373,12 @@@ enum 
        MLX5_SET_PORT_PKEY_TABLE        = 20,
  };
  
+ enum {
+       MLX5_BW_NO_LIMIT   = 0,
+       MLX5_100_MBPS_UNIT = 3,
+       MLX5_GBPS_UNIT     = 4,
+ };
  enum {
        MLX5_MAX_PAGE_SHIFT             = 31
  };
@@@ -1200,6 -1183,17 +1206,17 @@@ enum 
        MLX5_RQC_RQ_TYPE_MEMORY_RQ_RPM    = 0x1,
  };
  
+ enum mlx5_wol_mode {
+       MLX5_WOL_DISABLE        = 0,
+       MLX5_WOL_SECURED_MAGIC  = 1 << 1,
+       MLX5_WOL_MAGIC          = 1 << 2,
+       MLX5_WOL_ARP            = 1 << 3,
+       MLX5_WOL_BROADCAST      = 1 << 4,
+       MLX5_WOL_MULTICAST      = 1 << 5,
+       MLX5_WOL_UNICAST        = 1 << 6,
+       MLX5_WOL_PHY_ACTIVITY   = 1 << 7,
+ };
  /* MLX5 DEV CAPs */
  
  /* TODO: EAT.ME */
@@@ -1307,8 -1301,7 +1324,8 @@@ enum 
        MLX5_RFC_3635_COUNTERS_GROUP          = 0x3,
        MLX5_ETHERNET_EXTENDED_COUNTERS_GROUP = 0x5,
        MLX5_PER_PRIORITY_COUNTERS_GROUP      = 0x10,
 -      MLX5_PER_TRAFFIC_CLASS_COUNTERS_GROUP = 0x11
 +      MLX5_PER_TRAFFIC_CLASS_COUNTERS_GROUP = 0x11,
 +      MLX5_INFINIBAND_PORT_COUNTERS_GROUP   = 0x20,
  };
  
  static inline u16 mlx5_to_sw_pkey_sz(int pkey_sz)
        return MLX5_MIN_PKEY_TABLE_SIZE << pkey_sz;
  }
  
 -#define MLX5_BY_PASS_NUM_PRIOS 9
 +#define MLX5_BY_PASS_NUM_REGULAR_PRIOS 8
 +#define MLX5_BY_PASS_NUM_DONT_TRAP_PRIOS 8
 +#define MLX5_BY_PASS_NUM_MULTICAST_PRIOS 1
 +#define MLX5_BY_PASS_NUM_PRIOS (MLX5_BY_PASS_NUM_REGULAR_PRIOS +\
 +                              MLX5_BY_PASS_NUM_DONT_TRAP_PRIOS +\
 +                              MLX5_BY_PASS_NUM_MULTICAST_PRIOS)
  
  #endif /* MLX5_DEVICE_H */
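MLX5_GET_BE above selects the byte-swap from the size of the requested type inside a statement expression. A userspace sketch of the same sizeof-switch trick, built on glibc's htobe16/htobe32 from <endian.h> rather than the mlx5 field macros:

/* Standalone sketch of the sizeof-switch used by MLX5_GET_BE; GNU statement
 * expression, like the macro it mirrors.  Only 16- and 32-bit cases shown. */
#include <endian.h>
#include <stdint.h>
#include <stdio.h>

#define GET_BE(type_t, val) ({						\
	type_t __tmp;							\
	switch (sizeof(__tmp)) {					\
	case sizeof(uint16_t):						\
		__tmp = (type_t)htobe16((uint16_t)(val));		\
		break;							\
	case sizeof(uint32_t):						\
		__tmp = (type_t)htobe32((uint32_t)(val));		\
		break;							\
	}								\
	__tmp;								\
})

int main(void)
{
	uint16_t v16 = GET_BE(uint16_t, 0x1234);
	uint32_t v32 = GET_BE(uint32_t, 0x12345678);

	printf("%04x %08x\n", v16, v32);	/* 3412 78563412 on little-endian */
	return 0;
}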
index 9108904a6a567c3472a56264a509b2059011e2ed,bb1a880a5bc537c517ed082bf6386b65009e976c..3a954465b2bfc00d81f7fff8d87a4ce038995cb6
@@@ -54,7 -54,7 +54,7 @@@ enum 
        /* one minute for the sake of bringup. Generally, commands must always
         * complete and we may need to increase this timeout value
         */
-       MLX5_CMD_TIMEOUT_MSEC   = 7200 * 1000,
+       MLX5_CMD_TIMEOUT_MSEC   = 60 * 1000,
        MLX5_CMD_WQ_MAX_NAME    = 32,
  };
  
@@@ -99,6 -99,8 +99,8 @@@ enum 
  };
  
  enum {
+       MLX5_REG_QETCR           = 0x4005,
+       MLX5_REG_QTCT            = 0x400a,
        MLX5_REG_PCAP            = 0x5001,
        MLX5_REG_PMTU            = 0x5003,
        MLX5_REG_PTYS            = 0x5004,
@@@ -338,7 -340,7 +340,7 @@@ struct mlx5_core_sig_ctx 
        u32                     sigerr_count;
  };
  
 -struct mlx5_core_mr {
 +struct mlx5_core_mkey {
        u64                     iova;
        u64                     size;
        u32                     key;
@@@ -426,7 -428,7 +428,7 @@@ struct mlx5_srq_table 
        struct radix_tree_root  tree;
  };
  
 -struct mlx5_mr_table {
 +struct mlx5_mkey_table {
        /* protect radix tree
         */
        rwlock_t                lock;
@@@ -458,8 -460,6 +460,6 @@@ struct mlx5_priv 
        struct mlx5_uuar_info   uuari;
        MLX5_DECLARE_DOORBELL_LOCK(cq_uar_lock);
  
-       struct io_mapping       *bf_mapping;
        /* pages stuff */
        struct workqueue_struct *pg_wq;
        struct rb_root          page_root;
        struct mlx5_cq_table    cq_table;
        /* end: cq staff */
  
 -      /* start: mr staff */
 -      struct mlx5_mr_table    mr_table;
 -      /* end: mr staff */
 +      /* start: mkey staff */
 +      struct mlx5_mkey_table  mkey_table;
 +      /* end: mkey staff */
  
        /* start: alloc staff */
        /* protect buffer alocation according to numa node */
@@@ -717,7 -717,8 +717,8 @@@ int mlx5_cmd_alloc_uar(struct mlx5_core
  int mlx5_cmd_free_uar(struct mlx5_core_dev *dev, u32 uarn);
  int mlx5_alloc_uuars(struct mlx5_core_dev *dev, struct mlx5_uuar_info *uuari);
  int mlx5_free_uuars(struct mlx5_core_dev *dev, struct mlx5_uuar_info *uuari);
- int mlx5_alloc_map_uar(struct mlx5_core_dev *mdev, struct mlx5_uar *uar);
+ int mlx5_alloc_map_uar(struct mlx5_core_dev *mdev, struct mlx5_uar *uar,
+                      bool map_wc);
  void mlx5_unmap_free_uar(struct mlx5_core_dev *mdev, struct mlx5_uar *uar);
  void mlx5_health_cleanup(struct mlx5_core_dev *dev);
  int mlx5_health_init(struct mlx5_core_dev *dev);
@@@ -739,18 -740,16 +740,18 @@@ int mlx5_core_query_srq(struct mlx5_cor
                        struct mlx5_query_srq_mbox_out *out);
  int mlx5_core_arm_srq(struct mlx5_core_dev *dev, struct mlx5_core_srq *srq,
                      u16 lwm, int is_srq);
 -void mlx5_init_mr_table(struct mlx5_core_dev *dev);
 -void mlx5_cleanup_mr_table(struct mlx5_core_dev *dev);
 -int mlx5_core_create_mkey(struct mlx5_core_dev *dev, struct mlx5_core_mr *mr,
 +void mlx5_init_mkey_table(struct mlx5_core_dev *dev);
 +void mlx5_cleanup_mkey_table(struct mlx5_core_dev *dev);
 +int mlx5_core_create_mkey(struct mlx5_core_dev *dev,
 +                        struct mlx5_core_mkey *mkey,
                          struct mlx5_create_mkey_mbox_in *in, int inlen,
                          mlx5_cmd_cbk_t callback, void *context,
                          struct mlx5_create_mkey_mbox_out *out);
 -int mlx5_core_destroy_mkey(struct mlx5_core_dev *dev, struct mlx5_core_mr *mr);
 -int mlx5_core_query_mkey(struct mlx5_core_dev *dev, struct mlx5_core_mr *mr,
 +int mlx5_core_destroy_mkey(struct mlx5_core_dev *dev,
 +                         struct mlx5_core_mkey *mkey);
 +int mlx5_core_query_mkey(struct mlx5_core_dev *dev, struct mlx5_core_mkey *mkey,
                         struct mlx5_query_mkey_mbox_out *out, int outlen);
 -int mlx5_core_dump_fill_mkey(struct mlx5_core_dev *dev, struct mlx5_core_mr *mr,
 +int mlx5_core_dump_fill_mkey(struct mlx5_core_dev *dev, struct mlx5_core_mkey *_mkey,
                             u32 *mkey);
  int mlx5_core_alloc_pd(struct mlx5_core_dev *dev, u32 *pdn);
  int mlx5_core_dealloc_pd(struct mlx5_core_dev *dev, u32 pdn);
@@@ -796,37 -795,6 +797,6 @@@ int mlx5_core_access_reg(struct mlx5_co
                         int size_in, void *data_out, int size_out,
                         u16 reg_num, int arg, int write);
  
- int mlx5_set_port_caps(struct mlx5_core_dev *dev, u8 port_num, u32 caps);
- int mlx5_query_port_ptys(struct mlx5_core_dev *dev, u32 *ptys,
-                        int ptys_size, int proto_mask, u8 local_port);
- int mlx5_query_port_proto_cap(struct mlx5_core_dev *dev,
-                             u32 *proto_cap, int proto_mask);
- int mlx5_query_port_proto_admin(struct mlx5_core_dev *dev,
-                               u32 *proto_admin, int proto_mask);
- int mlx5_query_port_link_width_oper(struct mlx5_core_dev *dev,
-                                   u8 *link_width_oper, u8 local_port);
- int mlx5_query_port_proto_oper(struct mlx5_core_dev *dev,
-                              u8 *proto_oper, int proto_mask,
-                              u8 local_port);
- int mlx5_set_port_proto(struct mlx5_core_dev *dev, u32 proto_admin,
-                       int proto_mask);
- int mlx5_set_port_admin_status(struct mlx5_core_dev *dev,
-                              enum mlx5_port_status status);
- int mlx5_query_port_admin_status(struct mlx5_core_dev *dev,
-                                enum mlx5_port_status *status);
- int mlx5_set_port_mtu(struct mlx5_core_dev *dev, int mtu, u8 port);
- void mlx5_query_port_max_mtu(struct mlx5_core_dev *dev, int *max_mtu, u8 port);
- void mlx5_query_port_oper_mtu(struct mlx5_core_dev *dev, int *oper_mtu,
-                             u8 port);
- int mlx5_query_port_vl_hw_cap(struct mlx5_core_dev *dev,
-                             u8 *vl_hw_cap, u8 local_port);
- int mlx5_set_port_pause(struct mlx5_core_dev *dev, u32 rx_pause, u32 tx_pause);
- int mlx5_query_port_pause(struct mlx5_core_dev *dev,
-                         u32 *rx_pause, u32 *tx_pause);
  int mlx5_debug_eq_add(struct mlx5_core_dev *dev, struct mlx5_eq *eq);
  void mlx5_debug_eq_remove(struct mlx5_core_dev *dev, struct mlx5_eq *eq);
  int mlx5_core_eq_query(struct mlx5_core_dev *dev, struct mlx5_eq *eq,
@@@ -849,8 -817,6 +819,8 @@@ int mlx5_core_destroy_psv(struct mlx5_c
  void mlx5_core_put_rsc(struct mlx5_core_rsc_common *common);
  int mlx5_query_odp_caps(struct mlx5_core_dev *dev,
                        struct mlx5_odp_caps *odp_caps);
 +int mlx5_core_query_ib_ppcnt(struct mlx5_core_dev *dev,
 +                           u8 port_num, void *out, size_t sz);
  
  static inline int fw_initializing(struct mlx5_core_dev *dev)
  {
index 9b8a02b7880fd7c26e4ac058439c8c317bc04a4b,9d91ce39eb0f0e9b43f5927c77a3cd5908a35bc7..e52730e01ed68bca8863cab4cca05e339b50cf87
@@@ -166,6 -166,8 +166,8 @@@ enum 
        MLX5_CMD_OP_SET_L2_TABLE_ENTRY            = 0x829,
        MLX5_CMD_OP_QUERY_L2_TABLE_ENTRY          = 0x82a,
        MLX5_CMD_OP_DELETE_L2_TABLE_ENTRY         = 0x82b,
+       MLX5_CMD_OP_SET_WOL_ROL                   = 0x830,
+       MLX5_CMD_OP_QUERY_WOL_ROL                 = 0x831,
        MLX5_CMD_OP_CREATE_TIR                    = 0x900,
        MLX5_CMD_OP_MODIFY_TIR                    = 0x901,
        MLX5_CMD_OP_DESTROY_TIR                   = 0x902,
@@@ -458,8 -460,7 +460,8 @@@ struct mlx5_ifc_ads_bits 
  };
  
  struct mlx5_ifc_flow_table_nic_cap_bits {
 -      u8         reserved_at_0[0x200];
 +      u8         nic_rx_multi_path_tirs[0x1];
 +      u8         reserved_at_1[0x1ff];
  
        struct mlx5_ifc_flow_table_prop_layout_bits flow_table_properties_nic_receive;
  
@@@ -730,16 -731,26 +732,28 @@@ struct mlx5_ifc_cmd_hca_cap_bits 
  
        u8         reserved_at_1bf[0x3];
        u8         log_max_msg[0x5];
-       u8         reserved_at_1c7[0x18];
+       u8         reserved_at_1c7[0x4];
+       u8         max_tc[0x4];
+       u8         reserved_at_1cf[0x6];
+       u8         rol_s[0x1];
+       u8         rol_g[0x1];
+       u8         reserved_at_1d7[0x1];
+       u8         wol_s[0x1];
+       u8         wol_g[0x1];
+       u8         wol_a[0x1];
+       u8         wol_b[0x1];
+       u8         wol_m[0x1];
+       u8         wol_u[0x1];
+       u8         wol_p[0x1];
  
        u8         stat_rate_support[0x10];
        u8         reserved_at_1ef[0xc];
        u8         cqe_version[0x4];
  
        u8         compact_address_vector[0x1];
 -      u8         reserved_at_200[0xe];
 +      u8         reserved_at_200[0x3];
 +      u8         ipoib_basic_offloads[0x1];
 +      u8         reserved_at_204[0xa];
        u8         drain_sigerr[0x1];
        u8         cmdif_checksum[0x2];
        u8         sigerr_cqe[0x1];
        u8         cd[0x1];
        u8         reserved_at_22c[0x1];
        u8         apm[0x1];
 -      u8         reserved_at_22e[0x7];
 +      u8         reserved_at_22e[0x2];
 +      u8         imaicl[0x1];
 +      u8         reserved_at_231[0x4];
        u8         qkv[0x1];
        u8         pkv[0x1];
 -      u8         reserved_at_237[0x4];
 +      u8         set_deth_sqpn[0x1];
 +      u8         reserved_at_239[0x3];
        u8         xrc[0x1];
        u8         ud[0x1];
        u8         uc[0x1];
@@@ -1214,36 -1222,6 +1228,36 @@@ struct mlx5_ifc_phys_layer_cntrs_bits 
        u8         reserved_at_640[0x180];
  };
  
 +struct mlx5_ifc_ib_port_cntrs_grp_data_layout_bits {
 +      u8         symbol_error_counter[0x10];
 +
 +      u8         link_error_recovery_counter[0x8];
 +
 +      u8         link_downed_counter[0x8];
 +
 +      u8         port_rcv_errors[0x10];
 +
 +      u8         port_rcv_remote_physical_errors[0x10];
 +
 +      u8         port_rcv_switch_relay_errors[0x10];
 +
 +      u8         port_xmit_discards[0x10];
 +
 +      u8         port_xmit_constraint_errors[0x8];
 +
 +      u8         port_rcv_constraint_errors[0x8];
 +
 +      u8         reserved_at_70[0x8];
 +
 +      u8         link_overrun_errors[0x8];
 +
 +      u8         reserved_at_80[0x10];
 +
 +      u8         vl_15_dropped[0x10];
 +
 +      u8         reserved_at_a0[0xa0];
 +};
 +
  struct mlx5_ifc_eth_per_traffic_grp_data_layout_bits {
        u8         transmit_queue_high[0x20];
  
@@@ -1816,7 -1794,7 +1830,7 @@@ struct mlx5_ifc_qpc_bits 
        u8         log_sq_size[0x4];
        u8         reserved_at_55[0x6];
        u8         rlky[0x1];
 -      u8         reserved_at_5c[0x4];
 +      u8         ulp_stateless_offload_mode[0x4];
  
        u8         counter_set_id[0x8];
        u8         uar_page[0x18];
@@@ -2654,7 -2632,6 +2668,7 @@@ union mlx5_ifc_eth_cntrs_grp_data_layou
        struct mlx5_ifc_eth_extended_cntrs_grp_data_layout_bits eth_extended_cntrs_grp_data_layout;
        struct mlx5_ifc_eth_per_prio_grp_data_layout_bits eth_per_prio_grp_data_layout;
        struct mlx5_ifc_eth_per_traffic_grp_data_layout_bits eth_per_traffic_grp_data_layout;
 +      struct mlx5_ifc_ib_port_cntrs_grp_data_layout_bits ib_port_cntrs_grp_data_layout;
        struct mlx5_ifc_phys_layer_cntrs_bits phys_layer_cntrs;
        u8         reserved_at_0[0x7c0];
  };
@@@ -3163,8 -3140,7 +3177,8 @@@ struct mlx5_ifc_query_vport_counter_in_
        u8         op_mod[0x10];
  
        u8         other_vport[0x1];
 -      u8         reserved_at_41[0xf];
 +      u8         reserved_at_41[0xb];
 +      u8         port_num[0x4];
        u8         vport_number[0x10];
  
        u8         reserved_at_60[0x60];
@@@ -6911,6 -6887,54 +6925,54 @@@ struct mlx5_ifc_mtt_bits 
        u8         rd_en[0x1];
  };
  
+ struct mlx5_ifc_query_wol_rol_out_bits {
+       u8         status[0x8];
+       u8         reserved_at_8[0x18];
+       u8         syndrome[0x20];
+       u8         reserved_at_40[0x10];
+       u8         rol_mode[0x8];
+       u8         wol_mode[0x8];
+       u8         reserved_at_60[0x20];
+ };
+ 
+ struct mlx5_ifc_query_wol_rol_in_bits {
+       u8         opcode[0x10];
+       u8         reserved_at_10[0x10];
+       u8         reserved_at_20[0x10];
+       u8         op_mod[0x10];
+       u8         reserved_at_40[0x40];
+ };
+ 
+ struct mlx5_ifc_set_wol_rol_out_bits {
+       u8         status[0x8];
+       u8         reserved_at_8[0x18];
+       u8         syndrome[0x20];
+       u8         reserved_at_40[0x40];
+ };
+ 
+ struct mlx5_ifc_set_wol_rol_in_bits {
+       u8         opcode[0x10];
+       u8         reserved_at_10[0x10];
+       u8         reserved_at_20[0x10];
+       u8         op_mod[0x10];
+       u8         rol_mode_valid[0x1];
+       u8         wol_mode_valid[0x1];
+       u8         reserved_at_42[0xe];
+       u8         rol_mode[0x8];
+       u8         wol_mode[0x8];
+       u8         reserved_at_60[0x20];
+ };
+ 
  enum {
        MLX5_INITIAL_SEG_NIC_INTERFACE_FULL_DRIVER  = 0x0,
        MLX5_INITIAL_SEG_NIC_INTERFACE_DISABLED     = 0x1,
@@@ -6994,7 -7018,6 +7056,7 @@@ union mlx5_ifc_ports_control_registers_
        struct mlx5_ifc_peir_reg_bits peir_reg;
        struct mlx5_ifc_pelc_reg_bits pelc_reg;
        struct mlx5_ifc_pfcc_reg_bits pfcc_reg;
 +      struct mlx5_ifc_ib_port_cntrs_grp_data_layout_bits ib_port_cntrs_grp_data_layout;
        struct mlx5_ifc_phys_layer_cntrs_bits phys_layer_cntrs;
        struct mlx5_ifc_pifr_reg_bits pifr_reg;
        struct mlx5_ifc_pipg_reg_bits pipg_reg;
@@@ -7102,4 -7125,49 +7164,49 @@@ struct mlx5_ifc_modify_flow_table_in_bi
        u8         reserved_at_100[0x100];
  };
  
+ struct mlx5_ifc_ets_tcn_config_reg_bits {
+       u8         g[0x1];
+       u8         b[0x1];
+       u8         r[0x1];
+       u8         reserved_at_3[0x9];
+       u8         group[0x4];
+       u8         reserved_at_10[0x9];
+       u8         bw_allocation[0x7];
+       u8         reserved_at_20[0xc];
+       u8         max_bw_units[0x4];
+       u8         reserved_at_30[0x8];
+       u8         max_bw_value[0x8];
+ };
+ 
+ struct mlx5_ifc_ets_global_config_reg_bits {
+       u8         reserved_at_0[0x2];
+       u8         r[0x1];
+       u8         reserved_at_3[0x1d];
+       u8         reserved_at_20[0xc];
+       u8         max_bw_units[0x4];
+       u8         reserved_at_30[0x8];
+       u8         max_bw_value[0x8];
+ };
+ 
+ struct mlx5_ifc_qetc_reg_bits {
+       u8                                         reserved_at_0[0x8];
+       u8                                         port_number[0x8];
+       u8                                         reserved_at_10[0x30];
+       struct mlx5_ifc_ets_tcn_config_reg_bits    tc_configuration[0x8];
+       struct mlx5_ifc_ets_global_config_reg_bits global_configuration;
+ };
+ 
+ struct mlx5_ifc_qtct_reg_bits {
+       u8         reserved_at_0[0x8];
+       u8         port_number[0x8];
+       u8         reserved_at_10[0xd];
+       u8         prio[0x3];
+       u8         reserved_at_20[0x1d];
+       u8         tclass[0x3];
+ };
+ 
  #endif /* MLX5_IFC_H */
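
For context, the query/set WOL_ROL layouts added above are meant to be driven through the usual mlx5 command interface. Below is a minimal sketch of such a caller; it assumes the MLX5_CMD_OP_SET_WOL_ROL opcode and the MLX5_SET()/MLX5_ST_SZ_DW() accessors and command helpers from the mlx5 core, none of which appear in this hunk, so treat it as an illustration rather than the driver code in this merge.

#include <linux/mlx5/driver.h>

/* Illustrative only: issue SET_WOL_ROL using the layout defined above. */
static int set_wol_mode(struct mlx5_core_dev *mdev, u8 wol_mode)
{
	u32 in[MLX5_ST_SZ_DW(set_wol_rol_in)]   = {0};
	u32 out[MLX5_ST_SZ_DW(set_wol_rol_out)] = {0};

	MLX5_SET(set_wol_rol_in, in, opcode, MLX5_CMD_OP_SET_WOL_ROL);
	MLX5_SET(set_wol_rol_in, in, wol_mode_valid, 1);
	MLX5_SET(set_wol_rol_in, in, wol_mode, wol_mode);

	return mlx5_cmd_exec_check_status(mdev, in, sizeof(in),
					  out, sizeof(out));
}
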
index 79ec7bbf01557e523aee3ed263425b6fc3360741,0967a246745743b6ee9cd0ed69b947a421ec7bc5..78fda2a69ab86bd45b55eed2a5bc3a4ebd9bbe16
@@@ -468,7 -468,6 +468,7 @@@ struct perf_event 
        int                             group_flags;
        struct perf_event               *group_leader;
        struct pmu                      *pmu;
 +      void                            *pmu_private;
  
        enum perf_event_active_state    state;
        unsigned int                    attach_state;
@@@ -966,11 -965,20 +966,20 @@@ DECLARE_PER_CPU(struct perf_callchain_e
  
  extern void perf_callchain_user(struct perf_callchain_entry *entry, struct pt_regs *regs);
  extern void perf_callchain_kernel(struct perf_callchain_entry *entry, struct pt_regs *regs);
+ extern struct perf_callchain_entry *
+ get_perf_callchain(struct pt_regs *regs, u32 init_nr, bool kernel, bool user,
+                  bool crosstask, bool add_mark);
+ extern int get_callchain_buffers(void);
+ extern void put_callchain_buffers(void);
  
- static inline void perf_callchain_store(struct perf_callchain_entry *entry, u64 ip)
+ static inline int perf_callchain_store(struct perf_callchain_entry *entry, u64 ip)
  {
-       if (entry->nr < PERF_MAX_STACK_DEPTH)
+       if (entry->nr < PERF_MAX_STACK_DEPTH) {
                entry->ip[entry->nr++] = ip;
+               return 0;
+       } else {
+               return -1; /* no more room, stop walking the stack */
+       }
  }
  
  extern int sysctl_perf_event_paranoid;
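
Since perf_callchain_store() now returns an int, an architecture's stack walker can stop as soon as the entry buffer is full instead of looping to the maximum depth. A minimal, hypothetical walker using the new return value (the helper name and frame array are illustrative, not from this series) could look like:

/* Stop storing as soon as perf_callchain_store() reports no more room. */
static void store_callchain_frames(struct perf_callchain_entry *entry,
				   const u64 *frames, unsigned int nr)
{
	unsigned int i;

	for (i = 0; i < nr; i++)
		if (perf_callchain_store(entry, frames[i]))
			break;
}
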
@@@ -1110,6 -1118,12 +1119,6 @@@ static inline void perf_event_task_tick
  static inline int perf_event_release_kernel(struct perf_event *event) { return 0; }
  #endif
  
 -#if defined(CONFIG_PERF_EVENTS) && defined(CONFIG_NO_HZ_FULL)
 -extern bool perf_event_can_stop_tick(void);
 -#else
 -static inline bool perf_event_can_stop_tick(void)                     { return true; }
 -#endif
 -
  #if defined(CONFIG_PERF_EVENTS) && defined(CONFIG_CPU_SUP_INTEL)
  extern void perf_restore_debug_store(void);
  #else
index 5a57409da37bba7aaa652ec860044db4061ba4a4,9d237669c52c1b673093edbad06afbf34ea8ff44..e2ac0620d4beb226e16620a940c97bb9cd012d79
@@@ -82,7 -82,7 +82,7 @@@ struct sctp_bind_addr
  struct sctp_ulpq;
  struct sctp_ep_common;
  struct sctp_ssnmap;
 -struct crypto_hash;
 +struct crypto_shash;
  
  
  #include <net/sctp/tsnmap.h>
@@@ -166,7 -166,7 +166,7 @@@ struct sctp_sock 
        struct sctp_pf *pf;
  
        /* Access to HMAC transform. */
 -      struct crypto_hash *hmac;
 +      struct crypto_shash *hmac;
        char *sctp_hmac_alg;
  
        /* What is our base endpointer? */
@@@ -535,7 -535,6 +535,6 @@@ struct sctp_datamsg 
  struct sctp_datamsg *sctp_datamsg_from_user(struct sctp_association *,
                                            struct sctp_sndrcvinfo *,
                                            struct iov_iter *);
- void sctp_datamsg_free(struct sctp_datamsg *);
  void sctp_datamsg_put(struct sctp_datamsg *);
  void sctp_chunk_fail(struct sctp_chunk *, int error);
  int sctp_chunk_abandoned(struct sctp_chunk *);
@@@ -656,7 -655,7 +655,7 @@@ void sctp_chunk_free(struct sctp_chunk 
  void  *sctp_addto_chunk(struct sctp_chunk *, int len, const void *data);
  struct sctp_chunk *sctp_chunkify(struct sk_buff *,
                                 const struct sctp_association *,
-                                struct sock *);
+                                struct sock *, gfp_t gfp);
  void sctp_init_addrs(struct sctp_chunk *, union sctp_addr *,
                     union sctp_addr *);
  const union sctp_addr *sctp_source(const struct sctp_chunk *chunk);
@@@ -718,10 -717,10 +717,10 @@@ struct sctp_packet *sctp_packet_init(st
                                     __u16 sport, __u16 dport);
  struct sctp_packet *sctp_packet_config(struct sctp_packet *, __u32 vtag, int);
  sctp_xmit_t sctp_packet_transmit_chunk(struct sctp_packet *,
-                                        struct sctp_chunk *, int);
+                                      struct sctp_chunk *, int, gfp_t);
  sctp_xmit_t sctp_packet_append_chunk(struct sctp_packet *,
                                       struct sctp_chunk *);
- int sctp_packet_transmit(struct sctp_packet *);
+ int sctp_packet_transmit(struct sctp_packet *, gfp_t);
  void sctp_packet_free(struct sctp_packet *);
  
  static inline int sctp_packet_empty(struct sctp_packet *packet)
@@@ -1054,7 -1053,7 +1053,7 @@@ struct sctp_outq 
  void sctp_outq_init(struct sctp_association *, struct sctp_outq *);
  void sctp_outq_teardown(struct sctp_outq *);
  void sctp_outq_free(struct sctp_outq*);
- int sctp_outq_tail(struct sctp_outq *, struct sctp_chunk *chunk);
+ int sctp_outq_tail(struct sctp_outq *, struct sctp_chunk *chunk, gfp_t);
  int sctp_outq_sack(struct sctp_outq *, struct sctp_chunk *);
  int sctp_outq_is_empty(const struct sctp_outq *);
  void sctp_outq_restart(struct sctp_outq *);
  void sctp_retransmit(struct sctp_outq *, struct sctp_transport *,
                     sctp_retransmit_reason_t);
  void sctp_retransmit_mark(struct sctp_outq *, struct sctp_transport *, __u8);
- int sctp_outq_uncork(struct sctp_outq *);
+ int sctp_outq_uncork(struct sctp_outq *, gfp_t gfp);
  /* Uncork and flush an outqueue.  */
  static inline void sctp_outq_cork(struct sctp_outq *q)
  {
@@@ -1234,7 -1233,7 +1233,7 @@@ struct sctp_endpoint 
        /* SCTP AUTH: array of the HMACs that will be allocated
         * we need this per association so that we don't serialize
         */
 -      struct crypto_hash **auth_hmacs;
 +      struct crypto_shash **auth_hmacs;
  
        /* SCTP-AUTH: hmacs for the endpoint encoded into parameter */
         struct sctp_hmac_algo_param *auth_hmacs_list;
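
The crypto_hash to crypto_shash switch above moves SCTP's HMAC transforms onto the synchronous hash API. As a generic illustration of that calling pattern (not the SCTP code from this merge), digesting one buffer with a crypto_shash transform looks roughly like this:

#include <crypto/hash.h>

/* Digest a single buffer with a synchronous hash transform. */
static int shash_digest_buf(struct crypto_shash *tfm, const u8 *data,
			    unsigned int len, u8 *out)
{
	SHASH_DESC_ON_STACK(desc, tfm);

	desc->tfm = tfm;
	desc->flags = 0;	/* no special request flags */
	return crypto_shash_digest(desc, data, len, out);
}
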
diff --combined include/net/tcp.h
index b04bc989ad6c77a7ab214207f536722c2b0321ac,c8dbd293daae0fefb323e2163b986e421d61a05b..b91370f61be64a9ef2bcff2d8f20701f5ced09b1
@@@ -27,6 -27,7 +27,6 @@@
  #include <linux/cache.h>
  #include <linux/percpu.h>
  #include <linux/skbuff.h>
 -#include <linux/crypto.h>
  #include <linux/cryptohash.h>
  #include <linux/kref.h>
  #include <linux/ktime.h>
@@@ -238,13 -239,6 +238,6 @@@ extern struct inet_timewait_death_row t
  extern int sysctl_tcp_timestamps;
  extern int sysctl_tcp_window_scaling;
  extern int sysctl_tcp_sack;
- extern int sysctl_tcp_fin_timeout;
- extern int sysctl_tcp_syn_retries;
- extern int sysctl_tcp_synack_retries;
- extern int sysctl_tcp_retries1;
- extern int sysctl_tcp_retries2;
- extern int sysctl_tcp_orphan_retries;
- extern int sysctl_tcp_syncookies;
  extern int sysctl_tcp_fastopen;
  extern int sysctl_tcp_retrans_collapse;
  extern int sysctl_tcp_stdurg;
@@@ -273,7 -267,6 +266,6 @@@ extern int sysctl_tcp_thin_dupack
  extern int sysctl_tcp_early_retrans;
  extern int sysctl_tcp_limit_output_bytes;
  extern int sysctl_tcp_challenge_ack_limit;
- extern unsigned int sysctl_tcp_notsent_lowat;
  extern int sysctl_tcp_min_tso_segs;
  extern int sysctl_tcp_min_rtt_wlen;
  extern int sysctl_tcp_autocorking;
@@@ -567,6 -560,7 +559,7 @@@ void tcp_rearm_rto(struct sock *sk)
  void tcp_synack_rtt_meas(struct sock *sk, struct request_sock *req);
  void tcp_reset(struct sock *sk);
  void tcp_skb_mark_lost_uncond_verify(struct tcp_sock *tp, struct sk_buff *skb);
+ void tcp_fin(struct sock *sk);
  
  /* tcp_timer.c */
  void tcp_init_xmit_timers(struct sock *);
@@@ -962,9 -956,11 +955,11 @@@ static inline void tcp_enable_fack(stru
   */
  static inline void tcp_enable_early_retrans(struct tcp_sock *tp)
  {
+       struct net *net = sock_net((struct sock *)tp);
        tp->do_early_retrans = sysctl_tcp_early_retrans &&
                sysctl_tcp_early_retrans < 4 && !sysctl_tcp_thin_dupack &&
-               sysctl_tcp_reordering == 3;
+               net->ipv4.sysctl_tcp_reordering == 3;
  }
  
  static inline void tcp_disable_early_retrans(struct tcp_sock *tp)
@@@ -1251,7 -1247,7 +1246,7 @@@ static inline u32 keepalive_time_elapse
  
  static inline int tcp_fin_time(const struct sock *sk)
  {
-       int fin_timeout = tcp_sk(sk)->linger2 ? : sysctl_tcp_fin_timeout;
+       int fin_timeout = tcp_sk(sk)->linger2 ? : sock_net(sk)->ipv4.sysctl_tcp_fin_timeout;
        const int rto = inet_csk(sk)->icsk_rto;
  
        if (fin_timeout < (rto << 2) - (rto >> 1))
@@@ -1324,6 -1320,9 +1319,6 @@@ static inline void tcp_clear_all_retran
        tp->retransmit_skb_hint = NULL;
  }
  
 -/* MD5 Signature */
 -struct crypto_hash;
 -
  union tcp_md5_addr {
        struct in_addr  a4;
  #if IS_ENABLED(CONFIG_IPV6)
@@@ -1372,7 -1371,7 +1367,7 @@@ union tcp_md5sum_block 
  
  /* - pool: digest algorithm, hash description and scratch buffer */
  struct tcp_md5sig_pool {
 -      struct hash_desc        md5_desc;
 +      struct ahash_request    *md5_req;
        union tcp_md5sum_block  md5_blk;
  };
  
@@@ -1433,6 -1432,7 +1428,7 @@@ void tcp_free_fastopen_req(struct tcp_s
  
  extern struct tcp_fastopen_context __rcu *tcp_fastopen_ctx;
  int tcp_fastopen_reset_cipher(void *key, unsigned int len);
+ void tcp_fastopen_add_skb(struct sock *sk, struct sk_buff *skb);
  struct sock *tcp_try_fastopen(struct sock *sk, struct sk_buff *skb,
                              struct request_sock *req,
                              struct tcp_fastopen_cookie *foc,
@@@ -1681,7 -1681,8 +1677,8 @@@ void __tcp_v4_send_check(struct sk_buf
  
  static inline u32 tcp_notsent_lowat(const struct tcp_sock *tp)
  {
-       return tp->notsent_lowat ?: sysctl_tcp_notsent_lowat;
+       struct net *net = sock_net((struct sock *)tp);
+       return tp->notsent_lowat ?: net->ipv4.sysctl_tcp_notsent_lowat;
  }
  
  static inline bool tcp_stream_memory_free(const struct sock *sk)
@@@ -1815,4 -1816,38 +1812,38 @@@ static inline void skb_set_tcp_pure_ack
        skb->truesize = 2;
  }
  
+ static inline int tcp_inq(struct sock *sk)
+ {
+       struct tcp_sock *tp = tcp_sk(sk);
+       int answ;
+       if ((1 << sk->sk_state) & (TCPF_SYN_SENT | TCPF_SYN_RECV)) {
+               answ = 0;
+       } else if (sock_flag(sk, SOCK_URGINLINE) ||
+                  !tp->urg_data ||
+                  before(tp->urg_seq, tp->copied_seq) ||
+                  !before(tp->urg_seq, tp->rcv_nxt)) {
+               answ = tp->rcv_nxt - tp->copied_seq;
+               /* Subtract 1, if FIN was received */
+               if (answ && sock_flag(sk, SOCK_DONE))
+                       answ--;
+       } else {
+               answ = tp->urg_seq - tp->copied_seq;
+       }
+       return answ;
+ }
+ 
+ static inline void tcp_segs_in(struct tcp_sock *tp, const struct sk_buff *skb)
+ {
+       u16 segs_in;
+       segs_in = max_t(u16, 1, skb_shinfo(skb)->gso_segs);
+       tp->segs_in += segs_in;
+       if (skb->len > tcp_hdrlen(skb))
+               tp->data_segs_in += segs_in;
+ }
+ 
  #endif        /* _TCP_H */
index 5c9ae6a9b7f5e5b8e953d5a1efc16704a7b1fd69,e25ebcfbcb48b8422a00d34ec783a698b3488efe..0495884defc18f7778fa2c09a5b164e97f87850f
@@@ -138,7 -138,6 +138,7 @@@ header-y += genetlink.
  header-y += gen_stats.h
  header-y += gfs2_ondisk.h
  header-y += gigaset_dev.h
 +header-y += gpio.h
  header-y += gsmmux.h
  header-y += hdlcdrv.h
  header-y += hdlc.h
@@@ -174,6 -173,7 +174,7 @@@ header-y += if_hippi.
  header-y += if_infiniband.h
  header-y += if_link.h
  header-y += if_ltalk.h
+ header-y += if_macsec.h
  header-y += if_packet.h
  header-y += if_phonet.h
  header-y += if_plip.h
diff --combined lib/Kconfig.debug
index f28f7fad452fe462b324761898a4a686581ef90c,60d09e9e8e8b7ad5b11a55c033223d3651178372..eed9987bceb92b773777350396155f3652f2e6e9
@@@ -1442,19 -1442,6 +1442,19 @@@ config DEBUG_BLOCK_EXT_DEV
  
          Say N if you are unsure.
  
 +config CPU_HOTPLUG_STATE_CONTROL
 +      bool "Enable CPU hotplug state control"
 +      depends on DEBUG_KERNEL
 +      depends on HOTPLUG_CPU
 +      default n
 +      help
 +        Allows writing the individual steps between "offline" and "online" to
 +        the CPUs sysfs target file so that the hotplug states can be stepped
 +        through one at a time. This is a debug option for now, as the hotplug
 +        machinery cannot be stopped and restarted at arbitrary points yet.
 +
 +        Say N if you are unsure.
 +
  config NOTIFIER_ERROR_INJECTION
        tristate "Notifier error injection"
        depends on DEBUG_KERNEL
@@@ -1766,6 -1753,14 +1766,14 @@@ config TEST_KSTRTO
  config TEST_PRINTF
        tristate "Test printf() family of functions at runtime"
  
+ config TEST_BITMAP
+       tristate "Test bitmap_*() family of functions at runtime"
+       default n
+       help
+         Enable this option to test the bitmap functions at boot.
+         If unsure, say N.
+ 
  config TEST_RHASHTABLE
        tristate "Perform selftest on resizable hash table"
        default n
diff --combined net/core/sock.c
index 67e7efe12ff7e727c215ed0c67b1b063537c8eca,4493ff820c2c0a13d5e257f26aae4284aa0b474a..b67b9aedb230f9480d7ae91d8a8a79f5693187a5
@@@ -987,6 -987,10 +987,10 @@@ set_rcvbuf
                sk->sk_incoming_cpu = val;
                break;
  
+       case SO_CNX_ADVICE:
+               if (val == 1)
+                       dst_negative_advice(sk);
+               break;
        default:
                ret = -ENOPROTOOPT;
                break;
@@@ -1531,6 -1535,7 +1535,7 @@@ struct sock *sk_clone_lock(const struc
                        newsk = NULL;
                        goto out;
                }
+               RCU_INIT_POINTER(newsk->sk_reuseport_cb, NULL);
  
                newsk->sk_err      = 0;
                newsk->sk_priority = 0;
@@@ -1903,7 -1908,7 +1908,7 @@@ EXPORT_SYMBOL(sock_cmsg_send)
  bool skb_page_frag_refill(unsigned int sz, struct page_frag *pfrag, gfp_t gfp)
  {
        if (pfrag->page) {
 -              if (atomic_read(&pfrag->page->_count) == 1) {
 +              if (page_ref_count(pfrag->page) == 1) {
                        pfrag->offset = 0;
                        return true;
                }
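
The SO_CNX_ADVICE hunk above lets an application hint that its current connection is performing badly, which makes the kernel call dst_negative_advice() and potentially re-validate the cached route. From userspace the option would be exercised roughly as follows (the constant shown is the asm-generic value; some architectures define a different number):

#include <sys/socket.h>

#ifndef SO_CNX_ADVICE
#define SO_CNX_ADVICE 53	/* asm-generic/socket.h value */
#endif

/* Advise the kernel that this connection is struggling. */
static int advise_bad_connection(int fd)
{
	int val = 1;

	return setsockopt(fd, SOL_SOCKET, SO_CNX_ADVICE, &val, sizeof(val));
}
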
diff --combined net/ipv4/syncookies.c
index 2d5589b61e9faa0fc53c7ddfa198e593a02ddea5,ba0dcffada3b74cdaf8a4c1bf422704541f6d69f..4c04f09338e3410dd75a085674c0fcf355f43164
@@@ -19,8 -19,6 +19,6 @@@
  #include <net/tcp.h>
  #include <net/route.h>
  
- extern int sysctl_tcp_syncookies;
  static u32 syncookie_secret[2][16-4+SHA_DIGEST_WORDS] __read_mostly;
  
  #define COOKIEBITS 24 /* Upper bits store count */
@@@ -50,7 -48,8 +48,7 @@@
  #define TSBITS        6
  #define TSMASK        (((__u32)1 << TSBITS) - 1)
  
 -static DEFINE_PER_CPU(__u32 [16 + 5 + SHA_WORKSPACE_WORDS],
 -                    ipv4_cookie_scratch);
 +static DEFINE_PER_CPU(__u32 [16 + 5 + SHA_WORKSPACE_WORDS], ipv4_cookie_scratch);
  
  static u32 cookie_hash(__be32 saddr, __be32 daddr, __be16 sport, __be16 dport,
                       u32 count, int c)
@@@ -306,7 -305,7 +304,7 @@@ struct sock *cookie_v4_check(struct soc
        __u8 rcv_wscale;
        struct flowi4 fl4;
  
-       if (!sysctl_tcp_syncookies || !th->ack || th->rst)
+       if (!sock_net(sk)->ipv4.sysctl_tcp_syncookies || !th->ack || th->rst)
                goto out;
  
        if (tcp_synq_no_recent_overflow(sk))
diff --combined net/ipv4/tcp.c
index 4804645bdf0200eadf82ba27914edc09bbe24ac5,992b3103ec3eed6faa50d386a0f8971437736144..08b8b960a8edc9e1791c395192746acfc8cb1caa
  
  #define pr_fmt(fmt) "TCP: " fmt
  
 +#include <crypto/hash.h>
  #include <linux/kernel.h>
  #include <linux/module.h>
  #include <linux/types.h>
  #include <linux/swap.h>
  #include <linux/cache.h>
  #include <linux/err.h>
 -#include <linux/crypto.h>
  #include <linux/time.h>
  #include <linux/slab.h>
  
  #include <asm/unaligned.h>
  #include <net/busy_poll.h>
  
- int sysctl_tcp_fin_timeout __read_mostly = TCP_FIN_TIMEOUT;
  int sysctl_tcp_min_tso_segs __read_mostly = 2;
  
  int sysctl_tcp_autocorking __read_mostly = 1;
@@@ -406,7 -404,7 +404,7 @@@ void tcp_init_sock(struct sock *sk
        tp->mss_cache = TCP_MSS_DEFAULT;
        u64_stats_init(&tp->syncp);
  
-       tp->reordering = sysctl_tcp_reordering;
+       tp->reordering = sock_net(sk)->ipv4.sysctl_tcp_reordering;
        tcp_enable_early_retrans(tp);
        tcp_assign_congestion_control(sk);
  
@@@ -558,20 -556,7 +556,7 @@@ int tcp_ioctl(struct sock *sk, int cmd
                        return -EINVAL;
  
                slow = lock_sock_fast(sk);
-               if ((1 << sk->sk_state) & (TCPF_SYN_SENT | TCPF_SYN_RECV))
-                       answ = 0;
-               else if (sock_flag(sk, SOCK_URGINLINE) ||
-                        !tp->urg_data ||
-                        before(tp->urg_seq, tp->copied_seq) ||
-                        !before(tp->urg_seq, tp->rcv_nxt)) {
-                       answ = tp->rcv_nxt - tp->copied_seq;
-                       /* Subtract 1, if FIN was received */
-                       if (answ && sock_flag(sk, SOCK_DONE))
-                               answ--;
-               } else
-                       answ = tp->urg_seq - tp->copied_seq;
+               answ = tcp_inq(sk);
                unlock_sock_fast(sk, slow);
                break;
        case SIOCATMARK:
@@@ -1466,8 -1451,10 +1451,10 @@@ static struct sk_buff *tcp_recv_skb(str
  
        while ((skb = skb_peek(&sk->sk_receive_queue)) != NULL) {
                offset = seq - TCP_SKB_CB(skb)->seq;
-               if (TCP_SKB_CB(skb)->tcp_flags & TCPHDR_SYN)
+               if (unlikely(TCP_SKB_CB(skb)->tcp_flags & TCPHDR_SYN)) {
+                       pr_err_once("%s: found a SYN, please report !\n", __func__);
                        offset--;
+               }
                if (offset < skb->len || (TCP_SKB_CB(skb)->tcp_flags & TCPHDR_FIN)) {
                        *off = offset;
                        return skb;
@@@ -1657,8 -1644,10 +1644,10 @@@ int tcp_recvmsg(struct sock *sk, struc
                                break;
  
                        offset = *seq - TCP_SKB_CB(skb)->seq;
-                       if (TCP_SKB_CB(skb)->tcp_flags & TCPHDR_SYN)
+                       if (unlikely(TCP_SKB_CB(skb)->tcp_flags & TCPHDR_SYN)) {
+                               pr_err_once("%s: found a SYN, please report !\n", __func__);
                                offset--;
+                       }
                        if (offset < skb->len)
                                goto found_ok_skb;
                        if (TCP_SKB_CB(skb)->tcp_flags & TCPHDR_FIN)
@@@ -2326,6 -2315,7 +2315,7 @@@ static int do_tcp_setsockopt(struct soc
  {
        struct tcp_sock *tp = tcp_sk(sk);
        struct inet_connection_sock *icsk = inet_csk(sk);
+       struct net *net = sock_net(sk);
        int val;
        int err = 0;
  
        case TCP_LINGER2:
                if (val < 0)
                        tp->linger2 = -1;
-               else if (val > sysctl_tcp_fin_timeout / HZ)
+               else if (val > net->ipv4.sysctl_tcp_fin_timeout / HZ)
                        tp->linger2 = 0;
                else
                        tp->linger2 = val * HZ;
@@@ -2639,6 -2629,7 +2629,7 @@@ void tcp_get_info(struct sock *sk, stru
        const struct inet_connection_sock *icsk = inet_csk(sk);
        u32 now = tcp_time_stamp;
        unsigned int start;
+       int notsent_bytes;
        u64 rate64;
        u32 rate;
  
        } while (u64_stats_fetch_retry_irq(&tp->syncp, start));
        info->tcpi_segs_out = tp->segs_out;
        info->tcpi_segs_in = tp->segs_in;
+       notsent_bytes = READ_ONCE(tp->write_seq) - READ_ONCE(tp->snd_nxt);
+       info->tcpi_notsent_bytes = max(0, notsent_bytes);
+       info->tcpi_min_rtt = tcp_min_rtt(tp);
+       info->tcpi_data_segs_in = tp->data_segs_in;
+       info->tcpi_data_segs_out = tp->data_segs_out;
  }
  EXPORT_SYMBOL_GPL(tcp_get_info);
  
@@@ -2727,6 -2725,7 +2725,7 @@@ static int do_tcp_getsockopt(struct soc
  {
        struct inet_connection_sock *icsk = inet_csk(sk);
        struct tcp_sock *tp = tcp_sk(sk);
+       struct net *net = sock_net(sk);
        int val, len;
  
        if (get_user(len, optlen))
                val = keepalive_probes(tp);
                break;
        case TCP_SYNCNT:
-               val = icsk->icsk_syn_retries ? : sysctl_tcp_syn_retries;
+               val = icsk->icsk_syn_retries ? : net->ipv4.sysctl_tcp_syn_retries;
                break;
        case TCP_LINGER2:
                val = tp->linger2;
                if (val >= 0)
-                       val = (val ? : sysctl_tcp_fin_timeout) / HZ;
+                       val = (val ? : net->ipv4.sysctl_tcp_fin_timeout) / HZ;
                break;
        case TCP_DEFER_ACCEPT:
                val = retrans_to_secs(icsk->icsk_accept_queue.rskq_defer_accept,
@@@ -2943,26 -2942,17 +2942,26 @@@ static bool tcp_md5sig_pool_populated 
  
  static void __tcp_alloc_md5sig_pool(void)
  {
 +      struct crypto_ahash *hash;
        int cpu;
  
 +      hash = crypto_alloc_ahash("md5", 0, CRYPTO_ALG_ASYNC);
 +      if (IS_ERR(hash))
 +              return;
 +
        for_each_possible_cpu(cpu) {
 -              if (!per_cpu(tcp_md5sig_pool, cpu).md5_desc.tfm) {
 -                      struct crypto_hash *hash;
 +              struct ahash_request *req;
  
 -                      hash = crypto_alloc_hash("md5", 0, CRYPTO_ALG_ASYNC);
 -                      if (IS_ERR(hash))
 -                              return;
 -                      per_cpu(tcp_md5sig_pool, cpu).md5_desc.tfm = hash;
 -              }
 +              if (per_cpu(tcp_md5sig_pool, cpu).md5_req)
 +                      continue;
 +
 +              req = ahash_request_alloc(hash, GFP_KERNEL);
 +              if (!req)
 +                      return;
 +
 +              ahash_request_set_callback(req, 0, NULL, NULL);
 +
 +              per_cpu(tcp_md5sig_pool, cpu).md5_req = req;
        }
        /* before setting tcp_md5sig_pool_populated, we must commit all writes
         * to memory. See smp_rmb() in tcp_get_md5sig_pool()
@@@ -3012,6 -3002,7 +3011,6 @@@ int tcp_md5_hash_header(struct tcp_md5s
  {
        struct scatterlist sg;
        struct tcphdr hdr;
 -      int err;
  
        /* We are not allowed to change tcphdr, make a local copy */
        memcpy(&hdr, th, sizeof(hdr));
  
        /* options aren't included in the hash */
        sg_init_one(&sg, &hdr, sizeof(hdr));
 -      err = crypto_hash_update(&hp->md5_desc, &sg, sizeof(hdr));
 -      return err;
 +      ahash_request_set_crypt(hp->md5_req, &sg, NULL, sizeof(hdr));
 +      return crypto_ahash_update(hp->md5_req);
  }
  EXPORT_SYMBOL(tcp_md5_hash_header);
  
@@@ -3029,7 -3020,7 +3028,7 @@@ int tcp_md5_hash_skb_data(struct tcp_md
  {
        struct scatterlist sg;
        const struct tcphdr *tp = tcp_hdr(skb);
 -      struct hash_desc *desc = &hp->md5_desc;
 +      struct ahash_request *req = hp->md5_req;
        unsigned int i;
        const unsigned int head_data_len = skb_headlen(skb) > header_len ?
                                           skb_headlen(skb) - header_len : 0;
        sg_init_table(&sg, 1);
  
        sg_set_buf(&sg, ((u8 *) tp) + header_len, head_data_len);
 -      if (crypto_hash_update(desc, &sg, head_data_len))
 +      ahash_request_set_crypt(req, &sg, NULL, head_data_len);
 +      if (crypto_ahash_update(req))
                return 1;
  
        for (i = 0; i < shi->nr_frags; ++i) {
  
                sg_set_page(&sg, page, skb_frag_size(f),
                            offset_in_page(offset));
 -              if (crypto_hash_update(desc, &sg, skb_frag_size(f)))
 +              ahash_request_set_crypt(req, &sg, NULL, skb_frag_size(f));
 +              if (crypto_ahash_update(req))
                        return 1;
        }
  
@@@ -3068,8 -3057,7 +3067,8 @@@ int tcp_md5_hash_key(struct tcp_md5sig_
        struct scatterlist sg;
  
        sg_init_one(&sg, key->key, key->keylen);
 -      return crypto_hash_update(&hp->md5_desc, &sg, key->keylen);
 +      ahash_request_set_crypt(hp->md5_req, &sg, NULL, key->keylen);
 +      return crypto_ahash_update(hp->md5_req);
  }
  EXPORT_SYMBOL(tcp_md5_hash_key);
  
diff --combined net/ipv4/tcp_fastopen.c
index 4c65ca1a86d1033a8034a7d4881e19d5ad711cc4,4fc0061bebf482e88aea7fbe3c78ac26a33a16c5..cffd8f9ed1a953031e0a08c090c17c0bcd62effa
@@@ -1,4 -1,3 +1,4 @@@
 +#include <linux/crypto.h>
  #include <linux/err.h>
  #include <linux/init.h>
  #include <linux/kernel.h>
@@@ -125,6 -124,49 +125,49 @@@ static bool tcp_fastopen_cookie_gen(str
        return false;
  }
  
+ /* If an incoming SYN or SYNACK frame contains a payload and/or FIN,
+  * queue this additional data / FIN.
+  */
+ void tcp_fastopen_add_skb(struct sock *sk, struct sk_buff *skb)
+ {
+       struct tcp_sock *tp = tcp_sk(sk);
+       if (TCP_SKB_CB(skb)->end_seq == tp->rcv_nxt)
+               return;
+       skb = skb_clone(skb, GFP_ATOMIC);
+       if (!skb)
+               return;
+       skb_dst_drop(skb);
+       /* segs_in has been initialized to 1 in tcp_create_openreq_child().
+        * Hence, reset segs_in to 0 before calling tcp_segs_in()
+        * to avoid double counting.  Also, tcp_segs_in() expects
+        * skb->len to include the tcp_hdrlen.  Hence, it should
+        * be called before __skb_pull().
+        */
+       tp->segs_in = 0;
+       tcp_segs_in(tp, skb);
+       __skb_pull(skb, tcp_hdrlen(skb));
+       skb_set_owner_r(skb, sk);
+       TCP_SKB_CB(skb)->seq++;
+       TCP_SKB_CB(skb)->tcp_flags &= ~TCPHDR_SYN;
+       tp->rcv_nxt = TCP_SKB_CB(skb)->end_seq;
+       __skb_queue_tail(&sk->sk_receive_queue, skb);
+       tp->syn_data_acked = 1;
+       /* u64_stats_update_begin(&tp->syncp) not needed here,
+        * as we certainly are not changing upper 32bit value (0)
+        */
+       tp->bytes_received = skb->len;
+       if (TCP_SKB_CB(skb)->tcp_flags & TCPHDR_FIN)
+               tcp_fin(sk);
+ }
+ 
  static struct sock *tcp_fastopen_create_child(struct sock *sk,
                                              struct sk_buff *skb,
                                              struct dst_entry *dst,
        struct tcp_sock *tp;
        struct request_sock_queue *queue = &inet_csk(sk)->icsk_accept_queue;
        struct sock *child;
-       u32 end_seq;
        bool own_req;
  
        req->num_retrans = 0;
        tcp_init_metrics(child);
        tcp_init_buffer_space(child);
  
-       /* Queue the data carried in the SYN packet.
-        * We used to play tricky games with skb_get().
-        * With lockless listener, it is a dead end.
-        * Do not think about it.
-        *
-        * XXX (TFO) - we honor a zero-payload TFO request for now,
-        * (any reason not to?) but no need to queue the skb since
-        * there is no data. How about SYN+FIN?
-        */
-       end_seq = TCP_SKB_CB(skb)->end_seq;
-       if (end_seq != TCP_SKB_CB(skb)->seq + 1) {
-               struct sk_buff *skb2 = skb_clone(skb, GFP_ATOMIC);
-               if (likely(skb2)) {
-                       skb_dst_drop(skb2);
-                       __skb_pull(skb2, tcp_hdrlen(skb));
-                       skb_set_owner_r(skb2, child);
-                       __skb_queue_tail(&child->sk_receive_queue, skb2);
-                       tp->syn_data_acked = 1;
-                       /* u64_stats_update_begin(&tp->syncp) not needed here,
-                        * as we certainly are not changing upper 32bit value (0)
-                        */
-                       tp->bytes_received = end_seq - TCP_SKB_CB(skb)->seq - 1;
-               } else {
-                       end_seq = TCP_SKB_CB(skb)->seq + 1;
-               }
-       }
-       tcp_rsk(req)->rcv_nxt = tp->rcv_nxt = end_seq;
+       tp->rcv_nxt = TCP_SKB_CB(skb)->seq + 1;
+       tcp_fastopen_add_skb(child, skb);
+       tcp_rsk(req)->rcv_nxt = tp->rcv_nxt;
        /* tcp_conn_request() is sending the SYNACK,
         * and queues the child into listener accept queue.
         */
diff --combined net/ipv4/tcp_ipv4.c
index 4fdbf4e56797dd6e6084613c7321e46b5c856578,e7528b101e680db7f980c05033133f10c1679a04..ad450509029bceb74e324c096fbf0e1ab413b7cf
@@@ -81,7 -81,7 +81,7 @@@
  #include <linux/proc_fs.h>
  #include <linux/seq_file.h>
  
 -#include <linux/crypto.h>
 +#include <crypto/hash.h>
  #include <linux/scatterlist.h>
  
  int sysctl_tcp_tw_reuse __read_mostly;
@@@ -319,8 -319,6 +319,6 @@@ void tcp_req_err(struct sock *sk, u32 s
        /* ICMPs are not backlogged, hence we cannot get
         * an established socket here.
         */
-       WARN_ON(req->sk);
        if (seq != tcp_rsk(req)->snt_isn) {
                NET_INC_STATS_BH(net, LINUX_MIB_OUTOFWINDOWICMPS);
        } else if (abort) {
@@@ -642,8 -640,8 +640,8 @@@ static void tcp_v4_send_reset(const str
                 * Incoming packet is checked with md5 hash with finding key,
                 * no RST generated if md5 hash doesn't match.
                 */
-               sk1 = __inet_lookup_listener(net,
-                                            &tcp_hashinfo, ip_hdr(skb)->saddr,
+               sk1 = __inet_lookup_listener(net, &tcp_hashinfo, NULL, 0,
+                                            ip_hdr(skb)->saddr,
                                             th->source, ip_hdr(skb)->daddr,
                                             ntohs(th->source), inet_iif(skb));
                /* don't send rst if it can't find key */
@@@ -865,7 -863,6 +863,6 @@@ static void tcp_v4_reqsk_destructor(str
        kfree(inet_rsk(req)->opt);
  }
  
  #ifdef CONFIG_TCP_MD5SIG
  /*
   * RFC2385 MD5 checksumming requires a mapping of
@@@ -1039,22 -1036,21 +1036,22 @@@ static int tcp_v4_md5_hash_pseudoheader
        bp->len = cpu_to_be16(nbytes);
  
        sg_init_one(&sg, bp, sizeof(*bp));
 -      return crypto_hash_update(&hp->md5_desc, &sg, sizeof(*bp));
 +      ahash_request_set_crypt(hp->md5_req, &sg, NULL, sizeof(*bp));
 +      return crypto_ahash_update(hp->md5_req);
  }
  
  static int tcp_v4_md5_hash_hdr(char *md5_hash, const struct tcp_md5sig_key *key,
                               __be32 daddr, __be32 saddr, const struct tcphdr *th)
  {
        struct tcp_md5sig_pool *hp;
 -      struct hash_desc *desc;
 +      struct ahash_request *req;
  
        hp = tcp_get_md5sig_pool();
        if (!hp)
                goto clear_hash_noput;
 -      desc = &hp->md5_desc;
 +      req = hp->md5_req;
  
 -      if (crypto_hash_init(desc))
 +      if (crypto_ahash_init(req))
                goto clear_hash;
        if (tcp_v4_md5_hash_pseudoheader(hp, daddr, saddr, th->doff << 2))
                goto clear_hash;
                goto clear_hash;
        if (tcp_md5_hash_key(hp, key))
                goto clear_hash;
 -      if (crypto_hash_final(desc, md5_hash))
 +      ahash_request_set_crypt(req, NULL, md5_hash, 0);
 +      if (crypto_ahash_final(req))
                goto clear_hash;
  
        tcp_put_md5sig_pool();
@@@ -1081,7 -1076,7 +1078,7 @@@ int tcp_v4_md5_hash_skb(char *md5_hash
                        const struct sk_buff *skb)
  {
        struct tcp_md5sig_pool *hp;
 -      struct hash_desc *desc;
 +      struct ahash_request *req;
        const struct tcphdr *th = tcp_hdr(skb);
        __be32 saddr, daddr;
  
        hp = tcp_get_md5sig_pool();
        if (!hp)
                goto clear_hash_noput;
 -      desc = &hp->md5_desc;
 +      req = hp->md5_req;
  
 -      if (crypto_hash_init(desc))
 +      if (crypto_ahash_init(req))
                goto clear_hash;
  
        if (tcp_v4_md5_hash_pseudoheader(hp, daddr, saddr, skb->len))
                goto clear_hash;
        if (tcp_md5_hash_key(hp, key))
                goto clear_hash;
 -      if (crypto_hash_final(desc, md5_hash))
 +      ahash_request_set_crypt(req, NULL, md5_hash, 0);
 +      if (crypto_ahash_final(req))
                goto clear_hash;
  
        tcp_put_md5sig_pool();
@@@ -1590,7 -1584,8 +1587,8 @@@ int tcp_v4_rcv(struct sk_buff *skb
        TCP_SKB_CB(skb)->sacked  = 0;
  
  lookup:
-       sk = __inet_lookup_skb(&tcp_hashinfo, skb, th->source, th->dest);
+       sk = __inet_lookup_skb(&tcp_hashinfo, skb, __tcp_hdrlen(th), th->source,
+                              th->dest);
        if (!sk)
                goto no_tcp_socket;
  
@@@ -1653,7 -1648,7 +1651,7 @@@ process
        sk_incoming_cpu_update(sk);
  
        bh_lock_sock_nested(sk);
-       tcp_sk(sk)->segs_in += max_t(u16, 1, skb_shinfo(skb)->gso_segs);
+       tcp_segs_in(tcp_sk(sk), skb);
        ret = 0;
        if (!sock_owned_by_user(sk)) {
                if (!tcp_prequeue(sk, skb))
@@@ -1706,7 -1701,8 +1704,8 @@@ do_time_wait
        switch (tcp_timewait_state_process(inet_twsk(sk), skb, th)) {
        case TCP_TW_SYN: {
                struct sock *sk2 = inet_lookup_listener(dev_net(skb->dev),
-                                                       &tcp_hashinfo,
+                                                       &tcp_hashinfo, skb,
+                                                       __tcp_hdrlen(th),
                                                        iph->saddr, th->source,
                                                        iph->daddr, th->dest,
                                                        inet_iif(skb));
@@@ -2398,6 -2394,16 +2397,16 @@@ static int __net_init tcp_sk_init(struc
        net->ipv4.sysctl_tcp_keepalive_probes = TCP_KEEPALIVE_PROBES;
        net->ipv4.sysctl_tcp_keepalive_intvl = TCP_KEEPALIVE_INTVL;
  
+       net->ipv4.sysctl_tcp_syn_retries = TCP_SYN_RETRIES;
+       net->ipv4.sysctl_tcp_synack_retries = TCP_SYNACK_RETRIES;
+       net->ipv4.sysctl_tcp_syncookies = 1;
+       net->ipv4.sysctl_tcp_reordering = TCP_FASTRETRANS_THRESH;
+       net->ipv4.sysctl_tcp_retries1 = TCP_RETR1;
+       net->ipv4.sysctl_tcp_retries2 = TCP_RETR2;
+       net->ipv4.sysctl_tcp_orphan_retries = 0;
+       net->ipv4.sysctl_tcp_fin_timeout = TCP_FIN_TIMEOUT;
+       net->ipv4.sysctl_tcp_notsent_lowat = UINT_MAX;
        return 0;
  fail:
        tcp_sk_exit(net);
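
With the per-namespace defaults initialised above, code that used to read a global TCP sysctl now derives the value from the socket's network namespace, as the syncookies and FIN-timeout hunks in this merge show. A minimal illustration (hypothetical helper, not part of the diff):

#include <net/sock.h>

/* Namespace-local view of the syncookies knob for a given socket. */
static bool tcp_syncookies_enabled(const struct sock *sk)
{
	return sock_net(sk)->ipv4.sysctl_tcp_syncookies != 0;
}
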
diff --combined net/ipv6/syncookies.c
index aae3e5ca63ea9a9a7b7822d3905330562a5d8a3c,0e393ff7f5d07e7294df6cda18deddad568bddbb..aab91fa86c5e71aaaf2f96308e7b7d8918e959b5
@@@ -41,7 -41,8 +41,7 @@@ static __u16 const msstab[] = 
        9000 - 60,
  };
  
 -static DEFINE_PER_CPU(__u32 [16 + 5 + SHA_WORKSPACE_WORDS],
 -                    ipv6_cookie_scratch);
 +static DEFINE_PER_CPU(__u32 [16 + 5 + SHA_WORKSPACE_WORDS], ipv6_cookie_scratch);
  
  static u32 cookie_hash(const struct in6_addr *saddr, const struct in6_addr *daddr,
                       __be16 sport, __be16 dport, u32 count, int c)
@@@ -147,7 -148,7 +147,7 @@@ struct sock *cookie_v6_check(struct soc
        struct dst_entry *dst;
        __u8 rcv_wscale;
  
-       if (!sysctl_tcp_syncookies || !th->ack || th->rst)
+       if (!sock_net(sk)->ipv4.sysctl_tcp_syncookies || !th->ack || th->rst)
                goto out;
  
        if (tcp_synq_no_recent_overflow(sk))
diff --combined net/ipv6/tcp_ipv6.c
index 3447859bdc579e2829e17a989cfc02e6071d932b,9c16565b70cc56b9b796f1b98ab8399831f65a42..711d209f912473b28eddf3eb7f83a51d568d9f4d
@@@ -66,7 -66,7 +66,7 @@@
  #include <linux/proc_fs.h>
  #include <linux/seq_file.h>
  
 -#include <linux/crypto.h>
 +#include <crypto/hash.h>
  #include <linux/scatterlist.h>
  
  static void   tcp_v6_send_reset(const struct sock *sk, struct sk_buff *skb);
@@@ -541,8 -541,7 +541,8 @@@ static int tcp_v6_md5_hash_pseudoheader
        bp->len = cpu_to_be32(nbytes);
  
        sg_init_one(&sg, bp, sizeof(*bp));
 -      return crypto_hash_update(&hp->md5_desc, &sg, sizeof(*bp));
 +      ahash_request_set_crypt(hp->md5_req, &sg, NULL, sizeof(*bp));
 +      return crypto_ahash_update(hp->md5_req);
  }
  
  static int tcp_v6_md5_hash_hdr(char *md5_hash, struct tcp_md5sig_key *key,
                               const struct tcphdr *th)
  {
        struct tcp_md5sig_pool *hp;
 -      struct hash_desc *desc;
 +      struct ahash_request *req;
  
        hp = tcp_get_md5sig_pool();
        if (!hp)
                goto clear_hash_noput;
 -      desc = &hp->md5_desc;
 +      req = hp->md5_req;
  
 -      if (crypto_hash_init(desc))
 +      if (crypto_ahash_init(req))
                goto clear_hash;
        if (tcp_v6_md5_hash_pseudoheader(hp, daddr, saddr, th->doff << 2))
                goto clear_hash;
                goto clear_hash;
        if (tcp_md5_hash_key(hp, key))
                goto clear_hash;
 -      if (crypto_hash_final(desc, md5_hash))
 +      ahash_request_set_crypt(req, NULL, md5_hash, 0);
 +      if (crypto_ahash_final(req))
                goto clear_hash;
  
        tcp_put_md5sig_pool();
@@@ -586,7 -584,7 +586,7 @@@ static int tcp_v6_md5_hash_skb(char *md
  {
        const struct in6_addr *saddr, *daddr;
        struct tcp_md5sig_pool *hp;
 -      struct hash_desc *desc;
 +      struct ahash_request *req;
        const struct tcphdr *th = tcp_hdr(skb);
  
        if (sk) { /* valid for establish/request sockets */
        hp = tcp_get_md5sig_pool();
        if (!hp)
                goto clear_hash_noput;
 -      desc = &hp->md5_desc;
 +      req = hp->md5_req;
  
 -      if (crypto_hash_init(desc))
 +      if (crypto_ahash_init(req))
                goto clear_hash;
  
        if (tcp_v6_md5_hash_pseudoheader(hp, daddr, saddr, skb->len))
                goto clear_hash;
        if (tcp_md5_hash_key(hp, key))
                goto clear_hash;
 -      if (crypto_hash_final(desc, md5_hash))
 +      ahash_request_set_crypt(req, NULL, md5_hash, 0);
 +      if (crypto_ahash_final(req))
                goto clear_hash;
  
        tcp_put_md5sig_pool();
@@@ -870,7 -867,8 +870,8 @@@ static void tcp_v6_send_reset(const str
                 * no RST generated if md5 hash doesn't match.
                 */
                sk1 = inet6_lookup_listener(dev_net(skb_dst(skb)->dev),
-                                          &tcp_hashinfo, &ipv6h->saddr,
+                                          &tcp_hashinfo, NULL, 0,
+                                          &ipv6h->saddr,
                                           th->source, &ipv6h->daddr,
                                           ntohs(th->source), tcp_v6_iif(skb));
                if (!sk1)
@@@ -1379,8 -1377,8 +1380,8 @@@ static int tcp_v6_rcv(struct sk_buff *s
        hdr = ipv6_hdr(skb);
  
  lookup:
-       sk = __inet6_lookup_skb(&tcp_hashinfo, skb, th->source, th->dest,
-                               inet6_iif(skb));
+       sk = __inet6_lookup_skb(&tcp_hashinfo, skb, __tcp_hdrlen(th),
+                               th->source, th->dest, inet6_iif(skb));
        if (!sk)
                goto no_tcp_socket;
  
@@@ -1445,7 -1443,7 +1446,7 @@@ process
        sk_incoming_cpu_update(sk);
  
        bh_lock_sock_nested(sk);
-       tcp_sk(sk)->segs_in += max_t(u16, 1, skb_shinfo(skb)->gso_segs);
+       tcp_segs_in(tcp_sk(sk), skb);
        ret = 0;
        if (!sock_owned_by_user(sk)) {
                if (!tcp_prequeue(sk, skb))
@@@ -1504,6 -1502,7 +1505,7 @@@ do_time_wait
                struct sock *sk2;
  
                sk2 = inet6_lookup_listener(dev_net(skb->dev), &tcp_hashinfo,
+                                           skb, __tcp_hdrlen(th),
                                            &ipv6_hdr(skb)->saddr, th->source,
                                            &ipv6_hdr(skb)->daddr,
                                            ntohs(th->dest), tcp_v6_iif(skb));
@@@ -1869,7 -1868,7 +1871,7 @@@ struct proto tcpv6_prot = 
        .sendpage               = tcp_sendpage,
        .backlog_rcv            = tcp_v6_do_rcv,
        .release_cb             = tcp_release_cb,
-       .hash                   = inet_hash,
+       .hash                   = inet6_hash,
        .unhash                 = inet_unhash,
        .get_port               = inet_csk_get_port,
        .enter_memory_pressure  = tcp_enter_memory_pressure,
diff --combined net/rxrpc/ar-internal.h
index 71598f5b11b71db20495a095eb167c5e909f8b0e,a3002f4ddc906fda10953f1084793c0163ce13ff..cd6cdbe87125a827186b85cbd8286dea9d8aab3e
@@@ -16,7 -16,7 +16,7 @@@
        BUG_ON(atomic_read((X)) >> (sizeof(atomic_t) - 2) == \
               (POISON_FREE << 8 | POISON_FREE))
  #else
- #define CHECK_SLAB_OKAY(X) do {} while(0)
+ #define CHECK_SLAB_OKAY(X) do {} while (0)
  #endif
  
  #define FCRYPT_BSIZE 8
@@@ -70,11 -70,30 +70,30 @@@ struct rxrpc_sock 
  #define RXRPC_SECURITY_MAX    RXRPC_SECURITY_ENCRYPT
        struct sockaddr_rxrpc   srx;            /* local address */
        sa_family_t             proto;          /* protocol created with */
-       __be16                  service_id;     /* service ID of local/remote service */
  };
  
  #define rxrpc_sk(__sk) container_of((__sk), struct rxrpc_sock, sk)
  
+ /*
+  * CPU-byteorder normalised Rx packet header.
+  */
+ struct rxrpc_host_header {
+       u32             epoch;          /* client boot timestamp */
+       u32             cid;            /* connection and channel ID */
+       u32             callNumber;     /* call ID (0 for connection-level packets) */
+       u32             seq;            /* sequence number of pkt in call stream */
+       u32             serial;         /* serial number of pkt sent to network */
+       u8              type;           /* packet type */
+       u8              flags;          /* packet flags */
+       u8              userStatus;     /* app-layer defined status */
+       u8              securityIndex;  /* security protocol ID */
+       union {
+               u16     _rsvd;          /* reserved */
+               u16     cksum;          /* kerberos security checksum */
+       };
+       u16             serviceId;      /* service ID */
+ } __packed;
+ 
  /*
   * RxRPC socket buffer private variables
   * - max 48 bytes (struct sk_buff::cb)
@@@ -89,7 -108,7 +108,7 @@@ struct rxrpc_skb_priv 
                bool            need_resend;    /* T if needs resending */
        };
  
-       struct rxrpc_header     hdr;            /* RxRPC packet header from this packet */
+       struct rxrpc_host_header hdr;           /* RxRPC packet header from this packet */
  };
  
  #define rxrpc_skb(__skb) ((struct rxrpc_skb_priv *) &(__skb)->cb)
@@@ -230,7 -249,7 +249,7 @@@ struct rxrpc_conn_bundle 
        atomic_t                usage;
        int                     debug_id;       /* debug ID for printks */
        unsigned short          num_conns;      /* number of connections in this bundle */
-       __be16                  service_id;     /* service ID */
+       u16                     service_id;     /* Service ID for this bundle */
        u8                      security_ix;    /* security type */
  };
  
@@@ -252,7 -271,7 +271,7 @@@ struct rxrpc_connection 
        struct rxrpc_security   *security;      /* applied security module */
        struct key              *key;           /* security for this connection (client) */
        struct key              *server_key;    /* security for this service */
 -      struct crypto_blkcipher *cipher;        /* encryption handle */
 +      struct crypto_skcipher  *cipher;        /* encryption handle */
        struct rxrpc_crypt      csum_iv;        /* packet checksum base */
        unsigned long           events;
  #define RXRPC_CONN_CHALLENGE  0               /* send challenge packet */
        rwlock_t                lock;           /* access lock */
        spinlock_t              state_lock;     /* state-change lock */
        atomic_t                usage;
-       u32                     real_conn_id;   /* connection ID (host-endian) */
        enum {                                  /* current state of connection */
                RXRPC_CONN_UNUSED,              /* - connection not yet attempted */
                RXRPC_CONN_CLIENT,              /* - client connection */
        u8                      security_size;  /* security header size */
        u32                     security_level; /* security level negotiated */
        u32                     security_nonce; /* response re-use preventer */
-       /* the following are all in net order */
-       __be32                  epoch;          /* epoch of this connection */
-       __be32                  cid;            /* connection ID */
-       __be16                  service_id;     /* service ID */
+       u32                     epoch;          /* epoch of this connection */
+       u32                     cid;            /* connection ID */
+       u16                     service_id;     /* service ID for this connection */
        u8                      security_ix;    /* security type */
        u8                      in_clientflag;  /* RXRPC_CLIENT_INITIATED if we are server */
        u8                      out_clientflag; /* RXRPC_CLIENT_INITIATED if we are client */
  };
  
+ /*
+  * Flags in call->flags.
+  */
+ enum rxrpc_call_flag {
+       RXRPC_CALL_RELEASED,            /* call has been released - no more message to userspace */
+       RXRPC_CALL_TERMINAL_MSG,        /* call has given the socket its final message */
+       RXRPC_CALL_RCVD_LAST,           /* all packets received */
+       RXRPC_CALL_RUN_RTIMER,          /* Tx resend timer started */
+       RXRPC_CALL_TX_SOFT_ACK,         /* sent some soft ACKs */
+       RXRPC_CALL_PROC_BUSY,           /* the processor is busy */
+       RXRPC_CALL_INIT_ACCEPT,         /* acceptance was initiated */
+       RXRPC_CALL_HAS_USERID,          /* has a user ID attached */
+       RXRPC_CALL_EXPECT_OOS,          /* expect out of sequence packets */
+ };
+ 
+ /*
+  * Events that can be raised on a call.
+  */
+ enum rxrpc_call_event {
+       RXRPC_CALL_EV_RCVD_ACKALL,      /* ACKALL or reply received */
+       RXRPC_CALL_EV_RCVD_BUSY,        /* busy packet received */
+       RXRPC_CALL_EV_RCVD_ABORT,       /* abort packet received */
+       RXRPC_CALL_EV_RCVD_ERROR,       /* network error received */
+       RXRPC_CALL_EV_ACK_FINAL,        /* need to generate final ACK (and release call) */
+       RXRPC_CALL_EV_ACK,              /* need to generate ACK */
+       RXRPC_CALL_EV_REJECT_BUSY,      /* need to generate busy message */
+       RXRPC_CALL_EV_ABORT,            /* need to generate abort */
+       RXRPC_CALL_EV_CONN_ABORT,       /* local connection abort generated */
+       RXRPC_CALL_EV_RESEND_TIMER,     /* Tx resend timer expired */
+       RXRPC_CALL_EV_RESEND,           /* Tx resend required */
+       RXRPC_CALL_EV_DRAIN_RX_OOS,     /* drain the Rx out of sequence queue */
+       RXRPC_CALL_EV_LIFE_TIMER,       /* call's lifetimer ran out */
+       RXRPC_CALL_EV_ACCEPTED,         /* incoming call accepted by userspace app */
+       RXRPC_CALL_EV_SECURED,          /* incoming call's connection is now secure */
+       RXRPC_CALL_EV_POST_ACCEPT,      /* need to post an "accept?" message to the app */
+       RXRPC_CALL_EV_RELEASE,          /* need to release the call's resources */
+ };
+ 
+ /*
+  * The states that a call can be in.
+  */
+ enum rxrpc_call_state {
+       RXRPC_CALL_CLIENT_SEND_REQUEST, /* - client sending request phase */
+       RXRPC_CALL_CLIENT_AWAIT_REPLY,  /* - client awaiting reply */
+       RXRPC_CALL_CLIENT_RECV_REPLY,   /* - client receiving reply phase */
+       RXRPC_CALL_CLIENT_FINAL_ACK,    /* - client sending final ACK phase */
+       RXRPC_CALL_SERVER_SECURING,     /* - server securing request connection */
+       RXRPC_CALL_SERVER_ACCEPTING,    /* - server accepting request */
+       RXRPC_CALL_SERVER_RECV_REQUEST, /* - server receiving request */
+       RXRPC_CALL_SERVER_ACK_REQUEST,  /* - server pending ACK of request */
+       RXRPC_CALL_SERVER_SEND_REPLY,   /* - server sending reply */
+       RXRPC_CALL_SERVER_AWAIT_ACK,    /* - server awaiting final ACK */
+       RXRPC_CALL_COMPLETE,            /* - call completed */
+       RXRPC_CALL_SERVER_BUSY,         /* - call rejected by busy server */
+       RXRPC_CALL_REMOTELY_ABORTED,    /* - call aborted by peer */
+       RXRPC_CALL_LOCALLY_ABORTED,     /* - call aborted locally on error or close */
+       RXRPC_CALL_NETWORK_ERROR,       /* - call terminated by network error */
+       RXRPC_CALL_DEAD,                /* - call is dead */
+       NR__RXRPC_CALL_STATES
+ };
+ 
  /*
   * RxRPC call definition
   * - matched by { connection, call_id }
@@@ -317,57 -394,13 +394,13 @@@ struct rxrpc_call 
        unsigned long           user_call_ID;   /* user-defined call ID */
        unsigned long           creation_jif;   /* time of call creation */
        unsigned long           flags;
- #define RXRPC_CALL_RELEASED   0       /* call has been released - no more message to userspace */
- #define RXRPC_CALL_TERMINAL_MSG       1       /* call has given the socket its final message */
- #define RXRPC_CALL_RCVD_LAST  2       /* all packets received */
- #define RXRPC_CALL_RUN_RTIMER 3       /* Tx resend timer started */
- #define RXRPC_CALL_TX_SOFT_ACK        4       /* sent some soft ACKs */
- #define RXRPC_CALL_PROC_BUSY  5       /* the processor is busy */
- #define RXRPC_CALL_INIT_ACCEPT        6       /* acceptance was initiated */
- #define RXRPC_CALL_HAS_USERID 7       /* has a user ID attached */
- #define RXRPC_CALL_EXPECT_OOS 8       /* expect out of sequence packets */
        unsigned long           events;
- #define RXRPC_CALL_RCVD_ACKALL        0       /* ACKALL or reply received */
- #define RXRPC_CALL_RCVD_BUSY  1       /* busy packet received */
- #define RXRPC_CALL_RCVD_ABORT 2       /* abort packet received */
- #define RXRPC_CALL_RCVD_ERROR 3       /* network error received */
- #define RXRPC_CALL_ACK_FINAL  4       /* need to generate final ACK (and release call) */
- #define RXRPC_CALL_ACK                5       /* need to generate ACK */
- #define RXRPC_CALL_REJECT_BUSY        6       /* need to generate busy message */
- #define RXRPC_CALL_ABORT      7       /* need to generate abort */
- #define RXRPC_CALL_CONN_ABORT 8       /* local connection abort generated */
- #define RXRPC_CALL_RESEND_TIMER       9       /* Tx resend timer expired */
- #define RXRPC_CALL_RESEND     10      /* Tx resend required */
- #define RXRPC_CALL_DRAIN_RX_OOS       11      /* drain the Rx out of sequence queue */
- #define RXRPC_CALL_LIFE_TIMER 12      /* call's lifetimer ran out */
- #define RXRPC_CALL_ACCEPTED   13      /* incoming call accepted by userspace app */
- #define RXRPC_CALL_SECURED    14      /* incoming call's connection is now secure */
- #define RXRPC_CALL_POST_ACCEPT        15      /* need to post an "accept?" message to the app */
- #define RXRPC_CALL_RELEASE    16      /* need to release the call's resources */
        spinlock_t              lock;
        rwlock_t                state_lock;     /* lock for state transition */
        atomic_t                usage;
        atomic_t                sequence;       /* Tx data packet sequence counter */
        u32                     abort_code;     /* local/remote abort code */
-       enum {                                  /* current state of call */
-               RXRPC_CALL_CLIENT_SEND_REQUEST, /* - client sending request phase */
-               RXRPC_CALL_CLIENT_AWAIT_REPLY,  /* - client awaiting reply */
-               RXRPC_CALL_CLIENT_RECV_REPLY,   /* - client receiving reply phase */
-               RXRPC_CALL_CLIENT_FINAL_ACK,    /* - client sending final ACK phase */
-               RXRPC_CALL_SERVER_SECURING,     /* - server securing request connection */
-               RXRPC_CALL_SERVER_ACCEPTING,    /* - server accepting request */
-               RXRPC_CALL_SERVER_RECV_REQUEST, /* - server receiving request */
-               RXRPC_CALL_SERVER_ACK_REQUEST,  /* - server pending ACK of request */
-               RXRPC_CALL_SERVER_SEND_REPLY,   /* - server sending reply */
-               RXRPC_CALL_SERVER_AWAIT_ACK,    /* - server awaiting final ACK */
-               RXRPC_CALL_COMPLETE,            /* - call completed */
-               RXRPC_CALL_SERVER_BUSY,         /* - call rejected by busy server */
-               RXRPC_CALL_REMOTELY_ABORTED,    /* - call aborted by peer */
-               RXRPC_CALL_LOCALLY_ABORTED,     /* - call aborted locally on error or close */
-               RXRPC_CALL_NETWORK_ERROR,       /* - call terminated by network error */
-               RXRPC_CALL_DEAD,                /* - call is dead */
-       } state;
+       enum rxrpc_call_state   state : 8;      /* current state of call */
        int                     debug_id;       /* debug ID for printks */
        u8                      channel;        /* connection channel occupied by this call */
  
        rxrpc_seq_t             rx_data_eaten;  /* last data seq ID consumed by recvmsg */
        rxrpc_seq_t             rx_first_oos;   /* first packet in rx_oos_queue (or 0) */
        rxrpc_seq_t             ackr_win_top;   /* top of ACK window (rx_data_eaten is bottom) */
-       rxrpc_seq_net_t         ackr_prev_seq;  /* previous sequence number received */
+       rxrpc_seq_t             ackr_prev_seq;  /* previous sequence number received */
        u8                      ackr_reason;    /* reason to ACK */
-       __be32                  ackr_serial;    /* serial of packet being ACK'd */
+       rxrpc_serial_t          ackr_serial;    /* serial of packet being ACK'd */
        atomic_t                ackr_not_idle;  /* number of packets in Rx queue */
  
        /* received packet records, 1 bit per record */
        u8                      in_clientflag;  /* Copy of conn->in_clientflag for hashing */
        struct rxrpc_local      *local;         /* Local endpoint. Used for hashing. */
        sa_family_t             proto;          /* Frame protocol */
-       /* the following should all be in net order */
-       __be32                  cid;            /* connection ID + channel index  */
-       __be32                  call_id;        /* call ID on connection  */
-       __be32                  epoch;          /* epoch of this connection */
-       __be16                  service_id;     /* service ID */
+       u32                     call_id;        /* call ID on connection  */
+       u32                     cid;            /* connection ID plus channel index */
+       u32                     epoch;          /* epoch of this connection */
+       u16                     service_id;     /* service ID */
        union {                                 /* Peer IP address for hashing */
                __be32  ipv4_addr;
                __u8    ipv6_addr[16];          /* Anticipates eventual IPv6 support */
@@@ -423,7 -455,7 +455,7 @@@ static inline void rxrpc_abort_call(str
        if (call->state < RXRPC_CALL_COMPLETE) {
                call->abort_code = abort_code;
                call->state = RXRPC_CALL_LOCALLY_ABORTED;
-               set_bit(RXRPC_CALL_ABORT, &call->events);
+               set_bit(RXRPC_CALL_EV_ABORT, &call->events);
        }
        write_unlock_bh(&call->state_lock);
  }
   * af_rxrpc.c
   */
  extern atomic_t rxrpc_n_skbs;
- extern __be32 rxrpc_epoch;
+ extern u32 rxrpc_epoch;
  extern atomic_t rxrpc_debug_id;
  extern struct workqueue_struct *rxrpc_workqueue;
  
@@@ -446,35 -478,35 +478,35 @@@ int rxrpc_reject_call(struct rxrpc_soc
  /*
   * ar-ack.c
   */
- extern unsigned rxrpc_requested_ack_delay;
- extern unsigned rxrpc_soft_ack_delay;
- extern unsigned rxrpc_idle_ack_delay;
- extern unsigned rxrpc_rx_window_size;
- extern unsigned rxrpc_rx_mtu;
- extern unsigned rxrpc_rx_jumbo_max;
+ extern unsigned int rxrpc_requested_ack_delay;
+ extern unsigned int rxrpc_soft_ack_delay;
+ extern unsigned int rxrpc_idle_ack_delay;
+ extern unsigned int rxrpc_rx_window_size;
+ extern unsigned int rxrpc_rx_mtu;
+ extern unsigned int rxrpc_rx_jumbo_max;
  
- void __rxrpc_propose_ACK(struct rxrpc_call *, u8, __be32, bool);
- void rxrpc_propose_ACK(struct rxrpc_call *, u8, __be32, bool);
+ void __rxrpc_propose_ACK(struct rxrpc_call *, u8, u32, bool);
+ void rxrpc_propose_ACK(struct rxrpc_call *, u8, u32, bool);
  void rxrpc_process_call(struct work_struct *);
  
  /*
   * ar-call.c
   */
- extern unsigned rxrpc_max_call_lifetime;
- extern unsigned rxrpc_dead_call_expiry;
+ extern unsigned int rxrpc_max_call_lifetime;
+ extern unsigned int rxrpc_dead_call_expiry;
  extern struct kmem_cache *rxrpc_call_jar;
  extern struct list_head rxrpc_calls;
  extern rwlock_t rxrpc_call_lock;
  
- struct rxrpc_call *rxrpc_find_call_hash(u8,  __be32, __be32, __be32,
-                                       __be16, void *, sa_family_t, const u8 *);
+ struct rxrpc_call *rxrpc_find_call_hash(struct rxrpc_host_header *,
+                                       void *, sa_family_t, const void *);
  struct rxrpc_call *rxrpc_get_client_call(struct rxrpc_sock *,
                                         struct rxrpc_transport *,
                                         struct rxrpc_conn_bundle *,
                                         unsigned long, int, gfp_t);
  struct rxrpc_call *rxrpc_incoming_call(struct rxrpc_sock *,
                                       struct rxrpc_connection *,
-                                      struct rxrpc_header *, gfp_t);
+                                      struct rxrpc_host_header *, gfp_t);
  struct rxrpc_call *rxrpc_find_server_call(struct rxrpc_sock *, unsigned long);
  void rxrpc_release_call(struct rxrpc_call *);
  void rxrpc_release_calls_on_socket(struct rxrpc_sock *);
@@@ -484,22 -516,22 +516,22 @@@ void __exit rxrpc_destroy_all_calls(voi
  /*
   * ar-connection.c
   */
- extern unsigned rxrpc_connection_expiry;
+ extern unsigned int rxrpc_connection_expiry;
  extern struct list_head rxrpc_connections;
  extern rwlock_t rxrpc_connection_lock;
  
  struct rxrpc_conn_bundle *rxrpc_get_bundle(struct rxrpc_sock *,
                                           struct rxrpc_transport *,
-                                          struct key *, __be16, gfp_t);
+                                          struct key *, u16, gfp_t);
  void rxrpc_put_bundle(struct rxrpc_transport *, struct rxrpc_conn_bundle *);
  int rxrpc_connect_call(struct rxrpc_sock *, struct rxrpc_transport *,
                       struct rxrpc_conn_bundle *, struct rxrpc_call *, gfp_t);
  void rxrpc_put_connection(struct rxrpc_connection *);
  void __exit rxrpc_destroy_all_connections(void);
  struct rxrpc_connection *rxrpc_find_connection(struct rxrpc_transport *,
-                                              struct rxrpc_header *);
+                                              struct rxrpc_host_header *);
  extern struct rxrpc_connection *
- rxrpc_incoming_connection(struct rxrpc_transport *, struct rxrpc_header *,
+ rxrpc_incoming_connection(struct rxrpc_transport *, struct rxrpc_host_header *,
                          gfp_t);
  
  /*
@@@ -547,7 -579,7 +579,7 @@@ int rxrpc_get_server_data_key(struct rx
  /*
   * ar-output.c
   */
- extern unsigned rxrpc_resend_timeout;
+ extern unsigned int rxrpc_resend_timeout;
  
  int rxrpc_send_packet(struct rxrpc_transport *, struct sk_buff *);
  int rxrpc_client_sendmsg(struct rxrpc_sock *, struct rxrpc_transport *,
@@@ -595,7 -627,7 +627,7 @@@ void rxrpc_packet_destructor(struct sk_
  /*
   * ar-transport.c
   */
- extern unsigned rxrpc_transport_expiry;
+ extern unsigned int rxrpc_transport_expiry;
  
  struct rxrpc_transport *rxrpc_get_transport(struct rxrpc_local *,
                                            struct rxrpc_peer *, gfp_t);
@@@ -694,7 -726,7 +726,7 @@@ do {                                                               
                printk(KERN_ERR "RxRPC: Assertion failed\n");   \
                BUG();                                          \
        }                                                       \
- } while(0)
+ } while (0)
  
  #define ASSERTCMP(X, OP, Y)                                           \
  do {                                                                  \
                       (unsigned long)(X), (unsigned long)(Y));         \
                BUG();                                                  \
        }                                                               \
- } while(0)
+ } while (0)
  
  #define ASSERTIF(C, X)                                                \
  do {                                                          \
                printk(KERN_ERR "RxRPC: Assertion failed\n");   \
                BUG();                                          \
        }                                                       \
- } while(0)
+ } while (0)
  
  #define ASSERTIFCMP(C, X, OP, Y)                                      \
  do {                                                                  \
                       (unsigned long)(X), (unsigned long)(Y));         \
                BUG();                                                  \
        }                                                               \
- } while(0)
+ } while (0)
  
  #else
  
  #define ASSERT(X)                             \
  do {                                          \
- } while(0)
+ } while (0)
  
  #define ASSERTCMP(X, OP, Y)                   \
  do {                                          \
- } while(0)
+ } while (0)
  
  #define ASSERTIF(C, X)                                \
  do {                                          \
- } while(0)
+ } while (0)
  
  #define ASSERTIFCMP(C, X, OP, Y)              \
  do {                                          \
- } while(0)
+ } while (0)
  
  #endif /* __KDEBUGALL */
  
@@@ -804,9 -836,9 +836,9 @@@ do {                                                       
        CHECK_SLAB_OKAY(&(CALL)->usage);                \
        if (atomic_inc_return(&(CALL)->usage) == 1)     \
                BUG();                                  \
- } while(0)
+ } while (0)
  
  #define rxrpc_put_call(CALL)                          \
  do {                                                  \
        __rxrpc_put_call(CALL);                         \
- } while(0)
+ } while (0)
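
Note: the ar-internal.h hunks above replace the wire-order __be32/__be16 call fields (cid, call_id, epoch, service_id) with host-order u32/u16 equivalents, so byte swapping happens only where a wire header is built or parsed. A minimal sketch of that boundary conversion, assuming the field names shown in the hunks (rxrpc_wire_header and rxrpc_host_header are taken from the diff; the helper name itself is hypothetical):

/* Hypothetical helper, for illustration only: convert an on-the-wire rxrpc
 * header (network byte order) into the host-order form the code above now
 * keeps internally.  Field names follow the hunks in this commit.
 */
static void rxrpc_header_to_host(struct rxrpc_host_header *hdr,
                                 const struct rxrpc_wire_header *whdr)
{
	hdr->epoch         = ntohl(whdr->epoch);      /* connection epoch */
	hdr->cid           = ntohl(whdr->cid);        /* connection ID + channel index */
	hdr->callNumber    = ntohl(whdr->callNumber); /* call ID on connection */
	hdr->seq           = ntohl(whdr->seq);        /* packet sequence number */
	hdr->serial        = ntohl(whdr->serial);     /* packet serial number */
	hdr->serviceId     = ntohs(whdr->serviceId);  /* service ID */
	hdr->type          = whdr->type;              /* single-byte fields copy as-is */
	hdr->flags         = whdr->flags;
	hdr->securityIndex = whdr->securityIndex;
}
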
diff --combined net/rxrpc/rxkad.c
index 0d96b48a64925840cfc2249e928ebdde51ac00a8,3106a0c4960be7db6e726ab70bf1ed6b891b9c2f..f0aeb8163688e6f4167874d9a6ac0f3bd270891d
@@@ -9,11 -9,11 +9,11 @@@
   * 2 of the License, or (at your option) any later version.
   */
  
 +#include <crypto/skcipher.h>
  #include <linux/module.h>
  #include <linux/net.h>
  #include <linux/skbuff.h>
  #include <linux/udp.h>
 -#include <linux/crypto.h>
  #include <linux/scatterlist.h>
  #include <linux/ctype.h>
  #include <linux/slab.h>
@@@ -53,7 -53,7 +53,7 @@@ MODULE_LICENSE("GPL")
   * alloc routine, but since we have it to hand, we use it to decrypt RESPONSE
   * packets
   */
 -static struct crypto_blkcipher *rxkad_ci;
 +static struct crypto_skcipher *rxkad_ci;
  static DEFINE_MUTEX(rxkad_ci_mutex);
  
  /*
@@@ -61,7 -61,7 +61,7 @@@
   */
  static int rxkad_init_connection_security(struct rxrpc_connection *conn)
  {
 -      struct crypto_blkcipher *ci;
 +      struct crypto_skcipher *ci;
        struct rxrpc_key_token *token;
        int ret;
  
        token = conn->key->payload.data[0];
        conn->security_ix = token->security_index;
  
 -      ci = crypto_alloc_blkcipher("pcbc(fcrypt)", 0, CRYPTO_ALG_ASYNC);
 +      ci = crypto_alloc_skcipher("pcbc(fcrypt)", 0, CRYPTO_ALG_ASYNC);
        if (IS_ERR(ci)) {
                _debug("no cipher");
                ret = PTR_ERR(ci);
                goto error;
        }
  
 -      if (crypto_blkcipher_setkey(ci, token->kad->session_key,
 -                                  sizeof(token->kad->session_key)) < 0)
 +      if (crypto_skcipher_setkey(ci, token->kad->session_key,
 +                                 sizeof(token->kad->session_key)) < 0)
                BUG();
  
        switch (conn->security_level) {
@@@ -113,7 -113,7 +113,7 @@@ error
  static void rxkad_prime_packet_security(struct rxrpc_connection *conn)
  {
        struct rxrpc_key_token *token;
 -      struct blkcipher_desc desc;
 +      SKCIPHER_REQUEST_ON_STACK(req, conn->cipher);
        struct scatterlist sg[2];
        struct rxrpc_crypt iv;
        struct {
        token = conn->key->payload.data[0];
        memcpy(&iv, token->kad->session_key, sizeof(iv));
  
-       tmpbuf.x[0] = conn->epoch;
-       tmpbuf.x[1] = conn->cid;
 -      desc.tfm = conn->cipher;
 -      desc.info = iv.x;
 -      desc.flags = 0;
 -
+       tmpbuf.x[0] = htonl(conn->epoch);
+       tmpbuf.x[1] = htonl(conn->cid);
        tmpbuf.x[2] = 0;
        tmpbuf.x[3] = htonl(conn->security_ix);
  
        sg_init_one(&sg[0], &tmpbuf, sizeof(tmpbuf));
        sg_init_one(&sg[1], &tmpbuf, sizeof(tmpbuf));
 -      crypto_blkcipher_encrypt_iv(&desc, &sg[0], &sg[1], sizeof(tmpbuf));
 +
 +      skcipher_request_set_tfm(req, conn->cipher);
 +      skcipher_request_set_callback(req, 0, NULL, NULL);
 +      skcipher_request_set_crypt(req, &sg[1], &sg[0], sizeof(tmpbuf), iv.x);
 +
 +      crypto_skcipher_encrypt(req);
 +      skcipher_request_zero(req);
  
        memcpy(&conn->csum_iv, &tmpbuf.x[2], sizeof(conn->csum_iv));
-       ASSERTCMP(conn->csum_iv.n[0], ==, tmpbuf.x[2]);
+       ASSERTCMP((u32 __force)conn->csum_iv.n[0], ==, (u32 __force)tmpbuf.x[2]);
  
        _leave("");
  }
@@@ -158,7 -156,7 +158,7 @@@ static int rxkad_secure_packet_auth(con
                                    void *sechdr)
  {
        struct rxrpc_skb_priv *sp;
 -      struct blkcipher_desc desc;
 +      SKCIPHER_REQUEST_ON_STACK(req, call->conn->cipher);
        struct rxrpc_crypt iv;
        struct scatterlist sg[2];
        struct {
  
        _enter("");
  
-       check = ntohl(sp->hdr.seq ^ sp->hdr.callNumber);
-       data_size |= (u32) check << 16;
+       check = sp->hdr.seq ^ sp->hdr.callNumber;
+       data_size |= (u32)check << 16;
  
        tmpbuf.hdr.data_size = htonl(data_size);
        memcpy(&tmpbuf.first, sechdr + 4, sizeof(tmpbuf.first));
  
        /* start the encryption afresh */
        memset(&iv, 0, sizeof(iv));
 -      desc.tfm = call->conn->cipher;
 -      desc.info = iv.x;
 -      desc.flags = 0;
  
        sg_init_one(&sg[0], &tmpbuf, sizeof(tmpbuf));
        sg_init_one(&sg[1], &tmpbuf, sizeof(tmpbuf));
 -      crypto_blkcipher_encrypt_iv(&desc, &sg[0], &sg[1], sizeof(tmpbuf));
 +
 +      skcipher_request_set_tfm(req, call->conn->cipher);
 +      skcipher_request_set_callback(req, 0, NULL, NULL);
 +      skcipher_request_set_crypt(req, &sg[1], &sg[0], sizeof(tmpbuf), iv.x);
 +
 +      crypto_skcipher_encrypt(req);
 +      skcipher_request_zero(req);
  
        memcpy(sechdr, &tmpbuf, sizeof(tmpbuf));
  
   * wholly encrypt a packet (level 2 security)
   */
  static int rxkad_secure_packet_encrypt(const struct rxrpc_call *call,
-                                       struct sk_buff *skb,
-                                       u32 data_size,
-                                       void *sechdr)
+                                      struct sk_buff *skb,
+                                      u32 data_size,
+                                      void *sechdr)
  {
        const struct rxrpc_key_token *token;
        struct rxkad_level2_hdr rxkhdr
                __attribute__((aligned(8))); /* must be all on one page */
        struct rxrpc_skb_priv *sp;
 -      struct blkcipher_desc desc;
 +      SKCIPHER_REQUEST_ON_STACK(req, call->conn->cipher);
        struct rxrpc_crypt iv;
        struct scatterlist sg[16];
        struct sk_buff *trailer;
        unsigned int len;
        u16 check;
        int nsg;
 +      int err;
  
        sp = rxrpc_skb(skb);
  
        _enter("");
  
-       check = ntohl(sp->hdr.seq ^ sp->hdr.callNumber);
+       check = sp->hdr.seq ^ sp->hdr.callNumber;
  
-       rxkhdr.data_size = htonl(data_size | (u32) check << 16);
+       rxkhdr.data_size = htonl(data_size | (u32)check << 16);
        rxkhdr.checksum = 0;
  
        /* encrypt from the session key */
        token = call->conn->key->payload.data[0];
        memcpy(&iv, token->kad->session_key, sizeof(iv));
 -      desc.tfm = call->conn->cipher;
 -      desc.info = iv.x;
 -      desc.flags = 0;
  
        sg_init_one(&sg[0], sechdr, sizeof(rxkhdr));
        sg_init_one(&sg[1], &rxkhdr, sizeof(rxkhdr));
 -      crypto_blkcipher_encrypt_iv(&desc, &sg[0], &sg[1], sizeof(rxkhdr));
 +
 +      skcipher_request_set_tfm(req, call->conn->cipher);
 +      skcipher_request_set_callback(req, 0, NULL, NULL);
 +      skcipher_request_set_crypt(req, &sg[1], &sg[0], sizeof(rxkhdr), iv.x);
 +
 +      crypto_skcipher_encrypt(req);
  
        /* we want to encrypt the skbuff in-place */
        nsg = skb_cow_data(skb, 0, &trailer);
 +      err = -ENOMEM;
        if (nsg < 0 || nsg > 16)
 -              return -ENOMEM;
 +              goto out;
  
        len = data_size + call->conn->size_align - 1;
        len &= ~(call->conn->size_align - 1);
  
        sg_init_table(sg, nsg);
        skb_to_sgvec(skb, sg, 0, len);
 -      crypto_blkcipher_encrypt_iv(&desc, sg, sg, len);
 +
 +      skcipher_request_set_crypt(req, sg, sg, len, iv.x);
 +
 +      crypto_skcipher_encrypt(req);
  
        _leave(" = 0");
 -      return 0;
 +      err = 0;
 +
 +out:
 +      skcipher_request_zero(req);
 +      return err;
  }
  
  /*
   * checksum an RxRPC packet header
   */
  static int rxkad_secure_packet(const struct rxrpc_call *call,
-                               struct sk_buff *skb,
-                               size_t data_size,
-                               void *sechdr)
+                              struct sk_buff *skb,
+                              size_t data_size,
+                              void *sechdr)
  {
        struct rxrpc_skb_priv *sp;
 -      struct blkcipher_desc desc;
 +      SKCIPHER_REQUEST_ON_STACK(req, call->conn->cipher);
        struct rxrpc_crypt iv;
        struct scatterlist sg[2];
        struct {
                __be32 x[2];
        } tmpbuf __attribute__((aligned(8))); /* must all be in same page */
-       __be32 x;
-       u32 y;
+       u32 x, y;
        int ret;
  
        sp = rxrpc_skb(skb);
  
        _enter("{%d{%x}},{#%u},%zu,",
-              call->debug_id, key_serial(call->conn->key), ntohl(sp->hdr.seq),
+              call->debug_id, key_serial(call->conn->key), sp->hdr.seq,
               data_size);
  
        if (!call->conn->cipher)
  
        /* continue encrypting from where we left off */
        memcpy(&iv, call->conn->csum_iv.x, sizeof(iv));
 -      desc.tfm = call->conn->cipher;
 -      desc.info = iv.x;
 -      desc.flags = 0;
  
        /* calculate the security checksum */
-       x = htonl(call->channel << (32 - RXRPC_CIDSHIFT));
-       x |= sp->hdr.seq & cpu_to_be32(0x3fffffff);
-       tmpbuf.x[0] = sp->hdr.callNumber;
-       tmpbuf.x[1] = x;
+       x = call->channel << (32 - RXRPC_CIDSHIFT);
+       x |= sp->hdr.seq & 0x3fffffff;
+       tmpbuf.x[0] = htonl(sp->hdr.callNumber);
+       tmpbuf.x[1] = htonl(x);
  
        sg_init_one(&sg[0], &tmpbuf, sizeof(tmpbuf));
        sg_init_one(&sg[1], &tmpbuf, sizeof(tmpbuf));
 -      crypto_blkcipher_encrypt_iv(&desc, &sg[0], &sg[1], sizeof(tmpbuf));
 +
 +      skcipher_request_set_tfm(req, call->conn->cipher);
 +      skcipher_request_set_callback(req, 0, NULL, NULL);
 +      skcipher_request_set_crypt(req, &sg[1], &sg[0], sizeof(tmpbuf), iv.x);
 +
 +      crypto_skcipher_encrypt(req);
 +      skcipher_request_zero(req);
  
        y = ntohl(tmpbuf.x[1]);
        y = (y >> 16) & 0xffff;
        if (y == 0)
                y = 1; /* zero checksums are not permitted */
-       sp->hdr.cksum = htons(y);
+       sp->hdr.cksum = y;
  
        switch (call->conn->security_level) {
        case RXRPC_SECURITY_PLAIN:
@@@ -349,7 -329,7 +348,7 @@@ static int rxkad_verify_packet_auth(con
  {
        struct rxkad_level1_hdr sechdr;
        struct rxrpc_skb_priv *sp;
 -      struct blkcipher_desc desc;
 +      SKCIPHER_REQUEST_ON_STACK(req, call->conn->cipher);
        struct rxrpc_crypt iv;
        struct scatterlist sg[16];
        struct sk_buff *trailer;
  
        /* start the decryption afresh */
        memset(&iv, 0, sizeof(iv));
 -      desc.tfm = call->conn->cipher;
 -      desc.info = iv.x;
 -      desc.flags = 0;
  
 -      crypto_blkcipher_decrypt_iv(&desc, sg, sg, 8);
 +      skcipher_request_set_tfm(req, call->conn->cipher);
 +      skcipher_request_set_callback(req, 0, NULL, NULL);
 +      skcipher_request_set_crypt(req, sg, sg, 8, iv.x);
 +
 +      crypto_skcipher_decrypt(req);
 +      skcipher_request_zero(req);
  
        /* remove the decrypted packet length */
        if (skb_copy_bits(skb, 0, &sechdr, sizeof(sechdr)) < 0)
        data_size = buf & 0xffff;
  
        check = buf >> 16;
-       check ^= ntohl(sp->hdr.seq ^ sp->hdr.callNumber);
+       check ^= sp->hdr.seq ^ sp->hdr.callNumber;
        check &= 0xffff;
        if (check != 0) {
                *_abort_code = RXKADSEALEDINCON;
@@@ -426,7 -404,7 +425,7 @@@ static int rxkad_verify_packet_encrypt(
        const struct rxrpc_key_token *token;
        struct rxkad_level2_hdr sechdr;
        struct rxrpc_skb_priv *sp;
 -      struct blkcipher_desc desc;
 +      SKCIPHER_REQUEST_ON_STACK(req, call->conn->cipher);
        struct rxrpc_crypt iv;
        struct scatterlist _sg[4], *sg;
        struct sk_buff *trailer;
        /* decrypt from the session key */
        token = call->conn->key->payload.data[0];
        memcpy(&iv, token->kad->session_key, sizeof(iv));
 -      desc.tfm = call->conn->cipher;
 -      desc.info = iv.x;
 -      desc.flags = 0;
  
 -      crypto_blkcipher_decrypt_iv(&desc, sg, sg, skb->len);
 +      skcipher_request_set_tfm(req, call->conn->cipher);
 +      skcipher_request_set_callback(req, 0, NULL, NULL);
 +      skcipher_request_set_crypt(req, sg, sg, skb->len, iv.x);
 +
 +      crypto_skcipher_decrypt(req);
 +      skcipher_request_zero(req);
        if (sg != _sg)
                kfree(sg);
  
        data_size = buf & 0xffff;
  
        check = buf >> 16;
-       check ^= ntohl(sp->hdr.seq ^ sp->hdr.callNumber);
+       check ^= sp->hdr.seq ^ sp->hdr.callNumber;
        check &= 0xffff;
        if (check != 0) {
                *_abort_code = RXKADSEALEDINCON;
@@@ -510,23 -486,21 +509,21 @@@ static int rxkad_verify_packet(const st
                               struct sk_buff *skb,
                               u32 *_abort_code)
  {
 -      struct blkcipher_desc desc;
 +      SKCIPHER_REQUEST_ON_STACK(req, call->conn->cipher);
        struct rxrpc_skb_priv *sp;
        struct rxrpc_crypt iv;
        struct scatterlist sg[2];
        struct {
                __be32 x[2];
        } tmpbuf __attribute__((aligned(8))); /* must all be in same page */
-       __be32 x;
-       __be16 cksum;
-       u32 y;
+       u16 cksum;
+       u32 x, y;
        int ret;
  
        sp = rxrpc_skb(skb);
  
        _enter("{%d{%x}},{#%u}",
-              call->debug_id, key_serial(call->conn->key),
-              ntohl(sp->hdr.seq));
+              call->debug_id, key_serial(call->conn->key), sp->hdr.seq);
  
        if (!call->conn->cipher)
                return 0;
  
        /* continue encrypting from where we left off */
        memcpy(&iv, call->conn->csum_iv.x, sizeof(iv));
 -      desc.tfm = call->conn->cipher;
 -      desc.info = iv.x;
 -      desc.flags = 0;
  
        /* validate the security checksum */
-       x = htonl(call->channel << (32 - RXRPC_CIDSHIFT));
-       x |= sp->hdr.seq & cpu_to_be32(0x3fffffff);
-       tmpbuf.x[0] = call->call_id;
-       tmpbuf.x[1] = x;
+       x = call->channel << (32 - RXRPC_CIDSHIFT);
+       x |= sp->hdr.seq & 0x3fffffff;
+       tmpbuf.x[0] = htonl(call->call_id);
+       tmpbuf.x[1] = htonl(x);
  
        sg_init_one(&sg[0], &tmpbuf, sizeof(tmpbuf));
        sg_init_one(&sg[1], &tmpbuf, sizeof(tmpbuf));
 -      crypto_blkcipher_encrypt_iv(&desc, &sg[0], &sg[1], sizeof(tmpbuf));
 +
 +      skcipher_request_set_tfm(req, call->conn->cipher);
 +      skcipher_request_set_callback(req, 0, NULL, NULL);
 +      skcipher_request_set_crypt(req, &sg[1], &sg[0], sizeof(tmpbuf), iv.x);
 +
 +      crypto_skcipher_encrypt(req);
 +      skcipher_request_zero(req);
  
        y = ntohl(tmpbuf.x[1]);
-       y = (y >> 16) & 0xffff;
-       if (y == 0)
-               y = 1; /* zero checksums are not permitted */
+       cksum = (y >> 16) & 0xffff;
+       if (cksum == 0)
+               cksum = 1; /* zero checksums are not permitted */
  
-       cksum = htons(y);
        if (sp->hdr.cksum != cksum) {
                *_abort_code = RXKADSEALEDINCON;
                _leave(" = -EPROTO [csum failed]");
  static int rxkad_issue_challenge(struct rxrpc_connection *conn)
  {
        struct rxkad_challenge challenge;
-       struct rxrpc_header hdr;
+       struct rxrpc_wire_header whdr;
        struct msghdr msg;
        struct kvec iov[2];
        size_t len;
+       u32 serial;
        int ret;
  
        _enter("{%d,%x}", conn->debug_id, key_serial(conn->key));
        msg.msg_controllen = 0;
        msg.msg_flags   = 0;
  
-       hdr.epoch       = conn->epoch;
-       hdr.cid         = conn->cid;
-       hdr.callNumber  = 0;
-       hdr.seq         = 0;
-       hdr.type        = RXRPC_PACKET_TYPE_CHALLENGE;
-       hdr.flags       = conn->out_clientflag;
-       hdr.userStatus  = 0;
-       hdr.securityIndex = conn->security_ix;
-       hdr._rsvd       = 0;
-       hdr.serviceId   = conn->service_id;
-       iov[0].iov_base = &hdr;
-       iov[0].iov_len  = sizeof(hdr);
+       whdr.epoch      = htonl(conn->epoch);
+       whdr.cid        = htonl(conn->cid);
+       whdr.callNumber = 0;
+       whdr.seq        = 0;
+       whdr.type       = RXRPC_PACKET_TYPE_CHALLENGE;
+       whdr.flags      = conn->out_clientflag;
+       whdr.userStatus = 0;
+       whdr.securityIndex = conn->security_ix;
+       whdr._rsvd      = 0;
+       whdr.serviceId  = htons(conn->service_id);
+       iov[0].iov_base = &whdr;
+       iov[0].iov_len  = sizeof(whdr);
        iov[1].iov_base = &challenge;
        iov[1].iov_len  = sizeof(challenge);
  
        len = iov[0].iov_len + iov[1].iov_len;
  
-       hdr.serial = htonl(atomic_inc_return(&conn->serial));
-       _proto("Tx CHALLENGE %%%u", ntohl(hdr.serial));
+       serial = atomic_inc_return(&conn->serial);
+       whdr.serial = htonl(serial);
+       _proto("Tx CHALLENGE %%%u", serial);
  
        ret = kernel_sendmsg(conn->trans->local->socket, &msg, iov, 2, len);
        if (ret < 0) {
   * send a Kerberos security response
   */
  static int rxkad_send_response(struct rxrpc_connection *conn,
-                              struct rxrpc_header *hdr,
+                              struct rxrpc_host_header *hdr,
                               struct rxkad_response *resp,
                               const struct rxkad_key *s2)
  {
+       struct rxrpc_wire_header whdr;
        struct msghdr msg;
        struct kvec iov[3];
        size_t len;
+       u32 serial;
        int ret;
  
        _enter("");
        msg.msg_controllen = 0;
        msg.msg_flags   = 0;
  
-       hdr->epoch      = conn->epoch;
-       hdr->seq        = 0;
-       hdr->type       = RXRPC_PACKET_TYPE_RESPONSE;
-       hdr->flags      = conn->out_clientflag;
-       hdr->userStatus = 0;
-       hdr->_rsvd      = 0;
+       memset(&whdr, 0, sizeof(whdr));
+       whdr.epoch      = htonl(hdr->epoch);
+       whdr.cid        = htonl(hdr->cid);
+       whdr.type       = RXRPC_PACKET_TYPE_RESPONSE;
+       whdr.flags      = conn->out_clientflag;
+       whdr.securityIndex = hdr->securityIndex;
+       whdr.serviceId  = htons(hdr->serviceId);
  
-       iov[0].iov_base = hdr;
-       iov[0].iov_len  = sizeof(*hdr);
+       iov[0].iov_base = &whdr;
+       iov[0].iov_len  = sizeof(whdr);
        iov[1].iov_base = resp;
        iov[1].iov_len  = sizeof(*resp);
-       iov[2].iov_base = (void *) s2->ticket;
+       iov[2].iov_base = (void *)s2->ticket;
        iov[2].iov_len  = s2->ticket_len;
  
        len = iov[0].iov_len + iov[1].iov_len + iov[2].iov_len;
  
-       hdr->serial = htonl(atomic_inc_return(&conn->serial));
-       _proto("Tx RESPONSE %%%u", ntohl(hdr->serial));
+       serial = atomic_inc_return(&conn->serial);
+       whdr.serial = htonl(serial);
+       _proto("Tx RESPONSE %%%u", serial);
  
        ret = kernel_sendmsg(conn->trans->local->socket, &msg, iov, 3, len);
        if (ret < 0) {
@@@ -744,21 -720,18 +746,21 @@@ static void rxkad_encrypt_response(stru
                                   struct rxkad_response *resp,
                                   const struct rxkad_key *s2)
  {
 -      struct blkcipher_desc desc;
 +      SKCIPHER_REQUEST_ON_STACK(req, conn->cipher);
        struct rxrpc_crypt iv;
        struct scatterlist sg[2];
  
        /* continue encrypting from where we left off */
        memcpy(&iv, s2->session_key, sizeof(iv));
 -      desc.tfm = conn->cipher;
 -      desc.info = iv.x;
 -      desc.flags = 0;
  
        rxkad_sg_set_buf2(sg, &resp->encrypted, sizeof(resp->encrypted));
 -      crypto_blkcipher_encrypt_iv(&desc, sg, sg, sizeof(resp->encrypted));
 +
 +      skcipher_request_set_tfm(req, conn->cipher);
 +      skcipher_request_set_callback(req, 0, NULL, NULL);
 +      skcipher_request_set_crypt(req, sg, sg, sizeof(resp->encrypted), iv.x);
 +
 +      crypto_skcipher_encrypt(req);
 +      skcipher_request_zero(req);
  }
  
  /*
@@@ -799,7 -772,7 +801,7 @@@ static int rxkad_respond_to_challenge(s
        min_level = ntohl(challenge.min_level);
  
        _proto("Rx CHALLENGE %%%u { v=%u n=%u ml=%u }",
-              ntohl(sp->hdr.serial), version, nonce, min_level);
+              sp->hdr.serial, version, nonce, min_level);
  
        abort_code = RXKADINCONSISTENCY;
        if (version != RXKAD_VERSION)
        /* build the response packet */
        memset(&resp, 0, sizeof(resp));
  
-       resp.version = RXKAD_VERSION;
-       resp.encrypted.epoch = conn->epoch;
-       resp.encrypted.cid = conn->cid;
-       resp.encrypted.securityIndex = htonl(conn->security_ix);
+       resp.version                    = htonl(RXKAD_VERSION);
+       resp.encrypted.epoch            = htonl(conn->epoch);
+       resp.encrypted.cid              = htonl(conn->cid);
+       resp.encrypted.securityIndex    = htonl(conn->security_ix);
+       resp.encrypted.inc_nonce        = htonl(nonce + 1);
+       resp.encrypted.level            = htonl(conn->security_level);
+       resp.kvno                       = htonl(token->kad->kvno);
+       resp.ticket_len                 = htonl(token->kad->ticket_len);
        resp.encrypted.call_id[0] =
-               (conn->channels[0] ? conn->channels[0]->call_id : 0);
+               htonl(conn->channels[0] ? conn->channels[0]->call_id : 0);
        resp.encrypted.call_id[1] =
-               (conn->channels[1] ? conn->channels[1]->call_id : 0);
+               htonl(conn->channels[1] ? conn->channels[1]->call_id : 0);
        resp.encrypted.call_id[2] =
-               (conn->channels[2] ? conn->channels[2]->call_id : 0);
+               htonl(conn->channels[2] ? conn->channels[2]->call_id : 0);
        resp.encrypted.call_id[3] =
-               (conn->channels[3] ? conn->channels[3]->call_id : 0);
-       resp.encrypted.inc_nonce = htonl(nonce + 1);
-       resp.encrypted.level = htonl(conn->security_level);
-       resp.kvno = htonl(token->kad->kvno);
-       resp.ticket_len = htonl(token->kad->ticket_len);
+               htonl(conn->channels[3] ? conn->channels[3]->call_id : 0);
  
        /* calculate the response checksum and then do the encryption */
        rxkad_calc_response_checksum(&resp);
@@@ -851,7 -825,7 +854,7 @@@ static int rxkad_decrypt_ticket(struct 
                                time_t *_expiry,
                                u32 *_abort_code)
  {
 -      struct blkcipher_desc desc;
 +      struct skcipher_request *req;
        struct rxrpc_crypt iv, key;
        struct scatterlist sg[1];
        struct in_addr addr;
  
        memcpy(&iv, &conn->server_key->payload.data[2], sizeof(iv));
  
 -      desc.tfm = conn->server_key->payload.data[0];
 -      desc.info = iv.x;
 -      desc.flags = 0;
 +      req = skcipher_request_alloc(conn->server_key->payload.data[0],
 +                                   GFP_NOFS);
 +      if (!req) {
 +              *_abort_code = RXKADNOAUTH;
 +              ret = -ENOMEM;
 +              goto error;
 +      }
  
        sg_init_one(&sg[0], ticket, ticket_len);
 -      crypto_blkcipher_decrypt_iv(&desc, sg, sg, ticket_len);
 +
 +      skcipher_request_set_callback(req, 0, NULL, NULL);
 +      skcipher_request_set_crypt(req, sg, sg, ticket_len, iv.x);
 +
 +      crypto_skcipher_decrypt(req);
 +      skcipher_request_free(req);
  
        p = ticket;
        end = p + ticket_len;
@@@ -1004,7 -969,7 +1007,7 @@@ static void rxkad_decrypt_response(stru
                                   struct rxkad_response *resp,
                                   const struct rxrpc_crypt *session_key)
  {
 -      struct blkcipher_desc desc;
 +      SKCIPHER_REQUEST_ON_STACK(req, rxkad_ci);
        struct scatterlist sg[2];
        struct rxrpc_crypt iv;
  
        ASSERT(rxkad_ci != NULL);
  
        mutex_lock(&rxkad_ci_mutex);
 -      if (crypto_blkcipher_setkey(rxkad_ci, session_key->x,
 -                                  sizeof(*session_key)) < 0)
 +      if (crypto_skcipher_setkey(rxkad_ci, session_key->x,
 +                                 sizeof(*session_key)) < 0)
                BUG();
  
        memcpy(&iv, session_key, sizeof(iv));
 -      desc.tfm = rxkad_ci;
 -      desc.info = iv.x;
 -      desc.flags = 0;
  
        rxkad_sg_set_buf2(sg, &resp->encrypted, sizeof(resp->encrypted));
 -      crypto_blkcipher_decrypt_iv(&desc, sg, sg, sizeof(resp->encrypted));
 +
 +      skcipher_request_set_tfm(req, rxkad_ci);
 +      skcipher_request_set_callback(req, 0, NULL, NULL);
 +      skcipher_request_set_crypt(req, sg, sg, sizeof(resp->encrypted), iv.x);
 +
 +      crypto_skcipher_decrypt(req);
 +      skcipher_request_zero(req);
 +
        mutex_unlock(&rxkad_ci_mutex);
  
        _leave("");
@@@ -1064,7 -1025,7 +1067,7 @@@ static int rxkad_verify_response(struc
        kvno = ntohl(response.kvno);
        sp = rxrpc_skb(skb);
        _proto("Rx RESPONSE %%%u { v=%u kv=%u tl=%u }",
-              ntohl(sp->hdr.serial), version, kvno, ticket_len);
+              sp->hdr.serial, version, kvno, ticket_len);
  
        abort_code = RXKADINCONSISTENCY;
        if (version != RXKAD_VERSION)
        rxkad_decrypt_response(conn, &response, &session_key);
  
        abort_code = RXKADSEALEDINCON;
-       if (response.encrypted.epoch != conn->epoch)
+       if (ntohl(response.encrypted.epoch) != conn->epoch)
                goto protocol_error_free;
-       if (response.encrypted.cid != conn->cid)
+       if (ntohl(response.encrypted.cid) != conn->cid)
                goto protocol_error_free;
        if (ntohl(response.encrypted.securityIndex) != conn->security_ix)
                goto protocol_error_free;
                goto protocol_error_free;
  
        abort_code = RXKADOUTOFSEQUENCE;
-       if (response.encrypted.inc_nonce != htonl(conn->security_nonce + 1))
+       if (ntohl(response.encrypted.inc_nonce) != conn->security_nonce + 1)
                goto protocol_error_free;
  
        abort_code = RXKADLEVELFAIL;
@@@ -1157,7 -1118,7 +1160,7 @@@ static void rxkad_clear(struct rxrpc_co
        _enter("");
  
        if (conn->cipher)
 -              crypto_free_blkcipher(conn->cipher);
 +              crypto_free_skcipher(conn->cipher);
  }
  
  /*
@@@ -1183,7 -1144,7 +1186,7 @@@ static __init int rxkad_init(void
  
        /* pin the cipher we need so that the crypto layer doesn't invoke
         * keventd to go get it */
 -      rxkad_ci = crypto_alloc_blkcipher("pcbc(fcrypt)", 0, CRYPTO_ALG_ASYNC);
 +      rxkad_ci = crypto_alloc_skcipher("pcbc(fcrypt)", 0, CRYPTO_ALG_ASYNC);
        if (IS_ERR(rxkad_ci))
                return PTR_ERR(rxkad_ci);
  
@@@ -1197,7 -1158,7 +1200,7 @@@ static __exit void rxkad_exit(void
        _enter("");
  
        rxrpc_unregister_security(&rxkad);
 -      crypto_free_blkcipher(rxkad_ci);
 +      crypto_free_skcipher(rxkad_ci);
  }
  
  module_exit(rxkad_exit);
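
Note: the rxkad.c conversion above repeats one pattern: each crypto_blkcipher_encrypt_iv()/decrypt_iv() call with an on-stack blkcipher_desc becomes an on-stack skcipher request that is configured, run synchronously, and then wiped. A condensed sketch of that idiom, assuming a synchronous cipher as requested by the CRYPTO_ALG_ASYNC mask in the hunks (all identifiers come from the diff; the wrapper function is illustrative only):

/* Illustrative only: the recurring skcipher idiom used throughout rxkad.c
 * after this change.  'cipher', 'iv', 'sg' and 'len' stand in for the
 * per-call values seen in the hunks above.
 */
static void rxkad_skcipher_example(struct crypto_skcipher *cipher,
                                   struct rxrpc_crypt *iv,
                                   struct scatterlist *sg, unsigned int len)
{
	SKCIPHER_REQUEST_ON_STACK(req, cipher);              /* request lives on the stack */

	skcipher_request_set_tfm(req, cipher);               /* bind the transform */
	skcipher_request_set_callback(req, 0, NULL, NULL);   /* synchronous use, no callback */
	skcipher_request_set_crypt(req, sg, sg, len, iv->x); /* in-place crypt with IV */

	crypto_skcipher_encrypt(req);                        /* or crypto_skcipher_decrypt() */
	skcipher_request_zero(req);                          /* scrub key material off the stack */
}
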
diff --combined net/sctp/sm_make_chunk.c
index 1296e555fe29113e65fd74a3112c7463f224dc25,8449ca26aa0bfd9e787670e5a2eea5afd85773b6..e47abf254ff3481de782de0f48a535b9a562903f
@@@ -45,7 -45,6 +45,7 @@@
  
  #define pr_fmt(fmt) KBUILD_MODNAME ": " fmt
  
 +#include <crypto/hash.h>
  #include <linux/types.h>
  #include <linux/kernel.h>
  #include <linux/ip.h>
@@@ -53,6 -52,7 +53,6 @@@
  #include <linux/net.h>
  #include <linux/inet.h>
  #include <linux/scatterlist.h>
 -#include <linux/crypto.h>
  #include <linux/slab.h>
  #include <net/sock.h>
  
  #include <net/sctp/sm.h>
  
  static struct sctp_chunk *sctp_make_control(const struct sctp_association *asoc,
-                                           __u8 type, __u8 flags, int paylen);
+                                           __u8 type, __u8 flags, int paylen,
+                                           gfp_t gfp);
  static struct sctp_chunk *sctp_make_data(const struct sctp_association *asoc,
-                                        __u8 flags, int paylen);
+                                        __u8 flags, int paylen, gfp_t gfp);
  static struct sctp_chunk *_sctp_make_chunk(const struct sctp_association *asoc,
-                                          __u8 type, __u8 flags, int paylen);
+                                          __u8 type, __u8 flags, int paylen,
+                                          gfp_t gfp);
  static sctp_cookie_param_t *sctp_pack_cookie(const struct sctp_endpoint *ep,
                                        const struct sctp_association *asoc,
                                        const struct sctp_chunk *init_chunk,
@@@ -318,7 -320,7 +320,7 @@@ struct sctp_chunk *sctp_make_init(cons
         * PLEASE DO NOT FIXME [This version does not support Host Name.]
         */
  
-       retval = sctp_make_control(asoc, SCTP_CID_INIT, 0, chunksize);
+       retval = sctp_make_control(asoc, SCTP_CID_INIT, 0, chunksize, gfp);
        if (!retval)
                goto nodata;
  
@@@ -465,7 -467,7 +467,7 @@@ struct sctp_chunk *sctp_make_init_ack(c
                                        num_ext);
  
        /* Now allocate and fill out the chunk.  */
-       retval = sctp_make_control(asoc, SCTP_CID_INIT_ACK, 0, chunksize);
+       retval = sctp_make_control(asoc, SCTP_CID_INIT_ACK, 0, chunksize, gfp);
        if (!retval)
                goto nomem_chunk;
  
@@@ -570,7 -572,8 +572,8 @@@ struct sctp_chunk *sctp_make_cookie_ech
        cookie_len = asoc->peer.cookie_len;
  
        /* Build a cookie echo chunk.  */
-       retval = sctp_make_control(asoc, SCTP_CID_COOKIE_ECHO, 0, cookie_len);
+       retval = sctp_make_control(asoc, SCTP_CID_COOKIE_ECHO, 0,
+                                  cookie_len, GFP_ATOMIC);
        if (!retval)
                goto nodata;
        retval->subh.cookie_hdr =
@@@ -615,7 -618,7 +618,7 @@@ struct sctp_chunk *sctp_make_cookie_ack
  {
        struct sctp_chunk *retval;
  
-       retval = sctp_make_control(asoc, SCTP_CID_COOKIE_ACK, 0, 0);
+       retval = sctp_make_control(asoc, SCTP_CID_COOKIE_ACK, 0, 0, GFP_ATOMIC);
  
        /* RFC 2960 6.4 Multi-homed SCTP Endpoints
         *
@@@ -664,7 -667,7 +667,7 @@@ struct sctp_chunk *sctp_make_cwr(const 
  
        cwr.lowest_tsn = htonl(lowest_tsn);
        retval = sctp_make_control(asoc, SCTP_CID_ECN_CWR, 0,
-                                  sizeof(sctp_cwrhdr_t));
+                                  sizeof(sctp_cwrhdr_t), GFP_ATOMIC);
  
        if (!retval)
                goto nodata;
@@@ -698,7 -701,7 +701,7 @@@ struct sctp_chunk *sctp_make_ecne(cons
  
        ecne.lowest_tsn = htonl(lowest_tsn);
        retval = sctp_make_control(asoc, SCTP_CID_ECN_ECNE, 0,
-                                  sizeof(sctp_ecnehdr_t));
+                                  sizeof(sctp_ecnehdr_t), GFP_ATOMIC);
        if (!retval)
                goto nodata;
        retval->subh.ecne_hdr =
@@@ -713,7 -716,8 +716,8 @@@ nodata
   */
  struct sctp_chunk *sctp_make_datafrag_empty(struct sctp_association *asoc,
                                       const struct sctp_sndrcvinfo *sinfo,
-                                      int data_len, __u8 flags, __u16 ssn)
+                                      int data_len, __u8 flags, __u16 ssn,
+                                      gfp_t gfp)
  {
        struct sctp_chunk *retval;
        struct sctp_datahdr dp;
                dp.ssn = htons(ssn);
  
        chunk_len = sizeof(dp) + data_len;
-       retval = sctp_make_data(asoc, flags, chunk_len);
+       retval = sctp_make_data(asoc, flags, chunk_len, gfp);
        if (!retval)
                goto nodata;
  
@@@ -781,7 -785,7 +785,7 @@@ struct sctp_chunk *sctp_make_sack(cons
                + sizeof(__u32) * num_dup_tsns;
  
        /* Create the chunk.  */
-       retval = sctp_make_control(asoc, SCTP_CID_SACK, 0, len);
+       retval = sctp_make_control(asoc, SCTP_CID_SACK, 0, len, GFP_ATOMIC);
        if (!retval)
                goto nodata;
  
@@@ -861,7 -865,7 +865,7 @@@ struct sctp_chunk *sctp_make_shutdown(c
        shut.cum_tsn_ack = htonl(ctsn);
  
        retval = sctp_make_control(asoc, SCTP_CID_SHUTDOWN, 0,
-                                  sizeof(sctp_shutdownhdr_t));
+                                  sizeof(sctp_shutdownhdr_t), GFP_ATOMIC);
        if (!retval)
                goto nodata;
  
@@@ -879,7 -883,8 +883,8 @@@ struct sctp_chunk *sctp_make_shutdown_a
  {
        struct sctp_chunk *retval;
  
-       retval = sctp_make_control(asoc, SCTP_CID_SHUTDOWN_ACK, 0, 0);
+       retval = sctp_make_control(asoc, SCTP_CID_SHUTDOWN_ACK, 0, 0,
+                                  GFP_ATOMIC);
  
        /* RFC 2960 6.4 Multi-homed SCTP Endpoints
         *
@@@ -908,7 -913,8 +913,8 @@@ struct sctp_chunk *sctp_make_shutdown_c
         */
        flags |= asoc ? 0 : SCTP_CHUNK_FLAG_T;
  
-       retval = sctp_make_control(asoc, SCTP_CID_SHUTDOWN_COMPLETE, flags, 0);
+       retval = sctp_make_control(asoc, SCTP_CID_SHUTDOWN_COMPLETE, flags,
+                                  0, GFP_ATOMIC);
  
        /* RFC 2960 6.4 Multi-homed SCTP Endpoints
         *
@@@ -947,7 -953,8 +953,8 @@@ struct sctp_chunk *sctp_make_abort(cons
                        flags = SCTP_CHUNK_FLAG_T;
        }
  
-       retval = sctp_make_control(asoc, SCTP_CID_ABORT, flags, hint);
+       retval = sctp_make_control(asoc, SCTP_CID_ABORT, flags, hint,
+                                  GFP_ATOMIC);
  
        /* RFC 2960 6.4 Multi-homed SCTP Endpoints
         *
@@@ -1139,7 -1146,8 +1146,8 @@@ struct sctp_chunk *sctp_make_heartbeat(
        struct sctp_chunk *retval;
        sctp_sender_hb_info_t hbinfo;
  
-       retval = sctp_make_control(asoc, SCTP_CID_HEARTBEAT, 0, sizeof(hbinfo));
+       retval = sctp_make_control(asoc, SCTP_CID_HEARTBEAT, 0,
+                                  sizeof(hbinfo), GFP_ATOMIC);
  
        if (!retval)
                goto nodata;
@@@ -1167,7 -1175,8 +1175,8 @@@ struct sctp_chunk *sctp_make_heartbeat_
  {
        struct sctp_chunk *retval;
  
-       retval  = sctp_make_control(asoc, SCTP_CID_HEARTBEAT_ACK, 0, paylen);
+       retval  = sctp_make_control(asoc, SCTP_CID_HEARTBEAT_ACK, 0, paylen,
+                                   GFP_ATOMIC);
        if (!retval)
                goto nodata;
  
@@@ -1200,7 -1209,7 +1209,7 @@@ static struct sctp_chunk *sctp_make_op_
        struct sctp_chunk *retval;
  
        retval = sctp_make_control(asoc, SCTP_CID_ERROR, 0,
-                                  sizeof(sctp_errhdr_t) + size);
+                                  sizeof(sctp_errhdr_t) + size, GFP_ATOMIC);
        if (!retval)
                goto nodata;
  
@@@ -1271,7 -1280,8 +1280,8 @@@ struct sctp_chunk *sctp_make_auth(cons
                return NULL;
  
        retval = sctp_make_control(asoc, SCTP_CID_AUTH, 0,
-                       hmac_desc->hmac_len + sizeof(sctp_authhdr_t));
+                       hmac_desc->hmac_len + sizeof(sctp_authhdr_t),
+                       GFP_ATOMIC);
        if (!retval)
                return NULL;
  
   */
  struct sctp_chunk *sctp_chunkify(struct sk_buff *skb,
                            const struct sctp_association *asoc,
-                           struct sock *sk)
+                           struct sock *sk, gfp_t gfp)
  {
        struct sctp_chunk *retval;
  
-       retval = kmem_cache_zalloc(sctp_chunk_cachep, GFP_ATOMIC);
+       retval = kmem_cache_zalloc(sctp_chunk_cachep, gfp);
  
        if (!retval)
                goto nodata;
@@@ -1361,7 -1371,8 +1371,8 @@@ const union sctp_addr *sctp_source(cons
   * arguments, reserving enough space for a 'paylen' byte payload.
   */
  static struct sctp_chunk *_sctp_make_chunk(const struct sctp_association *asoc,
-                                           __u8 type, __u8 flags, int paylen)
+                                           __u8 type, __u8 flags, int paylen,
+                                           gfp_t gfp)
  {
        struct sctp_chunk *retval;
        sctp_chunkhdr_t *chunk_hdr;
        struct sock *sk;
  
        /* No need to allocate LL here, as this is only a chunk. */
-       skb = alloc_skb(WORD_ROUND(sizeof(sctp_chunkhdr_t) + paylen),
-                       GFP_ATOMIC);
+       skb = alloc_skb(WORD_ROUND(sizeof(sctp_chunkhdr_t) + paylen), gfp);
        if (!skb)
                goto nodata;
  
        chunk_hdr->length = htons(sizeof(sctp_chunkhdr_t));
  
        sk = asoc ? asoc->base.sk : NULL;
-       retval = sctp_chunkify(skb, asoc, sk);
+       retval = sctp_chunkify(skb, asoc, sk, gfp);
        if (!retval) {
                kfree_skb(skb);
                goto nodata;
@@@ -1400,16 -1410,18 +1410,18 @@@ nodata
  }
  
  static struct sctp_chunk *sctp_make_data(const struct sctp_association *asoc,
-                                        __u8 flags, int paylen)
+                                        __u8 flags, int paylen, gfp_t gfp)
  {
-       return _sctp_make_chunk(asoc, SCTP_CID_DATA, flags, paylen);
+       return _sctp_make_chunk(asoc, SCTP_CID_DATA, flags, paylen, gfp);
  }
  
  static struct sctp_chunk *sctp_make_control(const struct sctp_association *asoc,
-                                           __u8 type, __u8 flags, int paylen)
+                                           __u8 type, __u8 flags, int paylen,
+                                           gfp_t gfp)
  {
-       struct sctp_chunk *chunk = _sctp_make_chunk(asoc, type, flags, paylen);
+       struct sctp_chunk *chunk;
  
+       chunk = _sctp_make_chunk(asoc, type, flags, paylen, gfp);
        if (chunk)
                sctp_control_set_owner_w(chunk);
  
@@@ -1606,6 -1618,7 +1618,6 @@@ static sctp_cookie_param_t *sctp_pack_c
  {
        sctp_cookie_param_t *retval;
        struct sctp_signed_cookie *cookie;
 -      struct scatterlist sg;
        int headersize, bodysize;
  
        /* Header size is static data prior to the actual cookie, including
               ntohs(init_chunk->chunk_hdr->length), raw_addrs, addrs_len);
  
        if (sctp_sk(ep->base.sk)->hmac) {
 -              struct hash_desc desc;
 +              SHASH_DESC_ON_STACK(desc, sctp_sk(ep->base.sk)->hmac);
 +              int err;
  
                /* Sign the message.  */
 -              sg_init_one(&sg, &cookie->c, bodysize);
 -              desc.tfm = sctp_sk(ep->base.sk)->hmac;
 -              desc.flags = 0;
 -
 -              if (crypto_hash_setkey(desc.tfm, ep->secret_key,
 -                                     sizeof(ep->secret_key)) ||
 -                  crypto_hash_digest(&desc, &sg, bodysize, cookie->signature))
 +              desc->tfm = sctp_sk(ep->base.sk)->hmac;
 +              desc->flags = 0;
 +
 +              err = crypto_shash_setkey(desc->tfm, ep->secret_key,
 +                                        sizeof(ep->secret_key)) ?:
 +                    crypto_shash_digest(desc, (u8 *)&cookie->c, bodysize,
 +                                        cookie->signature);
 +              shash_desc_zero(desc);
 +              if (err)
                        goto free_cookie;
        }
  
@@@ -1699,10 -1709,12 +1711,10 @@@ struct sctp_association *sctp_unpack_co
        struct sctp_cookie *bear_cookie;
        int headersize, bodysize, fixed_size;
        __u8 *digest = ep->digest;
 -      struct scatterlist sg;
        unsigned int len;
        sctp_scope_t scope;
        struct sk_buff *skb = chunk->skb;
        ktime_t kt;
 -      struct hash_desc desc;
  
        /* Header size is static data prior to the actual cookie, including
         * any padding.
                goto no_hmac;
  
        /* Check the signature.  */
 -      sg_init_one(&sg, bear_cookie, bodysize);
 -      desc.tfm = sctp_sk(ep->base.sk)->hmac;
 -      desc.flags = 0;
 -
 -      memset(digest, 0x00, SCTP_SIGNATURE_SIZE);
 -      if (crypto_hash_setkey(desc.tfm, ep->secret_key,
 -                             sizeof(ep->secret_key)) ||
 -          crypto_hash_digest(&desc, &sg, bodysize, digest)) {
 -              *error = -SCTP_IERROR_NOMEM;
 -              goto fail;
 +      {
 +              SHASH_DESC_ON_STACK(desc, sctp_sk(ep->base.sk)->hmac);
 +              int err;
 +
 +              desc->tfm = sctp_sk(ep->base.sk)->hmac;
 +              desc->flags = 0;
 +
 +              err = crypto_shash_setkey(desc->tfm, ep->secret_key,
 +                                        sizeof(ep->secret_key)) ?:
 +                    crypto_shash_digest(desc, (u8 *)bear_cookie, bodysize,
 +                                        digest);
 +              shash_desc_zero(desc);
 +
 +              if (err) {
 +                      *error = -SCTP_IERROR_NOMEM;
 +                      goto fail;
 +              }
        }
  
        if (memcmp(digest, cookie->signature, SCTP_SIGNATURE_SIZE)) {
@@@ -2763,7 -2768,8 +2775,8 @@@ static struct sctp_chunk *sctp_make_asc
        length += addrlen;
  
        /* Create the chunk.  */
-       retval = sctp_make_control(asoc, SCTP_CID_ASCONF, 0, length);
+       retval = sctp_make_control(asoc, SCTP_CID_ASCONF, 0, length,
+                                  GFP_ATOMIC);
        if (!retval)
                return NULL;
  
@@@ -2947,7 -2953,8 +2960,8 @@@ static struct sctp_chunk *sctp_make_asc
        int                     length = sizeof(asconf) + vparam_len;
  
        /* Create the chunk.  */
-       retval = sctp_make_control(asoc, SCTP_CID_ASCONF_ACK, 0, length);
+       retval = sctp_make_control(asoc, SCTP_CID_ASCONF_ACK, 0, length,
+                                  GFP_ATOMIC);
        if (!retval)
                return NULL;
  
@@@ -3507,7 -3514,7 +3521,7 @@@ struct sctp_chunk *sctp_make_fwdtsn(con
  
        hint = (nstreams + 1) * sizeof(__u32);
  
-       retval = sctp_make_control(asoc, SCTP_CID_FWD_TSN, 0, hint);
+       retval = sctp_make_control(asoc, SCTP_CID_FWD_TSN, 0, hint, GFP_ATOMIC);
  
        if (!retval)
                return NULL;
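
Note: the sm_make_chunk.c hunks above drop the scatterlist-based crypto_hash digest in favour of a synchronous shash with a stack descriptor. A minimal sketch of the new cookie-signing step, mirroring sctp_pack_cookie()/sctp_unpack_cookie() above (identifiers are taken from the hunks; the wrapper function is illustrative only):

/* Illustrative only: signing a cookie body with the synchronous shash API,
 * as done in the hunks above once sp->hmac has been allocated with
 * crypto_alloc_shash() in sctp_listen_start().
 */
static int sctp_sign_cookie_example(struct crypto_shash *hmac,
                                    const u8 *key, unsigned int keylen,
                                    const u8 *body, unsigned int bodysize,
                                    u8 *signature)
{
	SHASH_DESC_ON_STACK(desc, hmac);     /* descriptor on the stack */
	int err;

	desc->tfm = hmac;
	desc->flags = 0;

	err = crypto_shash_setkey(hmac, key, keylen) ?:
	      crypto_shash_digest(desc, body, bodysize, signature);
	shash_desc_zero(desc);               /* wipe the descriptor state */
	return err;
}
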
diff --combined net/sctp/socket.c
index de8eabf03eed9b904afd78e9af6f2ab0b172cd2b,f28ecf88cfeac261301521ed002fbce7fbb9fb5c..96e08111106f3cbae3b438bfede67958880d2af2
@@@ -52,7 -52,6 +52,7 @@@
  
  #define pr_fmt(fmt) KBUILD_MODNAME ": " fmt
  
 +#include <crypto/hash.h>
  #include <linux/types.h>
  #include <linux/kernel.h>
  #include <linux/wait.h>
@@@ -62,6 -61,7 +62,6 @@@
  #include <linux/fcntl.h>
  #include <linux/poll.h>
  #include <linux/init.h>
 -#include <linux/crypto.h>
  #include <linux/slab.h>
  #include <linux/file.h>
  #include <linux/compat.h>
@@@ -4160,7 -4160,7 +4160,7 @@@ static void sctp_destruct_sock(struct s
        struct sctp_sock *sp = sctp_sk(sk);
  
        /* Free up the HMAC transform. */
 -      crypto_free_hash(sp->hmac);
 +      crypto_free_shash(sp->hmac);
  
        inet_sock_destruct(sk);
  }
@@@ -6106,9 -6106,10 +6106,10 @@@ static int sctp_getsockopt(struct sock 
        return retval;
  }
  
- static void sctp_hash(struct sock *sk)
+ static int sctp_hash(struct sock *sk)
  {
        /* STUB */
+       return 0;
  }
  
  static void sctp_unhash(struct sock *sk)
@@@ -6304,13 -6305,13 +6305,13 @@@ static int sctp_listen_start(struct soc
  {
        struct sctp_sock *sp = sctp_sk(sk);
        struct sctp_endpoint *ep = sp->ep;
 -      struct crypto_hash *tfm = NULL;
 +      struct crypto_shash *tfm = NULL;
        char alg[32];
  
        /* Allocate HMAC for generating cookie. */
        if (!sp->hmac && sp->sctp_hmac_alg) {
                sprintf(alg, "hmac(%s)", sp->sctp_hmac_alg);
 -              tfm = crypto_alloc_hash(alg, 0, CRYPTO_ALG_ASYNC);
 +              tfm = crypto_alloc_shash(alg, 0, 0);
                if (IS_ERR(tfm)) {
                        net_info_ratelimited("failed to load transform for %s: %ld\n",
                                             sp->sctp_hmac_alg, PTR_ERR(tfm));
@@@ -7253,14 -7254,12 +7254,12 @@@ static void sctp_sock_migrate(struct so
        /* Hook this new socket in to the bind_hash list. */
        head = &sctp_port_hashtable[sctp_phashfn(sock_net(oldsk),
                                                 inet_sk(oldsk)->inet_num)];
-       local_bh_disable();
-       spin_lock(&head->lock);
+       spin_lock_bh(&head->lock);
        pp = sctp_sk(oldsk)->bind_hash;
        sk_add_bind_node(newsk, &pp->owner);
        sctp_sk(newsk)->bind_hash = pp;
        inet_sk(newsk)->inet_num = inet_sk(oldsk)->inet_num;
-       spin_unlock(&head->lock);
-       local_bh_enable();
+       spin_unlock_bh(&head->lock);
  
        /* Copy the bind_addr list from the original endpoint to the new
         * endpoint so that we can handle restarts properly