Merge remote-tracking branch 'torvalds/master' into perf/core
author Arnaldo Carvalho de Melo <[email protected]>
Tue, 26 Nov 2019 14:06:19 +0000 (11:06 -0300)
committer Arnaldo Carvalho de Melo <[email protected]>
Tue, 26 Nov 2019 14:06:19 +0000 (11:06 -0300)
To pick up BPF changes we'll need.

Signed-off-by: Arnaldo Carvalho de Melo <[email protected]>
MAINTAINERS
arch/x86/events/intel/core.c
arch/x86/kvm/x86.c
include/linux/perf_event.h
kernel/events/core.c

diff --combined MAINTAINERS
index 81dd8f902bdcb1f52cc2405e403995f97464bd0b,8f075b866aaf6a4d9b4b88be1926a3c8d15f4736..2c7aa547d8b86275fd9f51c10619d8a0408cacdd
@@@ -643,7 -643,7 +643,7 @@@ F: drivers/net/ethernet/alacritech/
  
  FORCEDETH GIGABIT ETHERNET DRIVER
  M:    Rain River <[email protected]>
- M:    Zhu Yanjun <yanjun.zhu@oracle.com>
+ M:    Zhu Yanjun <zyjzyj2000@gmail.com>
  L:    [email protected]
  S:    Maintained
  F:    drivers/net/ethernet/nvidia/*
@@@ -682,11 -682,11 +682,11 @@@ S:      Maintaine
  F:    Documentation/devicetree/bindings/opp/sun50i-nvmem-cpufreq.txt
  F:    drivers/cpufreq/sun50i-cpufreq-nvmem.c
  
- ALLWINNER SECURITY SYSTEM
+ ALLWINNER CRYPTO DRIVERS
  M:    Corentin Labbe <[email protected]>
  L:    [email protected]
  S:    Maintained
- F:    drivers/crypto/sunxi-ss/
+ F:    drivers/crypto/allwinner/
  
  ALLWINNER VPU DRIVER
  M:    Maxime Ripard <[email protected]>
@@@ -1182,14 -1182,21 +1182,21 @@@ S:   Maintaine
  F:    drivers/media/i2c/aptina-pll.*
  
  AQUANTIA ETHERNET DRIVER (atlantic)
- M:    Igor Russkikh <igor.russkikh@aquantia.com>
+ M:    Igor Russkikh <irusskikh@marvell.com>
  L:    [email protected]
  S:    Supported
- W:    http://www.aquantia.com
+ W:    https://www.marvell.com/
  Q:    http://patchwork.ozlabs.org/project/netdev/list/
  F:    drivers/net/ethernet/aquantia/atlantic/
  F:    Documentation/networking/device_drivers/aquantia/atlantic.txt
  
+ AQUANTIA ETHERNET DRIVER PTP SUBSYSTEM
+ M:    Egor Pomozov <[email protected]>
+ L:    [email protected]
+ S:    Supported
+ W:    http://www.aquantia.com
+ F:    drivers/net/ethernet/aquantia/atlantic/aq_ptp*
  ARC FRAMEBUFFER DRIVER
  M:    Jaya Kumar <[email protected]>
  S:    Maintained
@@@ -1470,6 -1477,14 +1477,14 @@@ F:    drivers/soc/amlogic
  F:    drivers/rtc/rtc-meson*
  N:    meson
  
+ ARM/Amlogic Meson SoC Crypto Drivers
+ M:    Corentin Labbe <[email protected]>
+ L:    [email protected]
+ L:    [email protected]
+ S:    Maintained
+ F:    drivers/crypto/amlogic/
+ F:    Documentation/devicetree/bindings/crypto/amlogic*
  ARM/Amlogic Meson SoC Sound Drivers
  M:    Jerome Brunet <[email protected]>
  L:    [email protected] (moderated for non-subscribers)
@@@ -2611,6 -2626,7 +2626,7 @@@ S:      Maintaine
  F:    arch/arm64/
  X:    arch/arm64/boot/dts/
  F:    Documentation/arm64/
+ F:    tools/testing/selftests/arm64/
  
  AS3645A LED FLASH CONTROLLER DRIVER
  M:    Sakari Ailus <[email protected]>
@@@ -3595,6 -3611,13 +3611,13 @@@ S:    Maintaine
  F:    Documentation/devicetree/bindings/media/cdns,*.txt
  F:    drivers/media/platform/cadence/cdns-csi2*
  
+ CADENCE NAND DRIVER
+ M:    Piotr Sroka <[email protected]>
+ L:    [email protected]
+ S:    Maintained
+ F:    drivers/mtd/nand/raw/cadence-nand-controller.c
+ F:    Documentation/devicetree/bindings/mtd/cadence-nand-controller.txt
  CADET FM/AM RADIO RECEIVER DRIVER
  M:    Hans Verkuil <[email protected]>
  L:    [email protected]
@@@ -5046,10 -5069,14 +5069,14 @@@ M:   Ioana Radulescu <ruxandra.radulescu@
  L:    [email protected]
  S:    Maintained
  F:    drivers/net/ethernet/freescale/dpaa2/dpaa2-eth*
+ F:    drivers/net/ethernet/freescale/dpaa2/dpaa2-mac*
  F:    drivers/net/ethernet/freescale/dpaa2/dpni*
+ F:    drivers/net/ethernet/freescale/dpaa2/dpmac*
  F:    drivers/net/ethernet/freescale/dpaa2/dpkg.h
  F:    drivers/net/ethernet/freescale/dpaa2/Makefile
  F:    drivers/net/ethernet/freescale/dpaa2/Kconfig
+ F:    Documentation/networking/device_drivers/freescale/dpaa2/ethernet-driver.rst
+ F:    Documentation/networking/device_drivers/freescale/dpaa2/mac-phy-support.rst
  
  DPAA2 ETHERNET SWITCH DRIVER
  M:    Ioana Radulescu <[email protected]>
@@@ -6143,10 -6170,12 +6170,12 @@@ S:   Maintaine
  F:    Documentation/ABI/testing/sysfs-class-net-phydev
  F:    Documentation/devicetree/bindings/net/ethernet-phy.yaml
  F:    Documentation/devicetree/bindings/net/mdio*
+ F:    Documentation/devicetree/bindings/net/qca,ar803x.yaml
  F:    Documentation/networking/phy.rst
  F:    drivers/net/phy/
  F:    drivers/of/of_mdio.c
  F:    drivers/of/of_net.c
+ F:    include/dt-bindings/net/qca-ar803x.h
  F:    include/linux/*mdio*.h
  F:    include/linux/of_net.h
  F:    include/linux/phy.h
@@@ -7364,6 -7393,25 +7393,25 @@@ F:    include/uapi/linux/if_hippi.
  F:    net/802/hippi.c
  F:    drivers/net/hippi/
  
+ HISILICON SECURITY ENGINE V2 DRIVER (SEC2)
+ M:    Zaibo Xu <[email protected]>
+ L:    [email protected]
+ S:    Maintained
+ F:    drivers/crypto/hisilicon/sec2/sec_crypto.c
+ F:    drivers/crypto/hisilicon/sec2/sec_main.c
+ F:    drivers/crypto/hisilicon/sec2/sec_crypto.h
+ F:    drivers/crypto/hisilicon/sec2/sec.h
+ F:    Documentation/ABI/testing/debugfs-hisi-sec
+ HISILICON HIGH PERFORMANCE RSA ENGINE DRIVER (HPRE)
+ M:    Zaibo Xu <[email protected]>
+ L:    [email protected]
+ S:    Maintained
+ F:    drivers/crypto/hisilicon/hpre/hpre_crypto.c
+ F:    drivers/crypto/hisilicon/hpre/hpre_main.c
+ F:    drivers/crypto/hisilicon/hpre/hpre.h
+ F:    Documentation/ABI/testing/debugfs-hisi-hpre
  HISILICON NETWORK SUBSYSTEM 3 DRIVER (HNS3)
  M:    Yisen Zhuang <[email protected]>
  M:    Salil Mehta <[email protected]>
@@@ -7372,6 -7420,11 +7420,11 @@@ W:    http://www.hisilicon.co
  S:    Maintained
  F:    drivers/net/ethernet/hisilicon/hns3/
  
+ HISILICON TRUE RANDOM NUMBER GENERATOR V2 SUPPORT
+ M:    Zaibo Xu <[email protected]>
+ S:    Maintained
+ F:    drivers/char/hw_random/hisi-trng-v2.c
  HISILICON LPC BUS DRIVER
  M:    [email protected]
  W:    http://www.hisilicon.com
@@@ -7417,7 -7470,6 +7470,6 @@@ S:      Maintaine
  F:    drivers/crypto/hisilicon/qm.c
  F:    drivers/crypto/hisilicon/qm.h
  F:    drivers/crypto/hisilicon/sgl.c
- F:    drivers/crypto/hisilicon/sgl.h
  F:    drivers/crypto/hisilicon/zip/
  F:    Documentation/ABI/testing/debugfs-hisi-zip
  
@@@ -7443,8 -7495,8 +7495,8 @@@ F:      drivers/platform/x86/tc1100-wmi.
  
  HP100:        Driver for HP 10/100 Mbit/s Voice Grade Network Adapter Series
  M:    Jaroslav Kysela <[email protected]>
- S:    Maintained
- F:    drivers/net/ethernet/hp/hp100.*
+ S:    Obsolete
+ F:    drivers/staging/hp/hp100.*
  
  HPET: High Precision Event Timers driver
  M:    Clemens Ladisch <[email protected]>
@@@ -7729,7 -7781,7 +7781,7 @@@ F:      drivers/i2c/i2c-stub.
  
  I3C SUBSYSTEM
  M:    Boris Brezillon <[email protected]>
- L:    [email protected]
+ L:    [email protected] (moderated for non-subscribers)
  C:    irc://chat.freenode.net/linux-i3c
  T:    git git://git.kernel.org/pub/scm/linux/kernel/git/i3c/linux.git
  S:    Maintained
@@@ -7745,6 -7797,12 +7797,12 @@@ S:    Maintaine
  F:    Documentation/devicetree/bindings/i3c/snps,dw-i3c-master.txt
  F:    drivers/i3c/master/dw*
  
+ I3C DRIVER FOR CADENCE I3C MASTER IP
+ M:      Przemysław Gaj <[email protected]>
+ S:      Maintained
+ F:      Documentation/devicetree/bindings/i3c/cdns,i3c-master.txt
+ F:      drivers/i3c/master/i3c-master-cdns.c
  IA64 (Itanium) PLATFORM
  M:    Tony Luck <[email protected]>
  M:    Fenghua Yu <[email protected]>
@@@ -8564,12 -8622,13 +8622,13 @@@ F:   include/linux/iova.
  
  IO_URING
  M:    Jens Axboe <[email protected]>
- L:    [email protected]
- L:    [email protected]
+ L:    [email protected]
  T:    git git://git.kernel.dk/linux-block
  T:    git git://git.kernel.dk/liburing
  S:    Maintained
  F:    fs/io_uring.c
+ F:    fs/io-wq.c
+ F:    fs/io-wq.h
  F:    include/uapi/linux/io_uring.h
  
  IPMI SUBSYSTEM
@@@ -8920,6 -8979,17 +8979,17 @@@ S:    Maintaine
  F:    tools/testing/selftests/
  F:    Documentation/dev-tools/kselftest*
  
+ KERNEL UNIT TESTING FRAMEWORK (KUnit)
+ M:    Brendan Higgins <[email protected]>
+ L:    [email protected]
+ L:    [email protected]
+ W:    https://google.github.io/kunit-docs/third_party/kernel/docs/
+ S:    Maintained
+ F:    Documentation/dev-tools/kunit/
+ F:    include/kunit/
+ F:    lib/kunit/
+ F:    tools/testing/kunit/
  KERNEL USERMODE HELPER
  M:    Luis Chamberlain <[email protected]>
  L:    [email protected]
@@@ -9497,6 -9567,13 +9567,13 @@@ F:    Documentation/misc-devices/lis3lv02d
  F:    drivers/misc/lis3lv02d/
  F:    drivers/platform/x86/hp_accel.c
  
+ LIST KUNIT TEST
+ M:    David Gow <[email protected]>
+ L:    [email protected]
+ L:    [email protected]
+ S:    Maintained
+ F:    lib/list-test.c
  LIVE PATCHING
  M:    Josh Poimboeuf <[email protected]>
  M:    Jiri Kosina <[email protected]>
@@@ -9740,6 -9817,7 +9817,7 @@@ S:      Maintaine
  F:    drivers/net/dsa/mv88e6xxx/
  F:    include/linux/platform_data/mv88e6xxx.h
  F:    Documentation/devicetree/bindings/net/dsa/marvell.txt
+ F:    Documentation/networking/devlink-params-mv88e6xxx.txt
  
  MARVELL ARMADA DRM SUPPORT
  M:    Russell King <[email protected]>
@@@ -10536,15 -10614,13 +10614,13 @@@ F:        include/linux/vmalloc.
  F:    mm/
  
  MEMORY TECHNOLOGY DEVICES (MTD)
- M:    David Woodhouse <[email protected]>
- M:    Brian Norris <[email protected]>
- M:    Marek Vasut <[email protected]>
  M:    Miquel Raynal <[email protected]>
  M:    Richard Weinberger <[email protected]>
  M:    Vignesh Raghavendra <[email protected]>
  L:    [email protected]
  W:    http://www.linux-mtd.infradead.org/
  Q:    http://patchwork.ozlabs.org/project/linux-mtd/list/
+ C:    irc://irc.oftc.net/mtd
  T:    git git://git.kernel.org/pub/scm/linux/kernel/git/mtd/linux.git mtd/fixes
  T:    git git://git.kernel.org/pub/scm/linux/kernel/git/mtd/linux.git mtd/next
  S:    Maintained
@@@ -10821,6 -10897,7 +10897,7 @@@ M:   Microchip Linux Driver Support <UNGL
  L:    [email protected]
  S:    Supported
  F:    drivers/net/ethernet/mscc/
+ F:    include/soc/mscc/ocelot*
  
  MICROSOFT SURFACE PRO 3 BUTTON DRIVER
  M:    Chen Yu <[email protected]>
@@@ -10875,18 -10952,18 +10952,18 @@@ F:        arch/mips/include/asm/mach-loongson3
  F:    drivers/*/*loongson1*
  F:    drivers/*/*/*loongson1*
  
- MIPS/LOONGSON2 ARCHITECTURE
+ MIPS/LOONGSON2EF ARCHITECTURE
  M:    Jiaxun Yang <[email protected]>
  L:    [email protected]
  S:    Maintained
- F:    arch/mips/loongson64/fuloong-2e/
- F:    arch/mips/loongson64/lemote-2f/
- F:    arch/mips/include/asm/mach-loongson64/
+ F:    arch/mips/loongson2ef/
+ F:    arch/mips/include/asm/mach-loongson2ef/
  F:    drivers/*/*loongson2*
  F:    drivers/*/*/*loongson2*
  
- MIPS/LOONGSON3 ARCHITECTURE
+ MIPS/LOONGSON64 ARCHITECTURE
  M:    Huacai Chen <[email protected]>
+ M:    Jiaxun Yang <[email protected]>
  L:    [email protected]
  S:    Maintained
  F:    arch/mips/loongson64/
@@@ -11637,6 -11714,7 +11714,7 @@@ F:   drivers/nvme/target/fcloop.
  NVM EXPRESS TARGET DRIVER
  M:    Christoph Hellwig <[email protected]>
  M:    Sagi Grimberg <[email protected]>
+ M:    Chaitanya Kulkarni <[email protected]>
  L:    [email protected]
  T:    git://git.infradead.org/nvme.git
  W:    http://git.infradead.org/nvme.git
@@@ -12778,13 -12856,6 +12856,13 @@@ F: arch/*/events/
  F:    arch/*/events/*/*
  F:    tools/perf/
  
 +PERFORMANCE EVENTS SUBSYSTEM ARM64 PMU EVENTS
 +R:    John Garry <[email protected]>
 +R:    Will Deacon <[email protected]>
 +L:    [email protected] (moderated for non-subscribers)
 +S:    Supported
 +F:    tools/perf/pmu-events/arch/arm64/
 +
  PERSONALITY HANDLING
  M:    Christoph Hellwig <[email protected]>
  L:    [email protected]
@@@ -12842,6 -12913,7 +12920,7 @@@ S:   Maintaine
  T:    git git://git.kernel.org/pub/scm/linux/kernel/git/brauner/linux.git
  F:    samples/pidfd/
  F:    tools/testing/selftests/pidfd/
+ F:    tools/testing/selftests/clone3/
  K:    (?i)pidfd
  K:    (?i)clone3
  K:    \b(clone_args|kernel_clone_args)\b
@@@ -13139,12 -13211,14 +13218,14 @@@ F:        Documentation/filesystems/proc.tx
  PROC SYSCTL
  M:    Luis Chamberlain <[email protected]>
  M:    Kees Cook <[email protected]>
+ M:    Iurii Zaikin <[email protected]>
  L:    [email protected]
  L:    [email protected]
  S:    Maintained
  F:    fs/proc/proc_sysctl.c
  F:    include/linux/sysctl.h
  F:    kernel/sysctl.c
+ F:    kernel/sysctl-test.c
  F:    tools/testing/selftests/sysctl/
  
  PS3 NETWORK SUPPORT
@@@ -13828,7 -13902,7 +13909,7 @@@ R:   Sergei Shtylyov <sergei.shtylyov@cog
  L:    [email protected]
  L:    [email protected]
  F:    Documentation/devicetree/bindings/net/renesas,*.txt
- F:    Documentation/devicetree/bindings/net/sh_eth.txt
+ F:    Documentation/devicetree/bindings/net/renesas,*.yaml
  F:    drivers/net/ethernet/renesas/
  F:    include/linux/sh_eth.h
  
@@@ -15301,7 -15375,6 +15382,6 @@@ F:   arch/arm/boot/dts/spear
  F:    arch/arm/mach-spear/
  
  SPI NOR SUBSYSTEM
- M:    Marek Vasut <[email protected]>
  M:    Tudor Ambarus <[email protected]>
  L:    [email protected]
  W:    http://www.linux-mtd.infradead.org/
@@@ -16598,10 -16671,9 +16678,9 @@@ F:  drivers/media/pci/tw686x
  
  UBI FILE SYSTEM (UBIFS)
  M:    Richard Weinberger <[email protected]>
- M:    Artem Bityutskiy <[email protected]>
- M:    Adrian Hunter <[email protected]>
  L:    [email protected]
- T:    git git://git.infradead.org/ubifs-2.6.git
+ T:    git git://git.kernel.org/pub/scm/linux/kernel/git/rw/ubifs.git next
+ T:    git git://git.kernel.org/pub/scm/linux/kernel/git/rw/ubifs.git fixes
  W:    http://www.linux-mtd.infradead.org/doc/ubifs.html
  S:    Supported
  F:    Documentation/filesystems/ubifs.txt
@@@ -16716,11 -16788,11 +16795,11 @@@ S:        Maintaine
  F:    drivers/scsi/ufs/ufs-mediatek*
  
  UNSORTED BLOCK IMAGES (UBI)
- M:    Artem Bityutskiy <[email protected]>
  M:    Richard Weinberger <[email protected]>
  W:    http://www.linux-mtd.infradead.org/
  L:    [email protected]
- T:    git git://git.infradead.org/ubifs-2.6.git
+ T:    git git://git.kernel.org/pub/scm/linux/kernel/git/rw/ubifs.git next
+ T:    git git://git.kernel.org/pub/scm/linux/kernel/git/rw/ubifs.git fixes
  S:    Supported
  F:    drivers/mtd/ubi/
  F:    include/linux/mtd/ubi.h
@@@ -17222,6 -17294,7 +17301,7 @@@ F:   virt/lib
  
  VIRTIO AND VHOST VSOCK DRIVER
  M:    Stefan Hajnoczi <[email protected]>
+ M:    Stefano Garzarella <[email protected]>
  L:    [email protected]
  L:    [email protected]
  L:    [email protected]
@@@ -17353,6 -17426,14 +17433,14 @@@ S: Maintaine
  F:    drivers/input/serio/userio.c
  F:    include/uapi/linux/userio.h
  
+ VITESSE FELIX ETHERNET SWITCH DRIVER
+ M:    Vladimir Oltean <[email protected]>
+ M:    Claudiu Manoil <[email protected]>
+ L:    [email protected]
+ S:    Maintained
+ F:    drivers/net/dsa/ocelot/*
+ F:    net/dsa/tag_ocelot.c
  VIVID VIRTUAL VIDEO DRIVER
  M:    Hans Verkuil <[email protected]>
  L:    [email protected]
@@@ -17453,6 -17534,18 +17541,18 @@@ S: Maintaine
  F:    drivers/net/vrf.c
  F:    Documentation/networking/vrf.txt
  
+ VSPRINTF
+ M:    Petr Mladek <[email protected]>
+ M:    Steven Rostedt <[email protected]>
+ M:    Sergey Senozhatsky <[email protected]>
+ R:    Andy Shevchenko <[email protected]>
+ R:    Rasmus Villemoes <[email protected]>
+ T:    git git://git.kernel.org/pub/scm/linux/kernel/git/pmladek/printk.git
+ S:    Maintained
+ F:    lib/vsprintf.c
+ F:    lib/test_printf.c
+ F:    Documentation/core-api/printk-formats.rst
  VT1211 HARDWARE MONITOR DRIVER
  M:    Juerg Haefliger <[email protected]>
  L:    [email protected]
diff --combined arch/x86/events/intel/core.c
index dc64b16e6b719b0d8f89227b17ec965bb2dc4586,937363b803c19d3a1c90b84a22eeec2969d08be1..3be51aa06e67ec2e5b893af82096b43db5c3f23a
@@@ -3315,17 -3315,27 +3315,28 @@@ static int intel_pmu_hw_config(struct p
        if (x86_pmu.version < 3)
                return -EINVAL;
  
 -      if (perf_paranoid_cpu() && !capable(CAP_SYS_ADMIN))
 -              return -EACCES;
 +      ret = perf_allow_cpu(&event->attr);
 +      if (ret)
 +              return ret;
  
        event->hw.config |= ARCH_PERFMON_EVENTSEL_ANY;
  
        return 0;
  }
  
+ #ifdef CONFIG_RETPOLINE
+ static struct perf_guest_switch_msr *core_guest_get_msrs(int *nr);
+ static struct perf_guest_switch_msr *intel_guest_get_msrs(int *nr);
+ #endif
  struct perf_guest_switch_msr *perf_guest_get_msrs(int *nr)
  {
+ #ifdef CONFIG_RETPOLINE
+       if (x86_pmu.guest_get_msrs == intel_guest_get_msrs)
+               return intel_guest_get_msrs(nr);
+       else if (x86_pmu.guest_get_msrs == core_guest_get_msrs)
+               return core_guest_get_msrs(nr);
+ #endif
        if (x86_pmu.guest_get_msrs)
                return x86_pmu.guest_get_msrs(nr);
        *nr = 0;
@@@ -3820,12 -3830,6 +3831,12 @@@ static void intel_pmu_sched_task(struc
        intel_pmu_lbr_sched_task(ctx, sched_in);
  }
  
 +static void intel_pmu_swap_task_ctx(struct perf_event_context *prev,
 +                                  struct perf_event_context *next)
 +{
 +      intel_pmu_lbr_swap_task_ctx(prev, next);
 +}
 +
  static int intel_pmu_check_period(struct perf_event *event, u64 value)
  {
        return intel_pmu_has_bts_period(event, value) ? -EINVAL : 0;
@@@ -3961,7 -3965,6 +3972,7 @@@ static __initconst const struct x86_pm
  
        .guest_get_msrs         = intel_guest_get_msrs,
        .sched_task             = intel_pmu_sched_task,
 +      .swap_task_ctx          = intel_pmu_swap_task_ctx,
  
        .check_period           = intel_pmu_check_period,
  
diff --combined arch/x86/kvm/x86.c
index 783aa8d141bfa4da0fcc045282da0e5769307dbe,3ed167e039e54f7089d88705ec1718d4d4840d8c..cf917139de6ba272f01aac132d9f0c8b3f2b2c2b
@@@ -68,7 -68,6 +68,7 @@@
  #include <asm/mshyperv.h>
  #include <asm/hypervisor.h>
  #include <asm/intel_pt.h>
 +#include <asm/emulate_prefix.h>
  #include <clocksource/hyperv_timer.h>
  
  #define CREATE_TRACE_POINTS
@@@ -177,6 -176,8 +177,8 @@@ struct kvm_shared_msrs 
  static struct kvm_shared_msrs_global __read_mostly shared_msrs_global;
  static struct kvm_shared_msrs __percpu *shared_msrs;
  
+ static u64 __read_mostly host_xss;
  struct kvm_stats_debugfs_item debugfs_entries[] = {
        { "pf_fixed", VCPU_STAT(pf_fixed) },
        { "pf_guest", VCPU_STAT(pf_guest) },
@@@ -261,23 -262,6 +263,6 @@@ static void kvm_on_user_return(struct u
        }
  }
  
- static void shared_msr_update(unsigned slot, u32 msr)
- {
-       u64 value;
-       unsigned int cpu = smp_processor_id();
-       struct kvm_shared_msrs *smsr = per_cpu_ptr(shared_msrs, cpu);
-       /* only read, and nobody should modify it at this time,
-        * so don't need lock */
-       if (slot >= shared_msrs_global.nr) {
-               printk(KERN_ERR "kvm: invalid MSR slot!");
-               return;
-       }
-       rdmsrl_safe(msr, &value);
-       smsr->values[slot].host = value;
-       smsr->values[slot].curr = value;
- }
  void kvm_define_shared_msr(unsigned slot, u32 msr)
  {
        BUG_ON(slot >= KVM_NR_SHARED_MSRS);
@@@ -289,10 -273,16 +274,16 @@@ EXPORT_SYMBOL_GPL(kvm_define_shared_msr
  
  static void kvm_shared_msr_cpu_online(void)
  {
-       unsigned i;
+       unsigned int cpu = smp_processor_id();
+       struct kvm_shared_msrs *smsr = per_cpu_ptr(shared_msrs, cpu);
+       u64 value;
+       int i;
  
-       for (i = 0; i < shared_msrs_global.nr; ++i)
-               shared_msr_update(i, shared_msrs_global.msrs[i]);
+       for (i = 0; i < shared_msrs_global.nr; ++i) {
+               rdmsrl_safe(shared_msrs_global.msrs[i], &value);
+               smsr->values[i].host = value;
+               smsr->values[i].curr = value;
+       }
  }
  
  int kvm_set_shared_msr(unsigned slot, u64 value, u64 mask)
        struct kvm_shared_msrs *smsr = per_cpu_ptr(shared_msrs, cpu);
        int err;
  
-       if (((value ^ smsr->values[slot].curr) & mask) == 0)
+       value = (value & mask) | (smsr->values[slot].host & ~mask);
+       if (value == smsr->values[slot].curr)
                return 0;
-       smsr->values[slot].curr = value;
        err = wrmsrl_safe(shared_msrs_global.msrs[slot], value);
        if (err)
                return 1;
  
+       smsr->values[slot].curr = value;
        if (!smsr->registered) {
                smsr->urn.on_user_return = kvm_on_user_return;
                user_return_notifier_register(&smsr->urn);
@@@ -710,10 -701,8 +702,8 @@@ int load_pdptrs(struct kvm_vcpu *vcpu, 
        ret = 1;
  
        memcpy(mmu->pdptrs, pdpte, sizeof(mmu->pdptrs));
-       __set_bit(VCPU_EXREG_PDPTR,
-                 (unsigned long *)&vcpu->arch.regs_avail);
-       __set_bit(VCPU_EXREG_PDPTR,
-                 (unsigned long *)&vcpu->arch.regs_dirty);
+       kvm_register_mark_dirty(vcpu, VCPU_EXREG_PDPTR);
  out:
  
        return ret;
@@@ -723,7 -712,6 +713,6 @@@ EXPORT_SYMBOL_GPL(load_pdptrs)
  bool pdptrs_changed(struct kvm_vcpu *vcpu)
  {
        u64 pdpte[ARRAY_SIZE(vcpu->arch.walk_mmu->pdptrs)];
-       bool changed = true;
        int offset;
        gfn_t gfn;
        int r;
        if (!is_pae_paging(vcpu))
                return false;
  
-       if (!test_bit(VCPU_EXREG_PDPTR,
-                     (unsigned long *)&vcpu->arch.regs_avail))
+       if (!kvm_register_is_available(vcpu, VCPU_EXREG_PDPTR))
                return true;
  
        gfn = (kvm_read_cr3(vcpu) & 0xffffffe0ul) >> PAGE_SHIFT;
        r = kvm_read_nested_guest_page(vcpu, gfn, pdpte, offset, sizeof(pdpte),
                                       PFERR_USER_MASK | PFERR_WRITE_MASK);
        if (r < 0)
-               goto out;
-       changed = memcmp(pdpte, vcpu->arch.walk_mmu->pdptrs, sizeof(pdpte)) != 0;
- out:
+               return true;
  
-       return changed;
+       return memcmp(pdpte, vcpu->arch.walk_mmu->pdptrs, sizeof(pdpte)) != 0;
  }
  EXPORT_SYMBOL_GPL(pdptrs_changed);
  
@@@ -813,27 -798,34 +799,34 @@@ void kvm_lmsw(struct kvm_vcpu *vcpu, un
  }
  EXPORT_SYMBOL_GPL(kvm_lmsw);
  
- void kvm_load_guest_xcr0(struct kvm_vcpu *vcpu)
+ void kvm_load_guest_xsave_state(struct kvm_vcpu *vcpu)
  {
-       if (kvm_read_cr4_bits(vcpu, X86_CR4_OSXSAVE) &&
-                       !vcpu->guest_xcr0_loaded) {
-               /* kvm_set_xcr() also depends on this */
+       if (kvm_read_cr4_bits(vcpu, X86_CR4_OSXSAVE)) {
                if (vcpu->arch.xcr0 != host_xcr0)
                        xsetbv(XCR_XFEATURE_ENABLED_MASK, vcpu->arch.xcr0);
-               vcpu->guest_xcr0_loaded = 1;
+               if (vcpu->arch.xsaves_enabled &&
+                   vcpu->arch.ia32_xss != host_xss)
+                       wrmsrl(MSR_IA32_XSS, vcpu->arch.ia32_xss);
        }
  }
- EXPORT_SYMBOL_GPL(kvm_load_guest_xcr0);
+ EXPORT_SYMBOL_GPL(kvm_load_guest_xsave_state);
  
- void kvm_put_guest_xcr0(struct kvm_vcpu *vcpu)
+ void kvm_load_host_xsave_state(struct kvm_vcpu *vcpu)
  {
-       if (vcpu->guest_xcr0_loaded) {
+       if (kvm_read_cr4_bits(vcpu, X86_CR4_OSXSAVE)) {
                if (vcpu->arch.xcr0 != host_xcr0)
                        xsetbv(XCR_XFEATURE_ENABLED_MASK, host_xcr0);
-               vcpu->guest_xcr0_loaded = 0;
+               if (vcpu->arch.xsaves_enabled &&
+                   vcpu->arch.ia32_xss != host_xss)
+                       wrmsrl(MSR_IA32_XSS, host_xss);
        }
  }
- EXPORT_SYMBOL_GPL(kvm_put_guest_xcr0);
+ EXPORT_SYMBOL_GPL(kvm_load_host_xsave_state);
  
  static int __kvm_set_xcr(struct kvm_vcpu *vcpu, u32 index, u64 xcr)
  {
@@@ -985,7 -977,7 +978,7 @@@ int kvm_set_cr3(struct kvm_vcpu *vcpu, 
  
        kvm_mmu_new_cr3(vcpu, cr3, skip_tlb_flush);
        vcpu->arch.cr3 = cr3;
-       __set_bit(VCPU_EXREG_CR3, (ulong *)&vcpu->arch.regs_avail);
+       kvm_register_mark_available(vcpu, VCPU_EXREG_CR3);
  
        return 0;
  }
@@@ -1314,23 -1306,15 +1307,15 @@@ static u64 kvm_get_arch_capabilities(vo
                data |= ARCH_CAP_MDS_NO;
  
        /*
-        * On TAA affected systems, export MDS_NO=0 when:
-        *      - TSX is enabled on the host, i.e. X86_FEATURE_RTM=1.
-        *      - Updated microcode is present. This is detected by
-        *        the presence of ARCH_CAP_TSX_CTRL_MSR and ensures
-        *        that VERW clears CPU buffers.
-        *
-        * When MDS_NO=0 is exported, guests deploy clear CPU buffer
-        * mitigation and don't complain:
-        *
-        *      "Vulnerable: Clear CPU buffers attempted, no microcode"
-        *
-        * If TSX is disabled on the system, guests are also mitigated against
-        * TAA and clear CPU buffer mitigation is not required for guests.
+        * On TAA affected systems:
+        *      - nothing to do if TSX is disabled on the host.
+        *      - we emulate TSX_CTRL if present on the host.
+        *        This lets the guest use VERW to clear CPU buffers.
         */
-       if (boot_cpu_has_bug(X86_BUG_TAA) && boot_cpu_has(X86_FEATURE_RTM) &&
-           (data & ARCH_CAP_TSX_CTRL_MSR))
-               data &= ~ARCH_CAP_MDS_NO;
+       if (!boot_cpu_has(X86_FEATURE_RTM))
+               data &= ~(ARCH_CAP_TAA_NO | ARCH_CAP_TSX_CTRL_MSR);
+       else if (!boot_cpu_has_bug(X86_BUG_TAA))
+               data |= ARCH_CAP_TAA_NO;
  
        return data;
  }
@@@ -1478,8 -1462,8 +1463,8 @@@ static int __kvm_set_msr(struct kvm_vcp
   * Returns 0 on success, non-0 otherwise.
   * Assumes vcpu_load() was already called.
   */
static int __kvm_get_msr(struct kvm_vcpu *vcpu, u32 index, u64 *data,
-                        bool host_initiated)
+ int __kvm_get_msr(struct kvm_vcpu *vcpu, u32 index, u64 *data,
+                 bool host_initiated)
  {
        struct msr_data msr;
        int ret;
@@@ -1554,20 -1538,25 +1539,25 @@@ static int do_set_msr(struct kvm_vcpu *
  }
  
  #ifdef CONFIG_X86_64
+ struct pvclock_clock {
+       int vclock_mode;
+       u64 cycle_last;
+       u64 mask;
+       u32 mult;
+       u32 shift;
+ };
  struct pvclock_gtod_data {
        seqcount_t      seq;
  
-       struct { /* extract of a clocksource struct */
-               int vclock_mode;
-               u64     cycle_last;
-               u64     mask;
-               u32     mult;
-               u32     shift;
-       } clock;
+       struct pvclock_clock clock; /* extract of a clocksource struct */
+       struct pvclock_clock raw_clock; /* extract of a clocksource struct */
  
+       u64             boot_ns_raw;
        u64             boot_ns;
        u64             nsec_base;
        u64             wall_time_sec;
+       u64             monotonic_raw_nsec;
  };
  
  static struct pvclock_gtod_data pvclock_gtod_data;
  static void update_pvclock_gtod(struct timekeeper *tk)
  {
        struct pvclock_gtod_data *vdata = &pvclock_gtod_data;
-       u64 boot_ns;
+       u64 boot_ns, boot_ns_raw;
  
        boot_ns = ktime_to_ns(ktime_add(tk->tkr_mono.base, tk->offs_boot));
+       boot_ns_raw = ktime_to_ns(ktime_add(tk->tkr_raw.base, tk->offs_boot));
  
        write_seqcount_begin(&vdata->seq);
  
        vdata->clock.mult               = tk->tkr_mono.mult;
        vdata->clock.shift              = tk->tkr_mono.shift;
  
+       vdata->raw_clock.vclock_mode    = tk->tkr_raw.clock->archdata.vclock_mode;
+       vdata->raw_clock.cycle_last     = tk->tkr_raw.cycle_last;
+       vdata->raw_clock.mask           = tk->tkr_raw.mask;
+       vdata->raw_clock.mult           = tk->tkr_raw.mult;
+       vdata->raw_clock.shift          = tk->tkr_raw.shift;
        vdata->boot_ns                  = boot_ns;
        vdata->nsec_base                = tk->tkr_mono.xtime_nsec;
  
        vdata->wall_time_sec            = tk->xtime_sec;
  
+       vdata->boot_ns_raw              = boot_ns_raw;
+       vdata->monotonic_raw_nsec       = tk->tkr_raw.xtime_nsec;
        write_seqcount_end(&vdata->seq);
  }
  #endif
@@@ -2016,21 -2015,21 +2016,21 @@@ static u64 read_tsc(void
        return last;
  }
  
- static inline u64 vgettsc(u64 *tsc_timestamp, int *mode)
+ static inline u64 vgettsc(struct pvclock_clock *clock, u64 *tsc_timestamp,
+                         int *mode)
  {
        long v;
-       struct pvclock_gtod_data *gtod = &pvclock_gtod_data;
        u64 tsc_pg_val;
  
-       switch (gtod->clock.vclock_mode) {
+       switch (clock->vclock_mode) {
        case VCLOCK_HVCLOCK:
                tsc_pg_val = hv_read_tsc_page_tsc(hv_get_tsc_page(),
                                                  tsc_timestamp);
                if (tsc_pg_val != U64_MAX) {
                        /* TSC page valid */
                        *mode = VCLOCK_HVCLOCK;
-                       v = (tsc_pg_val - gtod->clock.cycle_last) &
-                               gtod->clock.mask;
+                       v = (tsc_pg_val - clock->cycle_last) &
+                               clock->mask;
                } else {
                        /* TSC page invalid */
                        *mode = VCLOCK_NONE;
        case VCLOCK_TSC:
                *mode = VCLOCK_TSC;
                *tsc_timestamp = read_tsc();
-               v = (*tsc_timestamp - gtod->clock.cycle_last) &
-                       gtod->clock.mask;
+               v = (*tsc_timestamp - clock->cycle_last) &
+                       clock->mask;
                break;
        default:
                *mode = VCLOCK_NONE;
        if (*mode == VCLOCK_NONE)
                *tsc_timestamp = v = 0;
  
-       return v * gtod->clock.mult;
+       return v * clock->mult;
  }
  
- static int do_monotonic_boot(s64 *t, u64 *tsc_timestamp)
+ static int do_monotonic_raw(s64 *t, u64 *tsc_timestamp)
  {
        struct pvclock_gtod_data *gtod = &pvclock_gtod_data;
        unsigned long seq;
  
        do {
                seq = read_seqcount_begin(&gtod->seq);
-               ns = gtod->nsec_base;
-               ns += vgettsc(tsc_timestamp, &mode);
+               ns = gtod->monotonic_raw_nsec;
+               ns += vgettsc(&gtod->raw_clock, tsc_timestamp, &mode);
                ns >>= gtod->clock.shift;
-               ns += gtod->boot_ns;
+               ns += gtod->boot_ns_raw;
        } while (unlikely(read_seqcount_retry(&gtod->seq, seq)));
        *t = ns;
  
@@@ -2082,7 -2081,7 +2082,7 @@@ static int do_realtime(struct timespec6
                seq = read_seqcount_begin(&gtod->seq);
                ts->tv_sec = gtod->wall_time_sec;
                ns = gtod->nsec_base;
-               ns += vgettsc(tsc_timestamp, &mode);
+               ns += vgettsc(&gtod->clock, tsc_timestamp, &mode);
                ns >>= gtod->clock.shift;
        } while (unlikely(read_seqcount_retry(&gtod->seq, seq)));
  
@@@ -2099,7 -2098,7 +2099,7 @@@ static bool kvm_get_time_and_clockread(
        if (!gtod_is_based_on_tsc(pvclock_gtod_data.clock.vclock_mode))
                return false;
  
-       return gtod_is_based_on_tsc(do_monotonic_boot(kernel_ns,
+       return gtod_is_based_on_tsc(do_monotonic_raw(kernel_ns,
                                                      tsc_timestamp));
  }
  
@@@ -2722,6 -2721,20 +2722,20 @@@ int kvm_set_msr_common(struct kvm_vcpu 
        case MSR_IA32_TSC:
                kvm_write_tsc(vcpu, msr_info);
                break;
+       case MSR_IA32_XSS:
+               if (!msr_info->host_initiated &&
+                   !guest_cpuid_has(vcpu, X86_FEATURE_XSAVES))
+                       return 1;
+               /*
+                * We do support PT if kvm_x86_ops->pt_supported(), but we do
+                * not support IA32_XSS[bit 8]. Guests will have to use
+                * RDMSR/WRMSR rather than XSAVES/XRSTORS to save/restore PT
+                * MSRs.
+                */
+               if (data != 0)
+                       return 1;
+               vcpu->arch.ia32_xss = data;
+               break;
        case MSR_SMI_COUNT:
                if (!msr_info->host_initiated)
                        return 1;
@@@ -3049,6 -3062,12 +3063,12 @@@ int kvm_get_msr_common(struct kvm_vcpu 
        case MSR_IA32_MC0_CTL ... MSR_IA32_MCx_CTL(KVM_MAX_MCE_BANKS) - 1:
                return get_msr_mce(vcpu, msr_info->index, &msr_info->data,
                                   msr_info->host_initiated);
+       case MSR_IA32_XSS:
+               if (!msr_info->host_initiated &&
+                   !guest_cpuid_has(vcpu, X86_FEATURE_XSAVES))
+                       return 1;
+               msr_info->data = vcpu->arch.ia32_xss;
+               break;
        case MSR_K7_CLK_CTL:
                /*
                 * Provide expected ramp-up count for K7. All other
@@@ -3826,12 -3845,13 +3846,13 @@@ static int kvm_vcpu_ioctl_x86_set_vcpu_
                                vcpu->arch.hflags |= HF_SMM_INSIDE_NMI_MASK;
                        else
                                vcpu->arch.hflags &= ~HF_SMM_INSIDE_NMI_MASK;
-                       if (lapic_in_kernel(vcpu)) {
-                               if (events->smi.latched_init)
-                                       set_bit(KVM_APIC_INIT, &vcpu->arch.apic->pending_events);
-                               else
-                                       clear_bit(KVM_APIC_INIT, &vcpu->arch.apic->pending_events);
-                       }
+               }
+               if (lapic_in_kernel(vcpu)) {
+                       if (events->smi.latched_init)
+                               set_bit(KVM_APIC_INIT, &vcpu->arch.apic->pending_events);
+                       else
+                               clear_bit(KVM_APIC_INIT, &vcpu->arch.apic->pending_events);
                }
        }
  
@@@ -4422,6 -4442,7 +4443,7 @@@ long kvm_arch_vcpu_ioctl(struct file *f
        case KVM_SET_NESTED_STATE: {
                struct kvm_nested_state __user *user_kvm_nested_state = argp;
                struct kvm_nested_state kvm_state;
+               int idx;
  
                r = -EINVAL;
                if (!kvm_x86_ops->set_nested_state)
                    && !(kvm_state.flags & KVM_STATE_NESTED_GUEST_MODE))
                        break;
  
+               idx = srcu_read_lock(&vcpu->kvm->srcu);
                r = kvm_x86_ops->set_nested_state(vcpu, user_kvm_nested_state, &kvm_state);
+               srcu_read_unlock(&vcpu->kvm->srcu, idx);
                break;
        }
        case KVM_GET_SUPPORTED_HV_CPUID: {
@@@ -4947,9 -4970,6 +4971,6 @@@ set_identity_unlock
                if (!irqchip_kernel(kvm))
                        goto set_irqchip_out;
                r = kvm_vm_ioctl_set_irqchip(kvm, chip);
-               if (r)
-                       goto set_irqchip_out;
-               r = 0;
        set_irqchip_out:
                kfree(chip);
                break;
@@@ -5472,7 -5492,6 +5493,7 @@@ EXPORT_SYMBOL_GPL(kvm_write_guest_virt_
  
  int handle_ud(struct kvm_vcpu *vcpu)
  {
 +      static const char kvm_emulate_prefix[] = { __KVM_EMULATE_PREFIX };
        int emul_type = EMULTYPE_TRAP_UD;
        char sig[5]; /* ud2; .ascii "kvm" */
        struct x86_exception e;
        if (force_emulation_prefix &&
            kvm_read_guest_virt(vcpu, kvm_get_linear_rip(vcpu),
                                sig, sizeof(sig), &e) == 0 &&
 -          memcmp(sig, "\xf\xbkvm", sizeof(sig)) == 0) {
 +          memcmp(sig, kvm_emulate_prefix, sizeof(sig)) == 0) {
                kvm_rip_write(vcpu, kvm_rip_read(vcpu) + sizeof(sig));
                emul_type = EMULTYPE_TRAP_UD_FORCED;
        }
@@@ -6138,7 -6157,7 +6159,7 @@@ static void emulator_set_smbase(struct 
  static int emulator_check_pmc(struct x86_emulate_ctxt *ctxt,
                              u32 pmc)
  {
-       return kvm_pmu_is_valid_msr_idx(emul_to_vcpu(ctxt), pmc);
+       return kvm_pmu_is_valid_rdpmc_ecx(emul_to_vcpu(ctxt), pmc);
  }
  
  static int emulator_read_pmc(struct x86_emulate_ctxt *ctxt,
@@@ -7868,6 -7887,19 +7889,19 @@@ static void process_smi(struct kvm_vcp
        kvm_make_request(KVM_REQ_EVENT, vcpu);
  }
  
+ void kvm_make_scan_ioapic_request_mask(struct kvm *kvm,
+                                      unsigned long *vcpu_bitmap)
+ {
+       cpumask_var_t cpus;
+       zalloc_cpumask_var(&cpus, GFP_ATOMIC);
+       kvm_make_vcpus_request_mask(kvm, KVM_REQ_SCAN_IOAPIC,
+                                   vcpu_bitmap, cpus);
+       free_cpumask_var(cpus);
+ }
  void kvm_make_scan_ioapic_request(struct kvm *kvm)
  {
        kvm_make_all_cpus_request(kvm, KVM_REQ_SCAN_IOAPIC);
@@@ -7945,7 -7977,6 +7979,6 @@@ void kvm_vcpu_reload_apic_access_page(s
         */
        put_page(page);
  }
- EXPORT_SYMBOL_GPL(kvm_vcpu_reload_apic_access_page);
  
  void __kvm_request_immediate_exit(struct kvm_vcpu *vcpu)
  {
@@@ -8704,8 -8735,12 +8737,12 @@@ int kvm_arch_vcpu_ioctl_set_mpstate(str
            mp_state->mp_state != KVM_MP_STATE_RUNNABLE)
                goto out;
  
-       /* INITs are latched while in SMM */
-       if ((is_smm(vcpu) || vcpu->arch.smi_pending) &&
+       /*
+        * KVM_MP_STATE_INIT_RECEIVED means the processor is in
+        * INIT state; latched init should be reported using
+        * KVM_SET_VCPU_EVENTS, so reject it here.
+        */
+       if ((kvm_vcpu_latch_init(vcpu) || vcpu->arch.smi_pending) &&
            (mp_state->mp_state == KVM_MP_STATE_SIPI_RECEIVED ||
             mp_state->mp_state == KVM_MP_STATE_INIT_RECEIVED))
                goto out;
@@@ -8797,7 -8832,7 +8834,7 @@@ static int __set_sregs(struct kvm_vcpu 
        vcpu->arch.cr2 = sregs->cr2;
        mmu_reset_needed |= kvm_read_cr3(vcpu) != sregs->cr3;
        vcpu->arch.cr3 = sregs->cr3;
-       __set_bit(VCPU_EXREG_CR3, (ulong *)&vcpu->arch.regs_avail);
+       kvm_register_mark_available(vcpu, VCPU_EXREG_CR3);
  
        kvm_set_cr8(vcpu, sregs->cr8);
  
@@@ -9324,6 -9359,9 +9361,9 @@@ int kvm_arch_hardware_setup(void
                kvm_default_tsc_scaling_ratio = 1ULL << kvm_tsc_scaling_ratio_frac_bits;
        }
  
+       if (boot_cpu_has(X86_FEATURE_XSAVES))
+               rdmsrl(MSR_IA32_XSS, host_xss);
        kvm_init_msr_list();
        return 0;
  }
@@@ -9377,7 -9415,7 +9417,7 @@@ int kvm_arch_vcpu_init(struct kvm_vcpu 
                goto fail_free_pio_data;
  
        if (irqchip_in_kernel(vcpu->kvm)) {
-               vcpu->arch.apicv_active = kvm_x86_ops->get_enable_apicv(vcpu);
+               vcpu->arch.apicv_active = kvm_x86_ops->get_enable_apicv(vcpu->kvm);
                r = kvm_create_lapic(vcpu, lapic_timer_advance_ns);
                if (r < 0)
                        goto fail_mmu_destroy;
@@@ -9446,7 -9484,13 +9486,13 @@@ void kvm_arch_vcpu_uninit(struct kvm_vc
  
  void kvm_arch_sched_in(struct kvm_vcpu *vcpu, int cpu)
  {
+       struct kvm_pmu *pmu = vcpu_to_pmu(vcpu);
        vcpu->arch.l1tf_flush_l1d = true;
+       if (pmu->version && unlikely(pmu->event_count)) {
+               pmu->need_cleanup = true;
+               kvm_make_request(KVM_REQ_PMU, vcpu);
+       }
        kvm_x86_ops->sched_in(vcpu, cpu);
  }
  
diff --combined include/linux/perf_event.h
index 34c7c69100265c9bfaf59470c04f52ba5c634292,a07bfdb7d8ea139792bb37f0ce042393acaadeee..6d4c22aee38483a34362e3d56c3c22108645fe1f
@@@ -56,7 -56,6 +56,7 @@@ struct perf_guest_info_callbacks 
  #include <linux/perf_regs.h>
  #include <linux/cgroup.h>
  #include <linux/refcount.h>
 +#include <linux/security.h>
  #include <asm/local.h>
  
  struct perf_callchain_entry {
@@@ -249,8 -248,6 +249,8 @@@ struct perf_event
  #define PERF_PMU_CAP_NO_EXCLUDE                       0x80
  #define PERF_PMU_CAP_AUX_OUTPUT                       0x100
  
 +struct perf_output_handle;
 +
  /**
   * struct pmu - generic performance monitoring unit
   */
@@@ -412,15 -409,6 +412,15 @@@ struct pmu 
         */
        size_t                          task_ctx_size;
  
 +      /*
 +       * PMU specific parts of task perf event context (i.e. ctx->task_ctx_data)
 +       * can be synchronized using this function. See Intel LBR callstack support
 +       * implementation and Perf core context switch handling callbacks for usage
 +       * examples.
 +       */
 +      void (*swap_task_ctx)           (struct perf_event_context *prev,
 +                                       struct perf_event_context *next);
 +                                      /* optional */
  
        /*
         * Set up pmu-private data structures for an AUX area
         */
        void (*free_aux)                (void *aux); /* optional */
  
 +      /*
 +       * Take a snapshot of the AUX buffer without touching the event
 +       * state, so that preempting ->start()/->stop() callbacks does
 +       * not interfere with their logic. Called in PMI context.
 +       *
 +       * Returns the size of AUX data copied to the output handle.
 +       *
 +       * Optional.
 +       */
 +      long (*snapshot_aux)            (struct perf_event *event,
 +                                       struct perf_output_handle *handle,
 +                                       unsigned long size);
 +
        /*
         * Validate address range filters: make sure the HW supports the
         * requested configuration and number of filters; return 0 if the
@@@ -746,9 -721,6 +746,9 @@@ struct perf_event 
        struct perf_cgroup              *cgrp; /* cgroup event is attach to */
  #endif
  
 +#ifdef CONFIG_SECURITY
 +      void *security;
 +#endif
        struct list_head                sb_list;
  #endif /* CONFIG_PERF_EVENTS */
  };
@@@ -988,7 -960,6 +988,7 @@@ struct perf_sample_data 
                u32     reserved;
        }                               cpu_entry;
        struct perf_callchain_entry     *callchain;
 +      u64                             aux_size;
  
        /*
         * regs_user may point to task_pt_regs or to regs_user_copy, depending
@@@ -1270,41 -1241,19 +1270,41 @@@ extern int perf_cpu_time_max_percent_ha
  int perf_event_max_stack_handler(struct ctl_table *table, int write,
                                 void __user *buffer, size_t *lenp, loff_t *ppos);
  
 -static inline bool perf_paranoid_tracepoint_raw(void)
 +/* Access to perf_event_open(2) syscall. */
 +#define PERF_SECURITY_OPEN            0
 +
 +/* Finer grained perf_event_open(2) access control. */
 +#define PERF_SECURITY_CPU             1
 +#define PERF_SECURITY_KERNEL          2
 +#define PERF_SECURITY_TRACEPOINT      3
 +
 +static inline int perf_is_paranoid(void)
  {
        return sysctl_perf_event_paranoid > -1;
  }
  
 -static inline bool perf_paranoid_cpu(void)
 +static inline int perf_allow_kernel(struct perf_event_attr *attr)
  {
 -      return sysctl_perf_event_paranoid > 0;
 +      if (sysctl_perf_event_paranoid > 1 && !capable(CAP_SYS_ADMIN))
 +              return -EACCES;
 +
 +      return security_perf_event_open(attr, PERF_SECURITY_KERNEL);
  }
  
 -static inline bool perf_paranoid_kernel(void)
 +static inline int perf_allow_cpu(struct perf_event_attr *attr)
  {
 -      return sysctl_perf_event_paranoid > 1;
 +      if (sysctl_perf_event_paranoid > 0 && !capable(CAP_SYS_ADMIN))
 +              return -EACCES;
 +
 +      return security_perf_event_open(attr, PERF_SECURITY_CPU);
 +}
 +
 +static inline int perf_allow_tracepoint(struct perf_event_attr *attr)
 +{
 +      if (sysctl_perf_event_paranoid > -1 && !capable(CAP_SYS_ADMIN))
 +              return -EPERM;
 +
 +      return security_perf_event_open(attr, PERF_SECURITY_TRACEPOINT);
  }
  
  extern void perf_event_init(void);
@@@ -1378,9 -1327,6 +1378,9 @@@ extern unsigned int perf_output_copy(st
                             const void *buf, unsigned int len);
  extern unsigned int perf_output_skip(struct perf_output_handle *handle,
                                     unsigned int len);
 +extern long perf_output_copy_aux(struct perf_output_handle *aux_handle,
 +                               struct perf_output_handle *handle,
 +                               unsigned long from, unsigned long to);
  extern int perf_swevent_get_recursion_context(void);
  extern void perf_swevent_put_recursion_context(int rctx);
  extern u64 perf_swevent_set_period(struct perf_event *event);
@@@ -1390,6 -1336,8 +1390,8 @@@ extern void perf_event_disable_local(st
  extern void perf_event_disable_inatomic(struct perf_event *event);
  extern void perf_event_task_tick(void);
  extern int perf_event_account_interrupt(struct perf_event *event);
+ extern int perf_event_period(struct perf_event *event, u64 value);
+ extern u64 perf_event_pause(struct perf_event *event, bool reset);
  #else /* !CONFIG_PERF_EVENTS: */
  static inline void *
  perf_aux_output_begin(struct perf_output_handle *handle,
@@@ -1469,6 -1417,14 +1471,14 @@@ static inline void perf_event_disable(s
  static inline int __perf_event_disable(void *info)                    { return -1; }
  static inline void perf_event_task_tick(void)                         { }
  static inline int perf_event_release_kernel(struct perf_event *event) { return 0; }
+ static inline int perf_event_period(struct perf_event *event, u64 value)
+ {
+       return -EINVAL;
+ }
+ static inline u64 perf_event_pause(struct perf_event *event, bool reset)
+ {
+       return 0;
+ }
  #endif
  
  #if defined(CONFIG_PERF_EVENTS) && defined(CONFIG_CPU_SUP_INTEL)
diff --combined kernel/events/core.c
index 059ee711600843261ac8cae416eb8370bdb43a71,5de0b801bc7bc189645f8163f3ba06158418668a..4ff86d57f9e5309905e7e5cffe5a1eb875b45e30
@@@ -1941,11 -1941,6 +1941,11 @@@ static void perf_put_aux_event(struct p
        }
  }
  
 +static bool perf_need_aux_event(struct perf_event *event)
 +{
 +      return !!event->attr.aux_output || !!event->attr.aux_sample_size;
 +}
 +
  static int perf_get_aux_event(struct perf_event *event,
                              struct perf_event *group_leader)
  {
        if (!group_leader)
                return 0;
  
 -      if (!perf_aux_output_match(event, group_leader))
 +      /*
 +       * aux_output and aux_sample_size are mutually exclusive.
 +       */
 +      if (event->attr.aux_output && event->attr.aux_sample_size)
 +              return 0;
 +
 +      if (event->attr.aux_output &&
 +          !perf_aux_output_match(event, group_leader))
 +              return 0;
 +
 +      if (event->attr.aux_sample_size && !group_leader->pmu->snapshot_aux)
                return 0;
  
        if (!atomic_long_inc_not_zero(&group_leader->refcount))
@@@ -2681,25 -2666,6 +2681,25 @@@ perf_install_in_context(struct perf_eve
         */
        smp_store_release(&event->ctx, ctx);
  
 +      /*
 +       * perf_event_attr::disabled events will not run and can be initialized
 +       * without IPI. Except when this is the first event for the context, in
 +       * that case we need the magic of the IPI to set ctx->is_active.
 +       *
 +       * The IOC_ENABLE that is sure to follow the creation of a disabled
 +       * event will issue the IPI and reprogram the hardware.
 +       */
 +      if (__perf_effective_state(event) == PERF_EVENT_STATE_OFF && ctx->nr_events) {
 +              raw_spin_lock_irq(&ctx->lock);
 +              if (ctx->task == TASK_TOMBSTONE) {
 +                      raw_spin_unlock_irq(&ctx->lock);
 +                      return;
 +              }
 +              add_event_to_ctx(event, ctx);
 +              raw_spin_unlock_irq(&ctx->lock);
 +              return;
 +      }
 +
        if (!task) {
                cpu_function_call(cpu, __perf_install_in_context, event);
                return;
@@@ -3238,21 -3204,10 +3238,21 @@@ static void perf_event_context_sched_ou
                raw_spin_lock(&ctx->lock);
                raw_spin_lock_nested(&next_ctx->lock, SINGLE_DEPTH_NESTING);
                if (context_equiv(ctx, next_ctx)) {
 +                      struct pmu *pmu = ctx->pmu;
 +
                        WRITE_ONCE(ctx->task, next);
                        WRITE_ONCE(next_ctx->task, task);
  
 -                      swap(ctx->task_ctx_data, next_ctx->task_ctx_data);
 +                      /*
 +                       * PMU specific parts of task perf context can require
 +                       * additional synchronization. As an example of such
 +                       * synchronization see implementation details of Intel
 +                       * LBR call stack data profiling;
 +                       */
 +                      if (pmu->swap_task_ctx)
 +                              pmu->swap_task_ctx(ctx, next_ctx);
 +                      else
 +                              swap(ctx->task_ctx_data, next_ctx->task_ctx_data);
  
                        /*
                         * RCU_INIT_POINTER here is safe because we've not
@@@ -4274,9 -4229,8 +4274,9 @@@ find_get_context(struct pmu *pmu, struc
  
        if (!task) {
                /* Must be root to operate on a CPU event: */
 -              if (perf_paranoid_cpu() && !capable(CAP_SYS_ADMIN))
 -                      return ERR_PTR(-EACCES);
 +              err = perf_allow_cpu(&event->attr);
 +              if (err)
 +                      return ERR_PTR(err);
  
                cpuctx = per_cpu_ptr(pmu->pmu_cpu_context, cpu);
                ctx = &cpuctx->ctx;
@@@ -4585,8 -4539,6 +4585,8 @@@ static void _free_event(struct perf_eve
  
        unaccount_event(event);
  
 +      security_perf_event_free(event);
 +
        if (event->rb) {
                /*
                 * Can happen when we close an event with re-directed output.
@@@ -5040,10 -4992,6 +5040,10 @@@ perf_read(struct file *file, char __use
        struct perf_event_context *ctx;
        int ret;
  
 +      ret = security_perf_event_read(event);
 +      if (ret)
 +              return ret;
 +
        ctx = perf_event_ctx_lock(event);
        ret = __perf_read(event, buf, count);
        perf_event_ctx_unlock(event, ctx);
@@@ -5081,6 -5029,24 +5081,24 @@@ static void _perf_event_reset(struct pe
        perf_event_update_userpage(event);
  }
  
+ /* Assume it's not an event with inherit set. */
+ u64 perf_event_pause(struct perf_event *event, bool reset)
+ {
+       struct perf_event_context *ctx;
+       u64 count;
+       ctx = perf_event_ctx_lock(event);
+       WARN_ON_ONCE(event->attr.inherit);
+       _perf_event_disable(event);
+       count = local64_read(&event->count);
+       if (reset)
+               local64_set(&event->count, 0);
+       perf_event_ctx_unlock(event, ctx);
+       return count;
+ }
+ EXPORT_SYMBOL_GPL(perf_event_pause);
  /*
   * Holding the top-level event's child_mutex means that any
   * descendant process that has inherited this event will block
@@@ -5158,16 -5124,11 +5176,11 @@@ static int perf_event_check_period(stru
        return event->pmu->check_period(event, value);
  }
  
- static int perf_event_period(struct perf_event *event, u64 __user *arg)
+ static int _perf_event_period(struct perf_event *event, u64 value)
  {
-       u64 value;
        if (!is_sampling_event(event))
                return -EINVAL;
  
-       if (copy_from_user(&value, arg, sizeof(value)))
-               return -EFAULT;
        if (!value)
                return -EINVAL;
  
        return 0;
  }
  
+ int perf_event_period(struct perf_event *event, u64 value)
+ {
+       struct perf_event_context *ctx;
+       int ret;
+       ctx = perf_event_ctx_lock(event);
+       ret = _perf_event_period(event, value);
+       perf_event_ctx_unlock(event, ctx);
+       return ret;
+ }
+ EXPORT_SYMBOL_GPL(perf_event_period);
  static const struct file_operations perf_fops;
  
  static inline int perf_fget_light(int fd, struct fd *p)
@@@ -5228,8 -5202,14 +5254,14 @@@ static long _perf_ioctl(struct perf_eve
                return _perf_event_refresh(event, arg);
  
        case PERF_EVENT_IOC_PERIOD:
-               return perf_event_period(event, (u64 __user *)arg);
+       {
+               u64 value;
  
+               if (copy_from_user(&value, (u64 __user *)arg, sizeof(value)))
+                       return -EFAULT;
+               return _perf_event_period(event, value);
+       }
        case PERF_EVENT_IOC_ID:
        {
                u64 id = primary_event_id(event);
@@@ -5308,11 -5288,6 +5340,11 @@@ static long perf_ioctl(struct file *fil
        struct perf_event_context *ctx;
        long ret;
  
 +      /* Treat ioctl like writes as it is likely a mutating operation. */
 +      ret = security_perf_event_write(event);
 +      if (ret)
 +              return ret;
 +
        ctx = perf_event_ctx_lock(event);
        ret = _perf_ioctl(event, cmd, arg);
        perf_event_ctx_unlock(event, ctx);
@@@ -5664,8 -5639,10 +5696,8 @@@ static void perf_mmap_close(struct vm_a
                perf_pmu_output_stop(event);
  
                /* now it's safe to free the pages */
 -              if (!rb->aux_mmap_locked)
 -                      atomic_long_sub(rb->aux_nr_pages, &mmap_user->locked_vm);
 -              else
 -                      atomic64_sub(rb->aux_mmap_locked, &vma->vm_mm->pinned_vm);
 +              atomic_long_sub(rb->aux_nr_pages - rb->aux_mmap_locked, &mmap_user->locked_vm);
 +              atomic64_sub(rb->aux_mmap_locked, &vma->vm_mm->pinned_vm);
  
                /* this has to be the last one */
                rb_free_aux(rb);
@@@ -5776,10 -5753,6 +5808,10 @@@ static int perf_mmap(struct file *file
        if (!(vma->vm_flags & VM_SHARED))
                return -EINVAL;
  
 +      ret = security_perf_event_read(event);
 +      if (ret)
 +              return ret;
 +
        vma_size = vma->vm_end - vma->vm_start;
  
        if (vma->vm_pgoff == 0) {
@@@ -5886,7 -5859,13 +5918,7 @@@ accounting
  
        user_locked = atomic_long_read(&user->locked_vm) + user_extra;
  
 -      if (user_locked <= user_lock_limit) {
 -              /* charge all to locked_vm */
 -      } else if (atomic_long_read(&user->locked_vm) >= user_lock_limit) {
 -              /* charge all to pinned_vm */
 -              extra = user_extra;
 -              user_extra = 0;
 -      } else {
 +      if (user_locked > user_lock_limit) {
                /*
                 * charge locked_vm until it hits user_lock_limit;
                 * charge the rest from pinned_vm
        lock_limit >>= PAGE_SHIFT;
        locked = atomic64_read(&vma->vm_mm->pinned_vm) + extra;
  
 -      if ((locked > lock_limit) && perf_paranoid_tracepoint_raw() &&
 +      if ((locked > lock_limit) && perf_is_paranoid() &&
                !capable(CAP_IPC_LOCK)) {
                ret = -EPERM;
                goto unlock;
@@@ -6229,122 -6208,6 +6261,122 @@@ perf_output_sample_ustack(struct perf_o
        }
  }
  
 +static unsigned long perf_prepare_sample_aux(struct perf_event *event,
 +                                        struct perf_sample_data *data,
 +                                        size_t size)
 +{
 +      struct perf_event *sampler = event->aux_event;
 +      struct ring_buffer *rb;
 +
 +      data->aux_size = 0;
 +
 +      if (!sampler)
 +              goto out;
 +
 +      if (WARN_ON_ONCE(READ_ONCE(sampler->state) != PERF_EVENT_STATE_ACTIVE))
 +              goto out;
 +
 +      if (WARN_ON_ONCE(READ_ONCE(sampler->oncpu) != smp_processor_id()))
 +              goto out;
 +
 +      rb = ring_buffer_get(sampler->parent ? sampler->parent : sampler);
 +      if (!rb)
 +              goto out;
 +
 +      /*
 +       * If this is an NMI hit inside sampling code, don't take
 +       * the sample. See also perf_aux_sample_output().
 +       */
 +      if (READ_ONCE(rb->aux_in_sampling)) {
 +              data->aux_size = 0;
 +      } else {
 +              size = min_t(size_t, size, perf_aux_size(rb));
 +              data->aux_size = ALIGN(size, sizeof(u64));
 +      }
 +      ring_buffer_put(rb);
 +
 +out:
 +      return data->aux_size;
 +}
 +
 +long perf_pmu_snapshot_aux(struct ring_buffer *rb,
 +                         struct perf_event *event,
 +                         struct perf_output_handle *handle,
 +                         unsigned long size)
 +{
 +      unsigned long flags;
 +      long ret;
 +
 +      /*
 +       * Normal ->start()/->stop() callbacks run in IRQ mode in scheduler
 +       * paths. If we start calling them in NMI context, they may race with
 +       * the IRQ ones, that is, for example, re-starting an event that's just
 +       * been stopped, which is why we're using a separate callback that
 +       * doesn't change the event state.
 +       *
 +       * IRQs need to be disabled to prevent IPIs from racing with us.
 +       */
 +      local_irq_save(flags);
 +      /*
 +       * Guard against NMI hits inside the critical section;
 +       * see also perf_prepare_sample_aux().
 +       */
 +      WRITE_ONCE(rb->aux_in_sampling, 1);
 +      barrier();
 +
 +      ret = event->pmu->snapshot_aux(event, handle, size);
 +
 +      barrier();
 +      WRITE_ONCE(rb->aux_in_sampling, 0);
 +      local_irq_restore(flags);
 +
 +      return ret;
 +}
 +
 +static void perf_aux_sample_output(struct perf_event *event,
 +                                 struct perf_output_handle *handle,
 +                                 struct perf_sample_data *data)
 +{
 +      struct perf_event *sampler = event->aux_event;
 +      unsigned long pad;
 +      struct ring_buffer *rb;
 +      long size;
 +
 +      if (WARN_ON_ONCE(!sampler || !data->aux_size))
 +              return;
 +
 +      rb = ring_buffer_get(sampler->parent ? sampler->parent : sampler);
 +      if (!rb)
 +              return;
 +
 +      size = perf_pmu_snapshot_aux(rb, sampler, handle, data->aux_size);
 +
 +      /*
 +       * An error here means that perf_output_copy() failed (returned a
 +       * non-zero surplus that it didn't copy), which in its current
 +       * enlightened implementation is not possible. If that changes, we'd
 +       * like to know.
 +       */
 +      if (WARN_ON_ONCE(size < 0))
 +              goto out_put;
 +
 +      /*
 +       * The pad comes from ALIGN()ing data->aux_size up to u64 in
 +       * perf_prepare_sample_aux(), so it should not be more than that.
 +       */
 +      pad = data->aux_size - size;
 +      if (WARN_ON_ONCE(pad >= sizeof(u64)))
 +              pad = 8;
 +
 +      if (pad) {
 +              u64 zero = 0;
 +              perf_output_copy(handle, &zero, pad);
 +      }
 +
 +out_put:
 +      ring_buffer_put(rb);
 +}
 +
  static void __perf_event_header__init_id(struct perf_event_header *header,
                                         struct perf_sample_data *data,
                                         struct perf_event *event)
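For context, perf_pmu_snapshot_aux() above assumes a pmu->snapshot_aux() callback that writes up to size bytes of AUX data into the output handle without touching the event state, and returns the number of bytes it actually emitted. A minimal sketch of such a callback; my_pmu_aux_base() is purely hypothetical and stands in for however a real driver locates its AUX data:

static long my_pmu_snapshot_aux(struct perf_event *event,
				struct perf_output_handle *handle,
				unsigned long size)
{
	/* Hypothetical helper: where this PMU keeps its AUX data. */
	void *aux_data = my_pmu_aux_base(event);

	/*
	 * Must not call ->start()/->stop(): this can run from NMI context
	 * on top of the regular IRQ-time callbacks (see the comment above).
	 * perf_output_copy() returns the number of bytes it could NOT copy,
	 * so the bytes actually written are size minus that surplus.
	 */
	return size - perf_output_copy(handle, aux_data, size);
}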
@@@ -6664,13 -6527,6 +6696,13 @@@ void perf_output_sample(struct perf_out
        if (sample_type & PERF_SAMPLE_PHYS_ADDR)
                perf_output_put(handle, data->phys_addr);
  
 +      if (sample_type & PERF_SAMPLE_AUX) {
 +              perf_output_put(handle, data->aux_size);
 +
 +              if (data->aux_size)
 +                      perf_aux_sample_output(event, handle, data);
 +      }
 +
        if (!event->attr.watermark) {
                int wakeup_events = event->attr.wakeup_events;
  
@@@ -6859,35 -6715,6 +6891,35 @@@ void perf_prepare_sample(struct perf_ev
  
        if (sample_type & PERF_SAMPLE_PHYS_ADDR)
                data->phys_addr = perf_virt_to_phys(data->addr);
 +
 +      if (sample_type & PERF_SAMPLE_AUX) {
 +              u64 size;
 +
 +              header->size += sizeof(u64); /* size */
 +
 +              /*
 +               * Given the 16-bit nature of header::size, an AUX sample can
 +               * easily overflow it on top of all the preceding sample bits.
 +               * Make sure this doesn't happen by using up to U16_MAX bytes
 +               * per sample in total (rounded down to an 8 byte boundary).
 +               */
 +              size = min_t(size_t, U16_MAX - header->size,
 +                           event->attr.aux_sample_size);
 +              size = rounddown(size, 8);
 +              size = perf_prepare_sample_aux(event, data, size);
 +
 +              WARN_ON_ONCE(size + header->size > U16_MAX);
 +              header->size += size;
 +      }
 +      /*
 +       * If you're adding more sample types here, you likely need to do
 +       * something about the overflowing header::size, for example repurpose
 +       * the lowest 3 bits of size, which should always be zero at the moment.
 +       * This raises a more important question: do we really need 512k sized
 +       * samples, and why? Good argumentation is in order for whatever you
 +       * do here next.
 +       */
 +      WARN_ON_ONCE(header->size & 7);
  }
  
  static __always_inline int
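To make the size clamping above concrete, a worked example with illustrative numbers, assuming the rest of the sample already occupies 64 bytes of header->size:

	/* attr.aux_sample_size = 0x20000 (128 KiB), header->size = 64 so far */
	size = min_t(size_t, U16_MAX - 64, 0x20000);	/* 65535 - 64 = 65471 */
	size = rounddown(size, 8);			/* 65464              */
	/* header->size becomes 64 + 65464 = 65528: <= U16_MAX and 8-aligned */

perf_prepare_sample_aux() may then shrink the value further, down to the size of the sampler's AUX buffer.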
@@@ -10239,7 -10066,7 +10271,7 @@@ static struct lock_class_key cpuctx_loc
  
  int perf_pmu_register(struct pmu *pmu, const char *name, int type)
  {
 -      int cpu, ret;
 +      int cpu, ret, max = PERF_TYPE_MAX;
  
        mutex_lock(&pmus_lock);
        ret = -ENOMEM;
                goto skip_type;
        pmu->name = name;
  
 -      if (type < 0) {
 -              type = idr_alloc(&pmu_idr, pmu, PERF_TYPE_MAX, 0, GFP_KERNEL);
 -              if (type < 0) {
 -                      ret = type;
 +      if (type != PERF_TYPE_SOFTWARE) {
 +              if (type >= 0)
 +                      max = type;
 +
 +              ret = idr_alloc(&pmu_idr, pmu, max, 0, GFP_KERNEL);
 +              if (ret < 0)
                        goto free_pdc;
 -              }
 +
 +              WARN_ON(type >= 0 && ret != type);
 +
 +              type = ret;
        }
        pmu->type = type;
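The registration path above leans on idr_alloc()'s convention that an end of 0 means no upper bound, so a fixed, caller-supplied type and a dynamically allocated one go through the same call; the WARN_ON() catches the case where a requested fixed slot was already taken and a higher ID came back instead. A sketch of the two cases, with PERF_TYPE_RAW used purely as an example of a fixed type:

	/* Dynamic type: caller passed type < 0, take the first free ID >= PERF_TYPE_MAX. */
	ret = idr_alloc(&pmu_idr, pmu, PERF_TYPE_MAX, 0, GFP_KERNEL);

	/* Fixed type: caller asked for PERF_TYPE_RAW, so ret should come back equal to it. */
	ret = idr_alloc(&pmu_idr, pmu, PERF_TYPE_RAW, 0, GFP_KERNEL);
	WARN_ON(ret >= 0 && ret != PERF_TYPE_RAW);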
  
@@@ -10339,16 -10161,7 +10371,16 @@@ got_cpu_context
        if (!pmu->event_idx)
                pmu->event_idx = perf_event_idx_default;
  
 -      list_add_rcu(&pmu->entry, &pmus);
 +      /*
 +       * Ensure the TYPE_SOFTWARE PMUs are at the head of the list,
 +       * since these cannot be in the IDR. This way the linear search
 +       * is fast, as long as a valid software event is provided.
 +       */
 +      if (type == PERF_TYPE_SOFTWARE || !name)
 +              list_add_rcu(&pmu->entry, &pmus);
 +      else
 +              list_add_tail_rcu(&pmu->entry, &pmus);
 +
        atomic_set(&pmu->exclusive_cnt, 0);
        ret = 0;
  unlock:
@@@ -10361,7 -10174,7 +10393,7 @@@ free_dev
        put_device(pmu->dev);
  
  free_idr:
 -      if (pmu->type >= PERF_TYPE_MAX)
 +      if (pmu->type != PERF_TYPE_SOFTWARE)
                idr_remove(&pmu_idr, pmu->type);
  
  free_pdc:
@@@ -10383,7 -10196,7 +10415,7 @@@ void perf_pmu_unregister(struct pmu *pm
        synchronize_rcu();
  
        free_percpu(pmu->pmu_disable_count);
 -      if (pmu->type >= PERF_TYPE_MAX)
 +      if (pmu->type != PERF_TYPE_SOFTWARE)
                idr_remove(&pmu_idr, pmu->type);
        if (pmu_bus_running) {
                if (pmu->nr_addr_filters)
@@@ -10453,8 -10266,9 +10485,8 @@@ static int perf_try_init_event(struct p
  
  static struct pmu *perf_init_event(struct perf_event *event)
  {
 +      int idx, type, ret;
        struct pmu *pmu;
 -      int idx;
 -      int ret;
  
        idx = srcu_read_lock(&pmus_srcu);
  
                        goto unlock;
        }
  
 +      /*
 +       * PERF_TYPE_HARDWARE and PERF_TYPE_HW_CACHE
 +       * are often aliases for PERF_TYPE_RAW.
 +       */
 +      type = event->attr.type;
 +      if (type == PERF_TYPE_HARDWARE || type == PERF_TYPE_HW_CACHE)
 +              type = PERF_TYPE_RAW;
 +
 +again:
        rcu_read_lock();
 -      pmu = idr_find(&pmu_idr, event->attr.type);
 +      pmu = idr_find(&pmu_idr, type);
        rcu_read_unlock();
        if (pmu) {
                ret = perf_try_init_event(pmu, event);
 +              if (ret == -ENOENT && event->attr.type != type) {
 +                      type = event->attr.type;
 +                      goto again;
 +              }
 +
                if (ret)
                        pmu = ERR_PTR(ret);
 +
                goto unlock;
        }
  
@@@ -10710,12 -10509,9 +10742,9 @@@ perf_event_alloc(struct perf_event_att
                context = parent_event->overflow_handler_context;
  #if defined(CONFIG_BPF_SYSCALL) && defined(CONFIG_EVENT_TRACING)
                if (overflow_handler == bpf_overflow_handler) {
-                       struct bpf_prog *prog = bpf_prog_inc(parent_event->prog);
+                       struct bpf_prog *prog = parent_event->prog;
  
-                       if (IS_ERR(prog)) {
-                               err = PTR_ERR(prog);
-                               goto err_ns;
-                       }
+                       bpf_prog_inc(prog);
                        event->prog = prog;
                        event->orig_overflow_handler =
                                parent_event->orig_overflow_handler;
                }
        }
  
 +      err = security_perf_event_alloc(event);
 +      if (err)
 +              goto err_callchain_buffer;
 +
        /* symmetric to unaccount_event() in _free_event() */
        account_event(event);
  
        return event;
  
 +err_callchain_buffer:
 +      if (!event->parent) {
 +              if (event->attr.sample_type & PERF_SAMPLE_CALLCHAIN)
 +                      put_callchain_buffers();
 +      }
  err_addr_filters:
        kfree(event->addr_filter_ranges);
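Two separate things land in the hunk above: bpf_prog_inc() evidently can no longer fail to take a reference, so its error path disappears, and perf_event_alloc() gains a security_perf_event_alloc() call with a matching unwind label. The latter belongs to a small family of perf LSM hooks introduced by this series; the declarations below are a hedged reconstruction from the call sites in this file, not authoritative prototypes (the real ones live in include/linux/security.h):

/* Presumed perf LSM hook surface; only _open and _alloc are visible in this diff. */
extern int  security_perf_event_open(struct perf_event_attr *attr, int type);
extern int  security_perf_event_alloc(struct perf_event *event);
extern void security_perf_event_free(struct perf_event *event);
extern int  security_perf_event_read(struct perf_event *event);
extern int  security_perf_event_write(struct perf_event *event);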
  
@@@ -10886,7 -10673,7 +10915,7 @@@ static int perf_copy_attr(struct perf_e
  
        attr->size = size;
  
 -      if (attr->__reserved_1 || attr->__reserved_2)
 +      if (attr->__reserved_1 || attr->__reserved_2 || attr->__reserved_3)
                return -EINVAL;
  
        if (attr->sample_type & ~(PERF_SAMPLE_MAX-1))
                        attr->branch_sample_type = mask;
                }
                /* privileged levels capture (kernel, hv): check permissions */
 -              if ((mask & PERF_SAMPLE_BRANCH_PERM_PLM)
 -                  && perf_paranoid_kernel() && !capable(CAP_SYS_ADMIN))
 -                      return -EACCES;
 +              if (mask & PERF_SAMPLE_BRANCH_PERM_PLM) {
 +                      ret = perf_allow_kernel(attr);
 +                      if (ret)
 +                              return ret;
 +              }
        }
  
        if (attr->sample_type & PERF_SAMPLE_REGS_USER) {
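perf_allow_kernel() replaces the open-coded perf_paranoid_kernel() && !capable(CAP_SYS_ADMIN) checks here and in perf_event_open() below, and, judging by those call sites, also folds in the new LSM hook. It is defined elsewhere in the series (include/linux/perf_event.h); a sketch of the expected behaviour, under the assumption that the old paranoia threshold is kept:

static inline int perf_allow_kernel(struct perf_event_attr *attr)
{
	/* Assumed: the same threshold the old perf_paranoid_kernel() used. */
	if (sysctl_perf_event_paranoid > 1 && !capable(CAP_SYS_ADMIN))
		return -EACCES;

	return security_perf_event_open(attr, PERF_SECURITY_KERNEL);
}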
@@@ -11141,19 -10926,13 +11170,19 @@@ SYSCALL_DEFINE5(perf_event_open
        if (flags & ~PERF_FLAG_ALL)
                return -EINVAL;
  
 +      /* Do we allow access to perf_event_open(2) ? */
 +      err = security_perf_event_open(&attr, PERF_SECURITY_OPEN);
 +      if (err)
 +              return err;
 +
        err = perf_copy_attr(attr_uptr, &attr);
        if (err)
                return err;
  
        if (!attr.exclude_kernel) {
 -              if (perf_paranoid_kernel() && !capable(CAP_SYS_ADMIN))
 -                      return -EACCES;
 +              err = perf_allow_kernel(&attr);
 +              if (err)
 +                      return err;
        }
  
        if (attr.namespaces) {
        }
  
        /* Only privileged users can get physical addresses */
 -      if ((attr.sample_type & PERF_SAMPLE_PHYS_ADDR) &&
 -          perf_paranoid_kernel() && !capable(CAP_SYS_ADMIN))
 -              return -EACCES;
 +      if ((attr.sample_type & PERF_SAMPLE_PHYS_ADDR)) {
 +              err = perf_allow_kernel(&attr);
 +              if (err)
 +                      return err;
 +      }
  
        err = security_locked_down(LOCKDOWN_PERF);
        if (err && (attr.sample_type & PERF_SAMPLE_REGS_INTR))
                }
        }
  
 -      if (event->attr.aux_output && !perf_get_aux_event(event, group_leader))
 +      if (perf_need_aux_event(event) && !perf_get_aux_event(event, group_leader))
                goto err_locked;
  
        /*
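Putting the uapi side together: an event asks for AUX snapshots by setting PERF_SAMPLE_AUX and attr.aux_sample_size, and it must be grouped with an AUX-capable event (Intel PT, for instance) so that perf_get_aux_event() can pair them. A hedged userspace sketch; the AUX PMU's type number has to be read from sysfs at runtime, error handling is omitted, and the exact attr fields depend on the running kernel's perf_event.h:

#include <linux/perf_event.h>
#include <sys/syscall.h>
#include <unistd.h>

static int sys_perf_event_open(struct perf_event_attr *attr, pid_t pid,
			       int cpu, int group_fd, unsigned long flags)
{
	return syscall(SYS_perf_event_open, attr, pid, cpu, group_fd, flags);
}

/* pt_type: e.g. the contents of /sys/bus/event_source/devices/intel_pt/type. */
int open_aux_sampled_group(int pt_type)
{
	struct perf_event_attr aux = { .size = sizeof(aux), .type = pt_type };
	struct perf_event_attr ev  = {
		.size            = sizeof(ev),
		.type            = PERF_TYPE_HARDWARE,
		.config          = PERF_COUNT_HW_CPU_CYCLES,
		.sample_period   = 100000,
		.sample_type     = PERF_SAMPLE_AUX,
		.aux_sample_size = 4096,	/* bytes of AUX data per sample */
	};
	int leader;

	/* The AUX-capable event leads the group... */
	leader = sys_perf_event_open(&aux, 0, -1, -1, 0);

	/* ...and the sampling event attaches to it. */
	return sys_perf_event_open(&ev, 0, -1, leader, 0);
}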