Merge tag 'powerpc-5.4-1' of git://git.kernel.org/pub/scm/linux/kernel/git/powerpc...

author Linus Torvalds <[email protected]>

Fri, 20 Sep 2019 18:48:06 +0000 (11:48 -0700)

committer Linus Torvalds <[email protected]>

Fri, 20 Sep 2019 18:48:06 +0000 (11:48 -0700)
author Linus Torvalds <[email protected]>
Fri, 20 Sep 2019 18:48:06 +0000 (11:48 -0700)
committer Linus Torvalds <[email protected]>
Fri, 20 Sep 2019 18:48:06 +0000 (11:48 -0700)
diff --combined Documentation/admin-guide/kernel-parameters.txt

index 782e9072407b94f6f39d4135c5adc361b622d45b,3cd757f9feaa1583a11c2688a66fcabf5d28686a..d3814789304fac0cada06423faf3ab590a2d9876
--- 1/Documentation/admin-guide/kernel-parameters.txt
--- 2/Documentation/admin-guide/kernel-parameters.txt
+++ b/Documentation/admin-guide/kernel-parameters.txt
@@@ -860,6 -860,10 +860,10 @@@
         disable_radix   [PPC]
                         Disable RADIX MMU mode on POWER9
   
+       disable_tlbie   [PPC]
+                       Disable TLBIE instruction. Currently does not work
+                       with KVM, with HASH MMU, or with coherent accelerators.
+ 
         disable_cpu_apicid= [X86,APIC,SMP]
                         Format: <int>
                         The number of initial APIC ID for the
@@@ -1044,10 -1048,6 +1048,10 @@@
                         specified address. The serial port must already be
                         setup and configured. Options are not yet supported.
   
+ +              sbi
+ +                      Use RISC-V SBI (Supervisor Binary Interface) for early
+ +                      console.
+ +
                 smh     Use ARM semihosting calls for early console.
   
                 s3c2410,<addr>
@@@ -1094,12 -1094,6 +1098,12 @@@
                         the framebuffer, pass the 'ram' option so that it is
                         mapped with the correct attributes.
   
+ +              linflex,<addr>
+ +                      Use early console provided by Freescale LinFlex UART
+ +                      serial driver for NXP S32V234 SoCs. A valid base
+ +                      address must be provided, and the serial port must
+ +                      already be setup and configured.
+ +
         earlyprintk=    [X86,SH,ARM,M68k,S390]
                         earlyprintk=vga
                         earlyprintk=sclp
@@@ -1207,6 -1201,12 +1211,6 @@@
                         See comment before function elanfreq_setup() in
                         arch/x86/kernel/cpu/cpufreq/elanfreq.c.
   
- -      elevator=       [IOSCHED]
- -                      Format: { "mq-deadline" | "kyber" | "bfq" }
- -                      See Documentation/block/deadline-iosched.rst,
- -                      Documentation/block/kyber-iosched.rst and
- -                      Documentation/block/bfq-iosched.rst for details.
- -
         elfcorehdr=[size[KMG]@]offset[KMG] [IA64,PPC,SH,X86,S390]
                         Specifies physical address of start of kernel core
                         image elf header and optionally the size. Generally
@@@ -1736,11 -1736,6 +1740,11 @@@
                         Note that using this option lowers the security
                         provided by tboot because it makes the system
                         vulnerable to DMA attacks.
+ +              nobounce [Default off]
+ +                      Disable bounce buffer for untrusted devices such as
+ +                      the Thunderbolt devices. This will treat the untrusted
+ +                      devices as the trusted ones, hence might expose security
+ +                      risks of DMA attacks.
   
         intel_idle.max_cstate=  [KNL,HW,ACPI,X86]
                         0       disables intel_idle and fall back on acpi_idle.
@@@ -1820,7 -1815,7 +1824,7 @@@
                           synchronously.
   
         iommu.passthrough=
- -                      [ARM64] Configure DMA to bypass the IOMMU by default.
+ +                      [ARM64, X86] Configure DMA to bypass the IOMMU by default.
                         Format: { "0" | "1" }
                         0 - Use IOMMU translation for DMA.
                         1 - Bypass the IOMMU for DMA.
@@@ -2382,7 -2377,7 +2386,7 @@@
   
         machvec=        [IA-64] Force the use of a particular machine-vector
                         (machvec) in a generic kernel.
- -                      Example: machvec=hpzx1_swiotlb
+ +                      Example: machvec=hpzx1
   
         machtype=       [Loongson] Share the same kernel image file between different
                          yeeloong laptop.
@@@ -2613,7 -2608,7 +2617,7 @@@
                                 expose users to several CPU vulnerabilities.
                                 Equivalent to: nopti [X86,PPC]
                                                kpti=0 [ARM64]
- -                                             nospectre_v1 [PPC]
+ +                                             nospectre_v1 [X86,PPC]
                                                nobp=0 [S390]
                                                nospectre_v2 [X86,PPC,S390,ARM64]
                                                spectre_v2_user=off [X86]
@@@ -2974,9 -2969,9 +2978,9 @@@
                         nosmt=force: Force disable SMT, cannot be undone
                                      via the sysfs control file.
   
- -      nospectre_v1    [PPC] Disable mitigations for Spectre Variant 1 (bounds
- -                      check bypass). With this option data leaks are possible
- -                      in the system.
+ +      nospectre_v1    [X86,PPC] Disable mitigations for Spectre Variant 1
+ +                      (bounds check bypass). With this option data leaks are
+ +                      possible in the system.
   
         nospectre_v2    [X86,PPC_FSL_BOOK3E,ARM64] Disable all mitigations for
                         the Spectre variant 2 (indirect branch prediction)
@@@ -3846,13 -3841,12 +3850,13 @@@
                         RCU_BOOST is not set, valid values are 0-99 and
                         the default is zero (non-realtime operation).
   
- -      rcutree.rcu_nocb_leader_stride= [KNL]
- -                      Set the number of NOCB kthread groups, which
- -                      defaults to the square root of the number of
- -                      CPUs.  Larger numbers reduces the wakeup overhead
- -                      on the per-CPU grace-period kthreads, but increases
- -                      that same overhead on each group's leader.
+ +      rcutree.rcu_nocb_gp_stride= [KNL]
+ +                      Set the number of NOCB callback kthreads in
+ +                      each group, which defaults to the square root
+ +                      of the number of CPUs.  Larger numbers reduce
+ +                      the wakeup overhead on the global grace-period
+ +                      kthread, but increase that same overhead on
+ +                      each group's NOCB grace-period kthread.
   
         rcutree.qhimark= [KNL]
                         Set threshold of queued RCU callbacks beyond which
@@@ -4057,10 -4051,6 +4061,10 @@@
         rcutorture.verbose= [KNL]
                         Enable additional printk() statements.
   
+ +      rcupdate.rcu_cpu_stall_ftrace_dump= [KNL]
+ +                      Dump ftrace buffer after reporting RCU CPU
+ +                      stall warning.
+ +
         rcupdate.rcu_cpu_stall_suppress= [KNL]
                         Suppress RCU CPU stall warning messages.
   
@@@ -4104,13 -4094,6 +4108,13 @@@
                         Run specified binary instead of /init from the ramdisk,
                         used for early userspace startup. See initrd.
   
+ +      rdrand=         [X86]
+ +                      force - Override the decision by the kernel to hide the
+ +                              advertisement of RDRAND support (this affects
+ +                              certain AMD processors because of buggy BIOS
+ +                              support, specifically around the suspend/resume
+ +                              path).
+ +
         rdt=            [HW,X86,RDT]
                         Turn on/off individual RDT features. List is:
                         cmt, mbmtotal, mbmlocal, l3cat, l3cdp, l2cat, l2cdp,
@@@ -4641,6 -4624,11 +4645,11 @@@
                         /sys/power/pm_test). Only available when CONFIG_PM_DEBUG
                         is set. Default value is 5.
   
+       svm=            [PPC]
+                       Format: { on | off | y | n | 1 | 0 }
+                       This parameter controls use of the Protected
+                       Execution Facility on pSeries.
+ 
         swapaccount=[0|1]
                         [KNL] Enable accounting of swap in memory resource
                         controller if no parameter or 1 is given or disable
@@@ -5326,3 -5314,22 +5335,22 @@@
                         A hex value specifying bitmask with supplemental xhci
                         host controller quirks. Meaning of each bit can be
                         consulted in header drivers/usb/host/xhci.h.
+ 
+       xmon            [PPC]
+                       Format: { early | on | rw | ro | off }
+                       Controls if xmon debugger is enabled. Default is off.
+                       Passing only "xmon" is equivalent to "xmon=early".
+                       early   Call xmon as early as possible on boot; xmon
+                               debugger is called from setup_arch().
+                       on      xmon debugger hooks will be installed so xmon
+                               is only called on a kernel crash. Default mode,
+                               i.e. either "ro" or "rw" mode, is controlled
+                               with CONFIG_XMON_DEFAULT_RO_MODE.
+                       rw      xmon debugger hooks will be installed so xmon
+                               is called only on a kernel crash, mode is write,
+                               meaning SPR registers, memory, and other data
+                               can be written using xmon commands.
+                       ro      same as "rw" option above but SPR registers,
+                               memory, and other data can't be written using
+                               xmon commands.
+                       off     xmon is disabled.
diff --combined arch/Kconfig

index f2a3dc80d46bdce3db1873772cf8ffc44cd8265c,89e2e3f64f791960715e43552b67f9f29ce6e632..0fcf8ec1e09883a2fbab69f02f02068dd913afc3
--- 1/arch/Kconfig
--- 2/arch/Kconfig
+++ b/arch/Kconfig
@@@ -18,9 -18,6 +18,9 @@@ config KEXEC_COR
         select CRASH_CORE
         bool
   
+ +config KEXEC_ELF
+ +      bool
+ +
   config HAVE_IMA_KEXEC
         bool
   
@@@ -106,7 -103,7 +106,7 @@@ config STATIC_KEYS_SELFTES
   config OPTPROBES
         def_bool y
         depends on KPROBES && HAVE_OPTPROBES
- -      select TASKS_RCU if PREEMPT
+ +      select TASKS_RCU if PREEMPTION
   
   config KPROBES_ON_FTRACE
         def_bool y
@@@ -292,13 -289,6 +292,13 @@@ config ARCH_32BIT_OFF_
           still support 32-bit off_t. This option is enabled for all such
           architectures explicitly.
   
+ +config HAVE_ASM_MODVERSIONS
+ +      bool
+ +      help
+ +        This symbol should be selected by an architecture if it provides
+ +        <asm/asm-prototypes.h> to support the module versioning for symbols
+ +        exported from assembly code.
+ +
   config HAVE_REGS_AND_STACK_ACCESS_API
         bool
         help
@@@ -800,6 -790,9 +800,6 @@@ config COMPAT_32BIT_TIM
           This is relevant on all 32-bit architectures, and 64-bit architectures
           as part of compat syscall handling.
   
- -config ARCH_NO_COHERENT_DMA_MMAP
- -      bool
- -
   config ARCH_NO_PREEMPT
         bool
   
@@@ -932,20 -925,9 +932,23 @@@ config LOCK_EVENT_COUNT
           the chance of application behavior change because of timing
           differences. The counts are reported via debugfs.
   
+ +# Select if the architecture has support for applying RELR relocations.
+ +config ARCH_HAS_RELR
+ +      bool
+ +
+ +config RELR
+ +      bool "Use RELR relocation packing"
+ +      depends on ARCH_HAS_RELR && TOOLS_SUPPORT_RELR
+ +      default y
+ +      help
+ +        Store the kernel's dynamic relocations in the RELR relocation packing
+ +        format. Requires a compatible linker (LLD supports this feature), as
+ +        well as compatible NM and OBJCOPY utilities (llvm-nm and llvm-objcopy
+ +        are compatible).
+ +
+ config ARCH_HAS_MEM_ENCRYPT
+       bool
+ 
   source "kernel/gcov/Kconfig"
   
   source "scripts/gcc-plugins/Kconfig"
diff --combined arch/powerpc/Kconfig

index 19ee5f155a086c852d47388bec459c2358603f01,eea6d5095f717ddeed7f16d00efb866124e861b4..3e56c9c2f16eed8487190350bd615a702c9de498
--- 1/arch/powerpc/Kconfig
--- 2/arch/powerpc/Kconfig
+++ b/arch/powerpc/Kconfig
@@@ -121,6 -121,7 +121,6 @@@ config PP
         select ARCH_32BIT_OFF_T if PPC32
         select ARCH_HAS_DEBUG_VIRTUAL
         select ARCH_HAS_DEVMEM_IS_ALLOWED
- -      select ARCH_HAS_DMA_MMAP_PGPROT
         select ARCH_HAS_ELF_RANDOMIZE
         select ARCH_HAS_FORTIFY_SOURCE
         select ARCH_HAS_GCOV_PROFILE_ALL
@@@ -128,14 -129,15 +128,15 @@@
         select ARCH_HAS_HUGEPD                  if HUGETLB_PAGE
         select ARCH_HAS_MMIOWB                  if PPC64
         select ARCH_HAS_PHYS_TO_DMA
-       select ARCH_HAS_PMEM_API                if PPC64
+       select ARCH_HAS_PMEM_API
         select ARCH_HAS_PTE_DEVMAP              if PPC_BOOK3S_64
         select ARCH_HAS_PTE_SPECIAL
         select ARCH_HAS_MEMBARRIER_CALLBACKS
-       select ARCH_HAS_SCALED_CPUTIME          if VIRT_CPU_ACCOUNTING_NATIVE && PPC64
+       select ARCH_HAS_SCALED_CPUTIME          if VIRT_CPU_ACCOUNTING_NATIVE && PPC_BOOK3S_64
         select ARCH_HAS_STRICT_KERNEL_RWX       if ((PPC_BOOK3S_64 || PPC32) && !RELOCATABLE && !HIBERNATION)
         select ARCH_HAS_TICK_BROADCAST          if GENERIC_CLOCKEVENTS_BROADCAST
-       select ARCH_HAS_UACCESS_FLUSHCACHE      if PPC64
+       select ARCH_HAS_UACCESS_FLUSHCACHE
+       select ARCH_HAS_UACCESS_MCSAFE          if PPC64
         select ARCH_HAS_UBSAN_SANITIZE_ALL
         select ARCH_HAVE_NMI_SAFE_CMPXCHG
         select ARCH_KEEP_MEMBLOCK
@@@ -177,12 -179,12 +178,13 @@@
         select HAVE_ARCH_NVRAM_OPS
         select HAVE_ARCH_SECCOMP_FILTER
         select HAVE_ARCH_TRACEHOOK
+ +      select HAVE_ASM_MODVERSIONS
         select HAVE_C_RECORDMCOUNT
         select HAVE_CBPF_JIT                    if !PPC64
         select HAVE_STACKPROTECTOR              if PPC64 && $(cc-option,-mstack-protector-guard=tls -mstack-protector-guard-reg=r13)
         select HAVE_STACKPROTECTOR              if PPC32 && $(cc-option,-mstack-protector-guard=tls -mstack-protector-guard-reg=r2)
         select HAVE_CONTEXT_TRACKING            if PPC64
+       select HAVE_COPY_THREAD_TLS
         select HAVE_DEBUG_KMEMLEAK
         select HAVE_DEBUG_STACKOVERFLOW
         select HAVE_DYNAMIC_FTRACE
@@@ -512,7 -514,6 +514,7 @@@ config KEXEC_FIL
         select KEXEC_CORE
         select HAVE_IMA_KEXEC
         select BUILD_BIN2C
+ +      select KEXEC_ELF
         depends on PPC64
         depends on CRYPTO=y
         depends on CRYPTO_SHA256=y
@@@ -568,7 -569,7 +570,7 @@@ config CRASH_DUM
   
   config FA_DUMP
         bool "Firmware-assisted dump"
-       depends on PPC64 && PPC_RTAS
+       depends on PPC64 && (PPC_RTAS || PPC_POWERNV)
         select CRASH_CORE
         select CRASH_DUMP
         help
@@@ -579,7 -580,26 +581,26 @@@
           is meant to be a kdump replacement offering robustness and
           speed not possible without system firmware assistance.
   
-         If unsure, say "N"
+         If unsure, say "y". Only special kernels like petitboot may
+         need to say "N" here.
+ 
+ config PRESERVE_FA_DUMP
+       bool "Preserve Firmware-assisted dump"
+       depends on PPC64 && PPC_POWERNV && !FA_DUMP
+       help
+         On a kernel with FA_DUMP disabled, this option helps to preserve
+         crash data from a previously crashed kernel. Useful when the next
+         memory preserving kernel boot would process this crash data.
+         Petitboot kernel is the typical use case for this option.
+ 
+ config OPAL_CORE
+       bool "Export OPAL memory as /sys/firmware/opal/core"
+       depends on PPC64 && PPC_POWERNV
+       help
+         This option uses the MPIPL support in firmware to provide an
+         ELF core of OPAL memory after a crash. The ELF core is exported
+         as /sys/firmware/opal/core file which is helpful in debugging
+         OPAL crashes using GDB.
   
   config IRQ_ALL_CPUS
         bool "Distribute interrupts on all CPUs by default"
@@@ -1140,18 -1160,6 +1161,6 @@@ config TASK_SIZ
         default "0x80000000" if PPC_8xx
         default "0xc0000000"
   
- config CONSISTENT_SIZE_BOOL
-       bool "Set custom consistent memory pool size"
-       depends on ADVANCED_OPTIONS && NOT_COHERENT_CACHE
-       help
-         This option allows you to set the size of the
-         consistent memory pool.  This pool of virtual memory
-         is used to make consistent memory allocations.
- 
- config CONSISTENT_SIZE
-       hex "Size of consistent memory pool" if CONSISTENT_SIZE_BOOL
-       default "0x00200000" if NOT_COHERENT_CACHE
- 
   config PIN_TLB
         bool "Pinned Kernel TLBs (860 ONLY)"
         depends on ADVANCED_OPTIONS && PPC_8xx && \
diff --combined arch/powerpc/Makefile

index 37990dd105dc1b2192ba2869b9549a7c6b710575,46ed198a3aa3a2f3484ecbe05682a75793084ba0..83522c9fc7b66afc742cb01c02eeba4558deba83
--- 1/arch/powerpc/Makefile
--- 2/arch/powerpc/Makefile
+++ b/arch/powerpc/Makefile
@@@ -39,11 -39,13 +39,11 @@@ endi
   uname := $(shell uname -m)
   KBUILD_DEFCONFIG := $(if $(filter ppc%,$(uname)),$(uname),ppc64)_defconfig
   
- -ifdef CONFIG_PPC64
   new_nm := $(shell if $(NM) --help 2>&1 | grep -- '--synthetic' > /dev/null; then echo y; else echo n; fi)
   
   ifeq ($(new_nm),y)
   NM            := $(NM) --synthetic
   endif
- -endif
   
   # BITS is used as extension for files which are available in a 32 bit
   # and a 64 bit version to simplify shared Makefiles.
@@@ -65,7 -67,7 +65,7 @@@ UTS_MACHINE := $(subst $(space),,$(mach
   ifdef CONFIG_PPC32
   KBUILD_LDFLAGS_MODULE += arch/powerpc/lib/crtsavres.o
   else
- -KBUILD_LDFLAGS_MODULE += -T $(srctree)/arch/powerpc/kernel/module.lds
+ +KBUILD_LDS_MODULE += $(srctree)/arch/powerpc/kernel/module.lds
   ifeq ($(call ld-ifversion, -ge, 225000000, y),y)
   # Have the linker provide sfpr if possible.
   # There is a corresponding test in arch/powerpc/lib/Makefile
@@@ -110,7 -112,6 +110,6 @@@ ifeq ($(HAS_BIARCH),y
   KBUILD_CFLAGS += -m$(BITS)
   KBUILD_AFLAGS += -m$(BITS) -Wl,-a$(BITS)
   KBUILD_LDFLAGS        += -m elf$(BITS)$(LDEMULATION)
- KBUILD_ARFLAGS        += --target=elf$(BITS)-$(GNUTARGET)
   endif
   
   cflags-$(CONFIG_STACKPROTECTOR)       += -mstack-protector-guard=tls
diff --combined arch/powerpc/include/asm/kvm_host.h

index 6fb5fb4779e0dcec8ece9116e2625e6f476d5460,4bb552d639b808733eb793f67a930f5305b575ab..6fe6ad64cba57649f77d2cd3d0443097ee857e35
--- 1/arch/powerpc/include/asm/kvm_host.h
--- 2/arch/powerpc/include/asm/kvm_host.h
+++ b/arch/powerpc/include/asm/kvm_host.h
@@@ -232,25 -232,11 +232,25 @@@ struct revmap_entry 
   };
   
   /*
- - * We use the top bit of each memslot->arch.rmap entry as a lock bit,
- - * and bit 32 as a present flag.  The bottom 32 bits are the
- - * index in the guest HPT of a HPTE that points to the page.
+ + * The rmap array of size number of guest pages is allocated for each memslot.
+ + * This array is used to store usage specific information about the guest page.
+ + * Below are the encodings of the various possible usage types.
    */
- -#define KVMPPC_RMAP_LOCK_BIT  63
+ +/* Free bits which can be used to define a new usage */
+ +#define KVMPPC_RMAP_TYPE_MASK 0xff00000000000000
+ +#define KVMPPC_RMAP_NESTED    0xc000000000000000      /* Nested rmap array */
+ +#define KVMPPC_RMAP_HPT               0x0100000000000000      /* HPT guest */
+ +
+ +/*
+ + * rmap usage definition for a hash page table (hpt) guest:
+ + * 0x0000080000000000 Lock bit
+ + * 0x0000018000000000 RC bits
+ + * 0x0000000100000000 Present bit
+ + * 0x00000000ffffffff HPT index bits
+ + * The bottom 32 bits are the index in the guest HPT of a HPTE that points to
+ + * the page.
+ + */
+ +#define KVMPPC_RMAP_LOCK_BIT  43
   #define KVMPPC_RMAP_RC_SHIFT  32
   #define KVMPPC_RMAP_REFERENCED        (HPTE_R_R << KVMPPC_RMAP_RC_SHIFT)
   #define KVMPPC_RMAP_PRESENT   0x100000000ul
@@@ -297,6 -283,7 +297,7 @@@ struct kvm_arch 
         cpumask_t cpu_in_guest;
         u8 radix;
         u8 fwnmi_enabled;
+       u8 secure_guest;
         bool threads_indep;
         bool nested_enable;
         pgd_t *pgtable;
diff --combined arch/powerpc/include/asm/xive.h

index 818989e1167892f379cf2b4db16631dbb1ecf7aa,71f52f22c36b5830b03663f71ebef2aa0b570b89..24cdf97376c4554df91e62a25292386d26266c2b
--- 1/arch/powerpc/include/asm/xive.h
--- 2/arch/powerpc/include/asm/xive.h
+++ b/arch/powerpc/include/asm/xive.h
@@@ -99,6 -99,7 +99,7 @@@ extern void xive_flush_interrupt(void)
   
   /* xmon hook */
   extern void xmon_xive_do_dump(int cpu);
+ extern int xmon_xive_get_irq_config(u32 hw_irq, struct irq_data *d);
   
   /* APIs used by KVM */
   extern u32 xive_native_default_eq_shift(void);
@@@ -135,7 -136,6 +136,7 @@@ extern int xive_native_get_queue_state(
   extern int xive_native_set_queue_state(u32 vp_id, uint32_t prio, u32 qtoggle,
                                        u32 qindex);
   extern int xive_native_get_vp_state(u32 vp_id, u64 *out_state);
+ +extern bool xive_native_has_queue_state_support(void);
   
   #else
   
diff --combined arch/powerpc/kernel/Makefile

index 56dfa7a2a6f2a09f7d3d3d385147f4b77714932c,21ab769e8530eb1bfd5f20484c62e83af46a68d3..a7ca8fe623686afb43f03c7335d9f73036497781
--- 1/arch/powerpc/kernel/Makefile
--- 2/arch/powerpc/kernel/Makefile
+++ b/arch/powerpc/kernel/Makefile
@@@ -49,10 -49,11 +49,10 @@@ obj-y                              := cputable.o ptrace.o syscall
                                    signal.o sysfs.o cacheinfo.o time.o \
                                    prom.o traps.o setup-common.o \
                                    udbg.o misc.o io.o misc_$(BITS).o \
- -                                 of_platform.o prom_parse.o \
- -                                 dma-common.o
+ +                                 of_platform.o prom_parse.o
   obj-$(CONFIG_PPC64)           += setup_64.o sys_ppc32.o \
                                    signal_64.o ptrace32.o \
-                                  paca.o nvram_64.o firmware.o
+                                  paca.o nvram_64.o firmware.o note.o
   obj-$(CONFIG_VDSO32)          += vdso32/
   obj-$(CONFIG_PPC_WATCHDOG)    += watchdog.o
   obj-$(CONFIG_HAVE_HW_BREAKPOINT)      += hw_breakpoint.o
@@@ -78,7 -79,9 +78,9 @@@ obj-$(CONFIG_EEH)              += eeh.
                                   eeh_driver.o eeh_event.o eeh_sysfs.o
   obj-$(CONFIG_GENERIC_TBSYNC)  += smp-tbsync.o
   obj-$(CONFIG_CRASH_DUMP)      += crash_dump.o
- obj-$(CONFIG_FA_DUMP)         += fadump.o
+ ifneq ($(CONFIG_FA_DUMP)$(CONFIG_PRESERVE_FA_DUMP),)
+ obj-y                         += fadump.o
+ endif
   ifdef CONFIG_PPC32
   obj-$(CONFIG_E500)            += idle_e500.o
   endif
@@@ -155,6 -158,9 +157,9 @@@ endi
   
   obj-$(CONFIG_EPAPR_PARAVIRT)  += epapr_paravirt.o epapr_hcalls.o
   obj-$(CONFIG_KVM_GUEST)               += kvm.o kvm_emul.o
+ ifneq ($(CONFIG_PPC_POWERNV)$(CONFIG_PPC_SVM),)
+ obj-y                         += ucall.o
+ endif
   
   # Disable GCOV, KCOV & sanitizers in odd or sensitive code
   GCOV_PROFILE_prom_init.o := n
@@@ -184,15 -190,13 +189,13 @@@ extra-$(CONFIG_ALTIVEC)         += vector.
   extra-$(CONFIG_PPC64)         += entry_64.o
   extra-$(CONFIG_PPC_OF_BOOT_TRAMPOLINE)        += prom_init.o
   
- ifdef CONFIG_PPC_OF_BOOT_TRAMPOLINE
- $(obj)/built-in.a:            prom_init_check
+ extra-$(CONFIG_PPC_OF_BOOT_TRAMPOLINE)        += prom_init_check
   
- quiet_cmd_prom_init_check = CALL    $<
-       cmd_prom_init_check = $(CONFIG_SHELL) $< "$(NM)" "$(obj)/prom_init.o"
+ quiet_cmd_prom_init_check = PROMCHK $@
+       cmd_prom_init_check = $(CONFIG_SHELL) $< "$(NM)" $(obj)/prom_init.o; touch $@
   
- PHONY += prom_init_check
- prom_init_check: $(src)/prom_init_check.sh $(obj)/prom_init.o
-       $(call cmd,prom_init_check)
- endif
+ $(obj)/prom_init_check: $(src)/prom_init_check.sh $(obj)/prom_init.o FORCE
+       $(call if_changed,prom_init_check)
+ targets += prom_init_check
   
   clean-files := vmlinux.lds
diff --combined arch/powerpc/kernel/dma-iommu.c

index 2f5a53874f6d4349580b58de3a396ea945c0a28b,c963d704fa31384024839e41b5cfa6b012e3e043..e486d1d78de28842b8a35af69880bbb12ce86cf8
--- 1/arch/powerpc/kernel/dma-iommu.c
--- 2/arch/powerpc/kernel/dma-iommu.c
+++ b/arch/powerpc/kernel/dma-iommu.c
@@@ -122,18 -122,17 +122,17 @@@ int dma_iommu_dma_supported(struct devi
   {
         struct iommu_table *tbl = get_iommu_table_base(dev);
   
-       if (!tbl) {
-               dev_info(dev, "Warning: IOMMU dma not supported: mask 0x%08llx"
-                       ", table unavailable\n", mask);
-               return 0;
-       }
- 
         if (dev_is_pci(dev) && dma_iommu_bypass_supported(dev, mask)) {
                 dev->archdata.iommu_bypass = true;
                 dev_dbg(dev, "iommu: 64-bit OK, using fixed ops\n");
                 return 1;
         }
   
+       if (!tbl) {
+               dev_err(dev, "Warning: IOMMU dma not supported: mask 0x%08llx, table unavailable\n", mask);
+               return 0;
+       }
+ 
         if (tbl->it_offset > (mask >> tbl->it_page_shift)) {
                 dev_info(dev, "Warning: IOMMU offset too big for device mask\n");
                 dev_info(dev, "mask: 0x%08llx, table offset: 0x%08lx\n",
@@@ -208,6 -207,4 +207,6 @@@ const struct dma_map_ops dma_iommu_ops 
         .sync_single_for_device = dma_iommu_sync_for_device,
         .sync_sg_for_cpu        = dma_iommu_sync_sg_for_cpu,
         .sync_sg_for_device     = dma_iommu_sync_sg_for_device,
+ +      .mmap                   = dma_common_mmap,
+ +      .get_sgtable            = dma_common_get_sgtable,
   };
diff --combined arch/powerpc/kernel/process.c

index 7a84c9f1778e6ade2878ea5387b58d7c91794c8f,f289bdd2b562c342086ee8bfaf849495d8cbcf46..639ceae7da9d81de567b4c3d967bd8fd6b85a017
--- 1/arch/powerpc/kernel/process.c
--- 2/arch/powerpc/kernel/process.c
+++ b/arch/powerpc/kernel/process.c
@@@ -101,8 -101,21 +101,8 @@@ static void check_if_tm_restore_require
         }
   }
   
- -static bool tm_active_with_fp(struct task_struct *tsk)
- -{
- -      return MSR_TM_ACTIVE(tsk->thread.regs->msr) &&
- -              (tsk->thread.ckpt_regs.msr & MSR_FP);
- -}
- -
- -static bool tm_active_with_altivec(struct task_struct *tsk)
- -{
- -      return MSR_TM_ACTIVE(tsk->thread.regs->msr) &&
- -              (tsk->thread.ckpt_regs.msr & MSR_VEC);
- -}
   #else
   static inline void check_if_tm_restore_required(struct task_struct *tsk) { }
- -static inline bool tm_active_with_fp(struct task_struct *tsk) { return false; }
- -static inline bool tm_active_with_altivec(struct task_struct *tsk) { return false; }
   #endif /* CONFIG_PPC_TRANSACTIONAL_MEM */
   
   bool strict_msr_control;
@@@ -239,7 -252,7 +239,7 @@@ EXPORT_SYMBOL(enable_kernel_fp)
   
   static int restore_fp(struct task_struct *tsk)
   {
- -      if (tsk->thread.load_fp || tm_active_with_fp(tsk)) {
+ +      if (tsk->thread.load_fp) {
                 load_fp_state(&current->thread.fp_state);
                 current->thread.load_fp++;
                 return 1;
@@@ -321,7 -334,8 +321,7 @@@ EXPORT_SYMBOL_GPL(flush_altivec_to_thre
   
   static int restore_altivec(struct task_struct *tsk)
   {
- -      if (cpu_has_feature(CPU_FTR_ALTIVEC) &&
- -              (tsk->thread.load_vec || tm_active_with_altivec(tsk))) {
+ +      if (cpu_has_feature(CPU_FTR_ALTIVEC) && (tsk->thread.load_vec)) {
                 load_vr_state(&tsk->thread.vr_state);
                 tsk->thread.used_vr = 1;
                 tsk->thread.load_vec++;
@@@ -483,14 -497,13 +483,14 @@@ void giveup_all(struct task_struct *tsk
         if (!tsk->thread.regs)
                 return;
   
+ +      check_if_tm_restore_required(tsk);
+ +
         usermsr = tsk->thread.regs->msr;
   
         if ((usermsr & msr_all_available) == 0)
                 return;
   
         msr_check_and_set(msr_all_available);
- -      check_if_tm_restore_required(tsk);
   
         WARN_ON((usermsr & MSR_VSX) && !((usermsr & MSR_FP) && (usermsr & MSR_VEC)));
   
@@@ -1587,8 -1600,9 +1587,9 @@@ static void setup_ksp_vsid(struct task_
   /*
    * Copy architecture-specific thread state
    */
- int copy_thread(unsigned long clone_flags, unsigned long usp,
-               unsigned long kthread_arg, struct task_struct *p)
+ int copy_thread_tls(unsigned long clone_flags, unsigned long usp,
+               unsigned long kthread_arg, struct task_struct *p,
+               unsigned long tls)
   {
         struct pt_regs *childregs, *kregs;
         extern void ret_from_fork(void);
@@@ -1629,10 -1643,10 +1630,10 @@@
                 if (clone_flags & CLONE_SETTLS) {
   #ifdef CONFIG_PPC64
                         if (!is_32bit_task())
-                               childregs->gpr[13] = childregs->gpr[6];
+                               childregs->gpr[13] = tls;
                         else
   #endif
-                               childregs->gpr[2] = childregs->gpr[6];
+                               childregs->gpr[2] = tls;
                 }
   
                 f = ret_from_fork;
@@@ -2033,10 -2047,8 +2034,8 @@@ void show_stack(struct task_struct *tsk
         int count = 0;
         int firstframe = 1;
   #ifdef CONFIG_FUNCTION_GRAPH_TRACER
-       struct ftrace_ret_stack *ret_stack;
-       extern void return_to_handler(void);
-       unsigned long rth = (unsigned long)return_to_handler;
-       int curr_frame = 0;
+       unsigned long ret_addr;
+       int ftrace_idx = 0;
   #endif
   
         if (tsk == NULL)
@@@ -2065,15 -2077,10 +2064,10 @@@
                 if (!firstframe || ip != lr) {
                         printk("["REG"] ["REG"] %pS", sp, ip, (void *)ip);
   #ifdef CONFIG_FUNCTION_GRAPH_TRACER
-                       if ((ip == rth) && curr_frame >= 0) {
-                               ret_stack = ftrace_graph_get_ret_stack(current,
-                                                                 curr_frame++);
-                               if (ret_stack)
-                                       pr_cont(" (%pS)",
-                                               (void *)ret_stack->ret);
-                               else
-                                       curr_frame = -1;
-                       }
+                       ret_addr = ftrace_graph_ret_addr(current,
+                                               &ftrace_idx, ip, stack);
+                       if (ret_addr != ip)
+                               pr_cont(" (%pS)", (void *)ret_addr);
   #endif
                         if (firstframe)
                                 pr_cont(" (unreliable)");
diff --combined arch/powerpc/kernel/setup-common.c

index 5e6543aba1b32879e18cab1a4e6d1d45277c4ba5,7b4c921ec73faca1fc2acb657245f4db0d9c129d..25aaa390300091e310a81dfa28f9ccde7cc05a5f
--- 1/arch/powerpc/kernel/setup-common.c
--- 2/arch/powerpc/kernel/setup-common.c
+++ b/arch/powerpc/kernel/setup-common.c
@@@ -778,6 -778,12 +778,6 @@@ void ppc_printk_progress(char *s, unsig
         pr_info("%s\n", s);
   }
   
- -void arch_setup_pdev_archdata(struct platform_device *pdev)
- -{
- -      pdev->archdata.dma_mask = DMA_BIT_MASK(32);
- -      pdev->dev.dma_mask = &pdev->archdata.dma_mask;
- -}
- -
   static __init void print_system_info(void)
   {
         pr_info("-----------------------------------------------------\n");
@@@ -800,9 -806,15 +800,15 @@@
         pr_info("mmu_features      = 0x%08x\n", cur_cpu_spec->mmu_features);
   #ifdef CONFIG_PPC64
         pr_info("firmware_features = 0x%016lx\n", powerpc_firmware_features);
+ #ifdef CONFIG_PPC_BOOK3S
+       pr_info("vmalloc start     = 0x%lx\n", KERN_VIRT_START);
+       pr_info("IO start          = 0x%lx\n", KERN_IO_START);
+       pr_info("vmemmap start     = 0x%lx\n", (unsigned long)vmemmap);
+ #endif
   #endif
   
-       print_system_hash_info();
+       if (!early_radix_enabled())
+               print_system_hash_info();
   
         if (PHYSICAL_START > 0)
                 pr_info("physical_start    = 0x%llx\n",
diff --combined arch/powerpc/kvm/book3s_hv.c

index f8975c620f41c43758bb71c1d85cdb7b75150eff,3cdaa2a09a19235ee9ac56cba88c900f5516586d..efd8f93bc9dc1272686055f94d1cdbcee8f61d65
--- 1/arch/powerpc/kvm/book3s_hv.c
--- 2/arch/powerpc/kvm/book3s_hv.c
+++ b/arch/powerpc/kvm/book3s_hv.c
@@@ -1678,14 -1678,7 +1678,14 @@@ static int kvmppc_get_one_reg_hv(struc
                 *val = get_reg_val(id, vcpu->arch.pspb);
                 break;
         case KVM_REG_PPC_DPDES:
- -              *val = get_reg_val(id, vcpu->arch.vcore->dpdes);
+ +              /*
+ +               * On POWER9, where we are emulating msgsndp etc.,
+ +               * we return 1 bit for each vcpu, which can come from
+ +               * either vcore->dpdes or doorbell_request.
+ +               * On POWER8, doorbell_request is 0.
+ +               */
+ +              *val = get_reg_val(id, vcpu->arch.vcore->dpdes |
+ +                                 vcpu->arch.doorbell_request);
                 break;
         case KVM_REG_PPC_VTB:
                 *val = get_reg_val(id, vcpu->arch.vcore->vtb);
@@@ -2867,7 -2860,7 +2867,7 @@@ static void collect_piggybacks(struct c
                 if (!spin_trylock(&pvc->lock))
                         continue;
                 prepare_threads(pvc);
- -              if (!pvc->n_runnable) {
+ +              if (!pvc->n_runnable || !pvc->kvm->arch.mmu_ready) {
                         list_del_init(&pvc->preempt_list);
                         if (pvc->runner == NULL) {
                                 pvc->vcore_state = VCORE_INACTIVE;
@@@ -2888,20 -2881,15 +2888,20 @@@
         spin_unlock(&lp->lock);
   }
   
- -static bool recheck_signals(struct core_info *cip)
+ +static bool recheck_signals_and_mmu(struct core_info *cip)
   {
         int sub, i;
         struct kvm_vcpu *vcpu;
+ +      struct kvmppc_vcore *vc;
   
- -      for (sub = 0; sub < cip->n_subcores; ++sub)
- -              for_each_runnable_thread(i, vcpu, cip->vc[sub])
+ +      for (sub = 0; sub < cip->n_subcores; ++sub) {
+ +              vc = cip->vc[sub];
+ +              if (!vc->kvm->arch.mmu_ready)
+ +                      return true;
+ +              for_each_runnable_thread(i, vcpu, vc)
                         if (signal_pending(vcpu->arch.run_task))
                                 return true;
+ +      }
         return false;
   }
   
@@@ -3131,7 -3119,7 +3131,7 @@@ static noinline void kvmppc_run_core(st
         local_irq_disable();
         hard_irq_disable();
         if (lazy_irq_pending() || need_resched() ||
- -          recheck_signals(&core_info) || !vc->kvm->arch.mmu_ready) {
+ +          recheck_signals_and_mmu(&core_info)) {
                 local_irq_enable();
                 vc->vcore_state = VCORE_INACTIVE;
                 /* Unlock all except the primary vcore */
@@@ -5474,6 -5462,12 +5474,12 @@@ static int kvmppc_radix_possible(void
   static int kvmppc_book3s_init_hv(void)
   {
         int r;
+ 
+       if (!tlbie_capable) {
+               pr_err("KVM-HV: Host does not support TLBIE\n");
+               return -ENODEV;
+       }
+ 
         /*
          * FIXME!! Do we need to check on all cpus ?
          */
diff --combined arch/powerpc/mm/dma-noncoherent.c

index c617282d5b2aead7023cc27b47cbab0d4a5c4586,4272ca5e8159abd6f2f814b9ca4186d73806e65c..2a82984356f81ffd3407361a50f8773adb272215
--- 1/arch/powerpc/mm/dma-noncoherent.c
--- 2/arch/powerpc/mm/dma-noncoherent.c
+++ b/arch/powerpc/mm/dma-noncoherent.c
@@@ -4,310 -4,18 +4,18 @@@
    *    Copyright (C) 2001 Dan Malek ([email protected])
    *
    *  Copyright (C) 2000 Russell King
-  *
-  * Consistent memory allocators.  Used for DMA devices that want to
-  * share uncached memory with the processor core.  The function return
-  * is the virtual address and 'dma_handle' is the physical address.
-  * Mostly stolen from the ARM port, with some changes for PowerPC.
-  *                                            -- Dan
-  *
-  * Reorganized to get rid of the arch-specific consistent_* functions
-  * and provide non-coherent implementations for the DMA API. -Matt
-  *
-  * Added in_interrupt() safe dma_alloc_coherent()/dma_free_coherent()
-  * implementation. This is pulled straight from ARM and barely
-  * modified. -Matt
    */
   
- #include <linux/sched.h>
- #include <linux/slab.h>
   #include <linux/kernel.h>
   #include <linux/errno.h>
- #include <linux/string.h>
   #include <linux/types.h>
   #include <linux/highmem.h>
   #include <linux/dma-direct.h>
   #include <linux/dma-noncoherent.h>
- #include <linux/export.h>
   
   #include <asm/tlbflush.h>
   #include <asm/dma.h>
   
- #include <mm/mmu_decl.h>
- 
- /*
-  * This address range defaults to a value that is safe for all
-  * platforms which currently set CONFIG_NOT_COHERENT_CACHE. It
-  * can be further configured for specific applications under
-  * the "Advanced Setup" menu. -Matt
-  */
- #define CONSISTENT_BASE               (IOREMAP_TOP)
- #define CONSISTENT_END                (CONSISTENT_BASE + CONFIG_CONSISTENT_SIZE)
- #define CONSISTENT_OFFSET(x)  (((unsigned long)(x) - CONSISTENT_BASE) >> PAGE_SHIFT)
- 
- /*
-  * This is the page table (2MB) covering uncached, DMA consistent allocations
-  */
- static DEFINE_SPINLOCK(consistent_lock);
- 
- /*
-  * VM region handling support.
-  *
-  * This should become something generic, handling VM region allocations for
-  * vmalloc and similar (ioremap, module space, etc).
-  *
-  * I envisage vmalloc()'s supporting vm_struct becoming:
-  *
-  *  struct vm_struct {
-  *    struct vm_region        region;
-  *    unsigned long   flags;
-  *    struct page     **pages;
-  *    unsigned int    nr_pages;
-  *    unsigned long   phys_addr;
-  *  };
-  *
-  * get_vm_area() would then call vm_region_alloc with an appropriate
-  * struct vm_region head (eg):
-  *
-  *  struct vm_region vmalloc_head = {
-  *    .vm_list        = LIST_HEAD_INIT(vmalloc_head.vm_list),
-  *    .vm_start       = VMALLOC_START,
-  *    .vm_end         = VMALLOC_END,
-  *  };
-  *
-  * However, vmalloc_head.vm_start is variable (typically, it is dependent on
-  * the amount of RAM found at boot time.)  I would imagine that get_vm_area()
-  * would have to initialise this each time prior to calling vm_region_alloc().
-  */
- struct ppc_vm_region {
-       struct list_head        vm_list;
-       unsigned long           vm_start;
-       unsigned long           vm_end;
- };
- 
- static struct ppc_vm_region consistent_head = {
-       .vm_list        = LIST_HEAD_INIT(consistent_head.vm_list),
-       .vm_start       = CONSISTENT_BASE,
-       .vm_end         = CONSISTENT_END,
- };
- 
- static struct ppc_vm_region *
- ppc_vm_region_alloc(struct ppc_vm_region *head, size_t size, gfp_t gfp)
- {
-       unsigned long addr = head->vm_start, end = head->vm_end - size;
-       unsigned long flags;
-       struct ppc_vm_region *c, *new;
- 
-       new = kmalloc(sizeof(struct ppc_vm_region), gfp);
-       if (!new)
-               goto out;
- 
-       spin_lock_irqsave(&consistent_lock, flags);
- 
-       list_for_each_entry(c, &head->vm_list, vm_list) {
-               if ((addr + size) < addr)
-                       goto nospc;
-               if ((addr + size) <= c->vm_start)
-                       goto found;
-               addr = c->vm_end;
-               if (addr > end)
-                       goto nospc;
-       }
- 
-  found:
-       /*
-        * Insert this entry _before_ the one we found.
-        */
-       list_add_tail(&new->vm_list, &c->vm_list);
-       new->vm_start = addr;
-       new->vm_end = addr + size;
- 
-       spin_unlock_irqrestore(&consistent_lock, flags);
-       return new;
- 
-  nospc:
-       spin_unlock_irqrestore(&consistent_lock, flags);
-       kfree(new);
-  out:
-       return NULL;
- }
- 
- static struct ppc_vm_region *ppc_vm_region_find(struct ppc_vm_region *head, unsigned long addr)
- {
-       struct ppc_vm_region *c;
- 
-       list_for_each_entry(c, &head->vm_list, vm_list) {
-               if (c->vm_start == addr)
-                       goto out;
-       }
-       c = NULL;
-  out:
-       return c;
- }
- 
- /*
-  * Allocate DMA-coherent memory space and return both the kernel remapped
-  * virtual and bus address for that space.
-  */
- void *arch_dma_alloc(struct device *dev, size_t size, dma_addr_t *dma_handle,
-               gfp_t gfp, unsigned long attrs)
- {
-       struct page *page;
-       struct ppc_vm_region *c;
-       unsigned long order;
-       u64 mask = ISA_DMA_THRESHOLD, limit;
- 
-       if (dev) {
-               mask = dev->coherent_dma_mask;
- 
-               /*
-                * Sanity check the DMA mask - it must be non-zero, and
-                * must be able to be satisfied by a DMA allocation.
-                */
-               if (mask == 0) {
-                       dev_warn(dev, "coherent DMA mask is unset\n");
-                       goto no_page;
-               }
- 
-               if ((~mask) & ISA_DMA_THRESHOLD) {
-                       dev_warn(dev, "coherent DMA mask %#llx is smaller "
-                                "than system GFP_DMA mask %#llx\n",
-                                mask, (unsigned long long)ISA_DMA_THRESHOLD);
-                       goto no_page;
-               }
-       }
- 
- 
-       size = PAGE_ALIGN(size);
-       limit = (mask + 1) & ~mask;
-       if ((limit && size >= limit) ||
-           size >= (CONSISTENT_END - CONSISTENT_BASE)) {
-               printk(KERN_WARNING "coherent allocation too big (requested %#x mask %#Lx)\n",
-                      size, mask);
-               return NULL;
-       }
- 
-       order = get_order(size);
- 
-       /* Might be useful if we ever have a real legacy DMA zone... */
-       if (mask != 0xffffffff)
-               gfp |= GFP_DMA;
- 
-       page = alloc_pages(gfp, order);
-       if (!page)
-               goto no_page;
- 
-       /*
-        * Invalidate any data that might be lurking in the
-        * kernel direct-mapped region for device DMA.
-        */
-       {
-               unsigned long kaddr = (unsigned long)page_address(page);
-               memset(page_address(page), 0, size);
-               flush_dcache_range(kaddr, kaddr + size);
-       }
- 
-       /*
-        * Allocate a virtual address in the consistent mapping region.
-        */
-       c = ppc_vm_region_alloc(&consistent_head, size,
-                           gfp & ~(__GFP_DMA | __GFP_HIGHMEM));
-       if (c) {
-               unsigned long vaddr = c->vm_start;
-               struct page *end = page + (1 << order);
- 
-               split_page(page, order);
- 
-               /*
-                * Set the "dma handle"
-                */
-               *dma_handle = phys_to_dma(dev, page_to_phys(page));
- 
-               do {
-                       SetPageReserved(page);
-                       map_kernel_page(vaddr, page_to_phys(page),
-                                       pgprot_noncached(PAGE_KERNEL));
-                       page++;
-                       vaddr += PAGE_SIZE;
-               } while (size -= PAGE_SIZE);
- 
-               /*
-                * Free the otherwise unused pages.
-                */
-               while (page < end) {
-                       __free_page(page);
-                       page++;
-               }
- 
-               return (void *)c->vm_start;
-       }
- 
-       if (page)
-               __free_pages(page, order);
-  no_page:
-       return NULL;
- }
- 
- /*
-  * free a page as defined by the above mapping.
-  */
- void arch_dma_free(struct device *dev, size_t size, void *vaddr,
-               dma_addr_t dma_handle, unsigned long attrs)
- {
-       struct ppc_vm_region *c;
-       unsigned long flags, addr;
-       
-       size = PAGE_ALIGN(size);
- 
-       spin_lock_irqsave(&consistent_lock, flags);
- 
-       c = ppc_vm_region_find(&consistent_head, (unsigned long)vaddr);
-       if (!c)
-               goto no_area;
- 
-       if ((c->vm_end - c->vm_start) != size) {
-               printk(KERN_ERR "%s: freeing wrong coherent size (%ld != %d)\n",
-                      __func__, c->vm_end - c->vm_start, size);
-               dump_stack();
-               size = c->vm_end - c->vm_start;
-       }
- 
-       addr = c->vm_start;
-       do {
-               pte_t *ptep;
-               unsigned long pfn;
- 
-               ptep = pte_offset_kernel(pmd_offset(pud_offset(pgd_offset_k(addr),
-                                                              addr),
-                                                   addr),
-                                        addr);
-               if (!pte_none(*ptep) && pte_present(*ptep)) {
-                       pfn = pte_pfn(*ptep);
-                       pte_clear(&init_mm, addr, ptep);
-                       if (pfn_valid(pfn)) {
-                               struct page *page = pfn_to_page(pfn);
-                               __free_reserved_page(page);
-                       }
-               }
-               addr += PAGE_SIZE;
-       } while (size -= PAGE_SIZE);
- 
-       flush_tlb_kernel_range(c->vm_start, c->vm_end);
- 
-       list_del(&c->vm_list);
- 
-       spin_unlock_irqrestore(&consistent_lock, flags);
- 
-       kfree(c);
-       return;
- 
-  no_area:
-       spin_unlock_irqrestore(&consistent_lock, flags);
-       printk(KERN_ERR "%s: trying to free invalid coherent area: %p\n",
-              __func__, vaddr);
-       dump_stack();
- }
- 
   /*
    * make an area consistent.
    */
@@@ -408,23 -116,15 +116,9 @@@ void arch_sync_dma_for_cpu(struct devic
         __dma_sync_page(paddr, size, dir);
   }
   
- /*
-  * Return the PFN for a given cpu virtual address returned by arch_dma_alloc.
-  */
- long arch_dma_coherent_to_pfn(struct device *dev, void *vaddr,
-               dma_addr_t dma_addr)
+ void arch_dma_prep_coherent(struct page *page, size_t size)
   {
-       /* This should always be populated, so we don't test every
-        * level. If that fails, we'll have a nice crash which
-        * will be as good as a BUG_ON()
-        */
-       unsigned long cpu_addr = (unsigned long)vaddr;
-       pgd_t *pgd = pgd_offset_k(cpu_addr);
-       pud_t *pud = pud_offset(pgd, cpu_addr);
-       pmd_t *pmd = pmd_offset(pud, cpu_addr);
-       pte_t *ptep = pte_offset_kernel(pmd, cpu_addr);
+       unsigned long kaddr = (unsigned long)page_address(page);
   
-       if (pte_none(*ptep) || !pte_present(*ptep))
-               return 0;
-       return pte_pfn(*ptep);
+       flush_dcache_range(kaddr, kaddr + size);
   }
- -
- -static int __init atomic_pool_init(void)
- -{
- -      return dma_atomic_pool_init(GFP_KERNEL, pgprot_noncached(PAGE_KERNEL));
- -}
- -postcore_initcall(atomic_pool_init);
diff --combined arch/powerpc/platforms/pseries/vio.c

index 3473eef7628c376d34fa0cf6570a7b5327443439,115934f8393526031b13390f528eaa35b8df7972..79e2287991dbb8fe4a7926d6f8a4423a0e30d76f
--- 1/arch/powerpc/platforms/pseries/vio.c
--- 2/arch/powerpc/platforms/pseries/vio.c
+++ b/arch/powerpc/platforms/pseries/vio.c
@@@ -605,8 -605,6 +605,8 @@@ static const struct dma_map_ops vio_dma
         .unmap_page        = vio_dma_iommu_unmap_page,
         .dma_supported     = dma_iommu_dma_supported,
         .get_required_mask = dma_iommu_get_required_mask,
+ +      .mmap              = dma_common_mmap,
+ +      .get_sgtable       = dma_common_get_sgtable,
   };
   
   /**
@@@ -1193,7 -1191,7 +1193,7 @@@ static struct iommu_table *vio_build_io
         else
                 tbl->it_ops = &iommu_table_pseries_ops;
   
-       return iommu_init_table(tbl, -1);
+       return iommu_init_table(tbl, -1, 0, 0);
   }
   
   /**
diff --combined arch/powerpc/sysdev/xive/native.c

index 37987c815913a9f5a32e423e16d9ebf6791fa95d,e9481468ebd8e5b7a0760814a5f1bc215473680f..0ff6b739052c8fc63daaba039d0a0889d14906de
--- 1/arch/powerpc/sysdev/xive/native.c
--- 2/arch/powerpc/sysdev/xive/native.c
+++ b/arch/powerpc/sysdev/xive/native.c
@@@ -111,6 -111,20 +111,20 @@@ int xive_native_configure_irq(u32 hw_ir
   }
   EXPORT_SYMBOL_GPL(xive_native_configure_irq);
   
+ static int xive_native_get_irq_config(u32 hw_irq, u32 *target, u8 *prio,
+                                     u32 *sw_irq)
+ {
+       s64 rc;
+       __be64 vp;
+       __be32 lirq;
+ 
+       rc = opal_xive_get_irq_config(hw_irq, &vp, prio, &lirq);
+ 
+       *target = be64_to_cpu(vp);
+       *sw_irq = be32_to_cpu(lirq);
+ 
+       return rc == 0 ? 0 : -ENXIO;
+ }
   
   /* This can be called multiple time to change a queue configuration */
   int xive_native_configure_queue(u32 vp_id, struct xive_q *q, u8 prio,
@@@ -231,6 -245,17 +245,17 @@@ static bool xive_native_match(struct de
         return of_device_is_compatible(node, "ibm,opal-xive-vc");
   }
   
+ static s64 opal_xive_allocate_irq(u32 chip_id)
+ {
+       s64 irq = opal_xive_allocate_irq_raw(chip_id);
+ 
+       /*
+        * Old versions of skiboot can incorrectly return 0xffffffff to
+        * indicate no space, fix it up here.
+        */
+       return irq == 0xffffffff ? OPAL_RESOURCE : irq;
+ }
+ 
   #ifdef CONFIG_SMP
   static int xive_native_get_ipi(unsigned int cpu, struct xive_cpu *xc)
   {
@@@ -442,6 -467,7 +467,7 @@@ EXPORT_SYMBOL_GPL(xive_native_sync_queu
   static const struct xive_ops xive_native_ops = {
         .populate_irq_data      = xive_native_populate_irq_data,
         .configure_irq          = xive_native_configure_irq,
+       .get_irq_config         = xive_native_get_irq_config,
         .setup_queue            = xive_native_setup_queue,
         .cleanup_queue          = xive_native_cleanup_queue,
         .match                  = xive_native_match,
@@@ -800,13 -826,6 +826,13 @@@ int xive_native_set_queue_state(u32 vp_
   }
   EXPORT_SYMBOL_GPL(xive_native_set_queue_state);
   
+ +bool xive_native_has_queue_state_support(void)
+ +{
+ +      return opal_check_token(OPAL_XIVE_GET_QUEUE_STATE) &&
+ +              opal_check_token(OPAL_XIVE_SET_QUEUE_STATE);
+ +}
+ +EXPORT_SYMBOL_GPL(xive_native_has_queue_state_support);
+ +
   int xive_native_get_vp_state(u32 vp_id, u64 *out_state)
   {
         __be64 state;
diff --combined arch/s390/Kconfig

index ea5eac00b32792f9274438a68272795b51ee8121,f43319c444548eaaabf2202c7eb8786fcee41457..f933a473b128e3b820accef103dc61ad9d49c02e
--- 1/arch/s390/Kconfig
--- 2/arch/s390/Kconfig
+++ b/arch/s390/Kconfig
@@@ -1,7 -1,4 +1,4 @@@
   # SPDX-License-Identifier: GPL-2.0
- config ARCH_HAS_MEM_ENCRYPT
-         def_bool y
- 
   config MMU
         def_bool y
   
@@@ -68,6 -65,7 +65,7 @@@ config S39
         select ARCH_HAS_GCOV_PROFILE_ALL
         select ARCH_HAS_GIGANTIC_PAGE
         select ARCH_HAS_KCOV
+       select ARCH_HAS_MEM_ENCRYPT
         select ARCH_HAS_PTE_SPECIAL
         select ARCH_HAS_SET_MEMORY
         select ARCH_HAS_STRICT_KERNEL_RWX
@@@ -105,7 -103,6 +103,7 @@@
         select ARCH_INLINE_WRITE_UNLOCK_IRQRESTORE
         select ARCH_KEEP_MEMBLOCK
         select ARCH_SAVE_PAGE_KEYS if HIBERNATION
+ +      select ARCH_STACKWALK
         select ARCH_SUPPORTS_ATOMIC_RMW
         select ARCH_SUPPORTS_NUMA_BALANCING
         select ARCH_USE_BUILTIN_BSWAP
@@@ -132,7 -129,6 +130,7 @@@
         select HAVE_ARCH_TRACEHOOK
         select HAVE_ARCH_TRANSPARENT_HUGEPAGE
         select HAVE_ARCH_VMAP_STACK
+ +      select HAVE_ASM_MODVERSIONS
         select HAVE_EBPF_JIT if PACK_STACK && HAVE_MARCH_Z196_FEATURES
         select HAVE_CMPXCHG_DOUBLE
         select HAVE_CMPXCHG_LOCAL
@@@ -238,10 -234,6 +236,10 @@@ config HAVE_MARCH_Z14_FEATURE
         def_bool n
         select HAVE_MARCH_Z13_FEATURES
   
+ +config HAVE_MARCH_Z15_FEATURES
+ +      def_bool n
+ +      select HAVE_MARCH_Z14_FEATURES
+ +
   choice
         prompt "Processor type"
         default MARCH_Z196
@@@ -313,14 -305,6 +311,14 @@@ config MARCH_Z1
           and 3906 series). The kernel will be slightly faster but will not
           work on older machines.
   
+ +config MARCH_Z15
+ +      bool "IBM z15"
+ +      select HAVE_MARCH_Z15_FEATURES
+ +      help
+ +        Select this to enable optimizations for IBM z15 (8562
+ +        and 8561 series). The kernel will be slightly faster but will not
+ +        work on older machines.
+ +
   endchoice
   
   config MARCH_Z900_TUNE
@@@ -347,9 -331,6 +345,9 @@@ config MARCH_Z13_TUN
   config MARCH_Z14_TUNE
         def_bool TUNE_Z14 || MARCH_Z14 && TUNE_DEFAULT
   
+ +config MARCH_Z15_TUNE
+ +      def_bool TUNE_Z15 || MARCH_Z15 && TUNE_DEFAULT
+ +
   choice
         prompt "Tune code generation"
         default TUNE_DEFAULT
@@@ -394,9 -375,6 +392,9 @@@ config TUNE_Z1
   config TUNE_Z14
         bool "IBM z14"
   
+ +config TUNE_Z15
+ +      bool "IBM z15"
+ +
   endchoice
   
   config 64BIT
diff --combined arch/x86/Kconfig

index c28ab5c01879f631fa6429d6cb01f6c5e6d64a8e,06027809c59990153f14d57b29a7cee11dc4ea00..37ed5f5910d55cd6502c645e051be500accabb27
--- 1/arch/x86/Kconfig
--- 2/arch/x86/Kconfig
+++ b/arch/x86/Kconfig
@@@ -68,6 -68,7 +68,7 @@@ config X8
         select ARCH_HAS_FORTIFY_SOURCE
         select ARCH_HAS_GCOV_PROFILE_ALL
         select ARCH_HAS_KCOV                    if X86_64
+       select ARCH_HAS_MEM_ENCRYPT
         select ARCH_HAS_MEMBARRIER_SYNC_CORE
         select ARCH_HAS_PMEM_API                if X86_64
         select ARCH_HAS_PTE_DEVMAP              if X86_64
@@@ -147,7 -148,6 +148,7 @@@
         select HAVE_ARCH_TRANSPARENT_HUGEPAGE_PUD if X86_64
         select HAVE_ARCH_VMAP_STACK             if X86_64
         select HAVE_ARCH_WITHIN_STACK_FRAMES
+ +      select HAVE_ASM_MODVERSIONS
         select HAVE_CMPXCHG_DOUBLE
         select HAVE_CMPXCHG_LOCAL
         select HAVE_CONTEXT_TRACKING            if X86_64
@@@ -795,7 -795,6 +796,7 @@@ config KVM_GUES
         bool "KVM Guest support (including kvmclock)"
         depends on PARAVIRT
         select PARAVIRT_CLOCK
+ +      select ARCH_CPUIDLE_HALTPOLL
         default y
         ---help---
           This option enables various optimizations for running under the KVM
@@@ -804,12 -803,6 +805,12 @@@
           underlying device model, the host provides the guest with
           timing infrastructure such as time of day, and system time
   
+ +config ARCH_CPUIDLE_HALTPOLL
+ +        def_bool n
+ +        prompt "Disable host haltpoll when loading haltpoll driver"
+ +        help
+ +        If virtualized under KVM, disable host haltpoll.
+ +
   config PVH
         bool "Support for running PVH guests"
         ---help---
@@@ -1511,7 -1504,7 +1512,7 @@@ config X86_5LEVE
   
   config X86_DIRECT_GBPAGES
         def_bool y
- -      depends on X86_64 && !DEBUG_PAGEALLOC
+ +      depends on X86_64
         ---help---
           Certain kernel features effectively disable kernel
           linear 1 GB mappings (even if the CPU otherwise
@@@ -1526,9 -1519,6 +1527,6 @@@ config X86_CPA_STATISTIC
           helps to determine the effectiveness of preserving large and huge
           page mappings when mapping protections are changed.
   
- config ARCH_HAS_MEM_ENCRYPT
-       def_bool y
- 
   config AMD_MEM_ENCRYPT
         bool "AMD Secure Memory Encryption (SME) support"
         depends on X86_64 && CPU_SUP_AMD
diff --combined kernel/dma/mapping.c

index 64a3d294f4b45734d40126511d192421153c1d16,61eeefbfcb36060e93858424a6d0774f3e0e8aed..d9334f31a5afb08b9f0413e7df18d35f703f5a09
--- 1/kernel/dma/mapping.c
--- 2/kernel/dma/mapping.c
+++ b/kernel/dma/mapping.c
@@@ -116,16 -116,11 +116,16 @@@ int dma_common_get_sgtable(struct devic
         int ret;
   
         if (!dev_is_dma_coherent(dev)) {
+ +              unsigned long pfn;
+ +
                 if (!IS_ENABLED(CONFIG_ARCH_HAS_DMA_COHERENT_TO_PFN))
                         return -ENXIO;
   
- -              page = pfn_to_page(arch_dma_coherent_to_pfn(dev, cpu_addr,
- -                              dma_addr));
+ +              /* If the PFN is not valid, we do not have a struct page */
+ +              pfn = arch_dma_coherent_to_pfn(dev, cpu_addr, dma_addr);
+ +              if (!pfn_valid(pfn))
+ +                      return -ENXIO;
+ +              page = pfn_to_page(pfn);
         } else {
                 page = virt_to_page(cpu_addr);
         }
@@@ -136,51 -131,20 +136,51 @@@
         return ret;
   }
   
+ +/*
+ + * The whole dma_get_sgtable() idea is fundamentally unsafe - it seems
+ + * that the intention is to allow exporting memory allocated via the
+ + * coherent DMA APIs through the dma_buf API, which only accepts a
+ + * scattertable.  This presents a couple of problems:
+ + * 1. Not all memory allocated via the coherent DMA APIs is backed by
+ + *    a struct page
+ + * 2. Passing coherent DMA memory into the streaming APIs is not allowed
+ + *    as we will try to flush the memory through a different alias to that
+ + *    actually being used (and the flushes are redundant.)
+ + */
   int dma_get_sgtable_attrs(struct device *dev, struct sg_table *sgt,
                 void *cpu_addr, dma_addr_t dma_addr, size_t size,
                 unsigned long attrs)
   {
         const struct dma_map_ops *ops = get_dma_ops(dev);
   
- -      if (!dma_is_direct(ops) && ops->get_sgtable)
- -              return ops->get_sgtable(dev, sgt, cpu_addr, dma_addr, size,
- -                                      attrs);
- -      return dma_common_get_sgtable(dev, sgt, cpu_addr, dma_addr, size,
- -                      attrs);
+ +      if (dma_is_direct(ops))
+ +              return dma_common_get_sgtable(dev, sgt, cpu_addr, dma_addr,
+ +                              size, attrs);
+ +      if (!ops->get_sgtable)
+ +              return -ENXIO;
+ +      return ops->get_sgtable(dev, sgt, cpu_addr, dma_addr, size, attrs);
   }
   EXPORT_SYMBOL(dma_get_sgtable_attrs);
   
+ +#ifdef CONFIG_MMU
+ +/*
+ + * Return the page attributes used for mapping dma_alloc_* memory, either in
+ + * kernel space if remapping is needed, or to userspace through dma_mmap_*.
+ + */
+ +pgprot_t dma_pgprot(struct device *dev, pgprot_t prot, unsigned long attrs)
+ +{
+ +      if (dev_is_dma_coherent(dev) ||
+ +          (IS_ENABLED(CONFIG_DMA_NONCOHERENT_CACHE_SYNC) &&
+ +             (attrs & DMA_ATTR_NON_CONSISTENT)))
+ +              return prot;
+ +#ifdef CONFIG_ARCH_HAS_DMA_WRITE_COMBINE
+ +      if (attrs & DMA_ATTR_WRITE_COMBINE)
+ +              return pgprot_writecombine(prot);
+ +#endif
+ +      return pgprot_dmacoherent(prot);
+ +}
+ +#endif /* CONFIG_MMU */
+ +
   /*
    * Create userspace mapping for the DMA-coherent memory.
    */
@@@ -188,14 -152,14 +188,14 @@@ int dma_common_mmap(struct device *dev
                 void *cpu_addr, dma_addr_t dma_addr, size_t size,
                 unsigned long attrs)
   {
- -#ifndef CONFIG_ARCH_NO_COHERENT_DMA_MMAP
+ +#ifdef CONFIG_MMU
         unsigned long user_count = vma_pages(vma);
         unsigned long count = PAGE_ALIGN(size) >> PAGE_SHIFT;
         unsigned long off = vma->vm_pgoff;
         unsigned long pfn;
         int ret = -ENXIO;
   
- -      vma->vm_page_prot = arch_dma_mmap_pgprot(dev, vma->vm_page_prot, attrs);
+ +      vma->vm_page_prot = dma_pgprot(dev, vma->vm_page_prot, attrs);
   
         if (dma_mmap_from_dev_coherent(dev, vma, cpu_addr, size, &ret))
                 return ret;
@@@ -206,11 -170,7 +206,11 @@@
         if (!dev_is_dma_coherent(dev)) {
                 if (!IS_ENABLED(CONFIG_ARCH_HAS_DMA_COHERENT_TO_PFN))
                         return -ENXIO;
+ +
+ +              /* If the PFN is not valid, we do not have a struct page */
                 pfn = arch_dma_coherent_to_pfn(dev, cpu_addr, dma_addr);
+ +              if (!pfn_valid(pfn))
+ +                      return -ENXIO;
         } else {
                 pfn = page_to_pfn(virt_to_page(cpu_addr));
         }
@@@ -219,30 -179,9 +219,30 @@@
                         user_count << PAGE_SHIFT, vma->vm_page_prot);
   #else
         return -ENXIO;
- -#endif /* !CONFIG_ARCH_NO_COHERENT_DMA_MMAP */
+ +#endif /* CONFIG_MMU */
   }
   
+ +/**
+ + * dma_can_mmap - check if a given device supports dma_mmap_*
+ + * @dev: device to check
+ + *
+ + * Returns %true if @dev supports dma_mmap_coherent() and dma_mmap_attrs() to
+ + * map DMA allocations to userspace.
+ + */
+ +bool dma_can_mmap(struct device *dev)
+ +{
+ +      const struct dma_map_ops *ops = get_dma_ops(dev);
+ +
+ +      if (dma_is_direct(ops)) {
+ +              return IS_ENABLED(CONFIG_MMU) &&
+ +                     (dev_is_dma_coherent(dev) ||
+ +                      IS_ENABLED(CONFIG_ARCH_HAS_DMA_COHERENT_TO_PFN));
+ +      }
+ +
+ +      return ops->mmap != NULL;
+ +}
+ +EXPORT_SYMBOL_GPL(dma_can_mmap);
+ +
   /**
    * dma_mmap_attrs - map a coherent DMA allocation into user space
    * @dev: valid struct device pointer, or NULL for ISA and EISA-like devices
@@@ -262,15 -201,31 +262,15 @@@ int dma_mmap_attrs(struct device *dev, 
   {
         const struct dma_map_ops *ops = get_dma_ops(dev);
   
- -      if (!dma_is_direct(ops) && ops->mmap)
- -              return ops->mmap(dev, vma, cpu_addr, dma_addr, size, attrs);
- -      return dma_common_mmap(dev, vma, cpu_addr, dma_addr, size, attrs);
+ +      if (dma_is_direct(ops))
+ +              return dma_common_mmap(dev, vma, cpu_addr, dma_addr, size,
+ +                              attrs);
+ +      if (!ops->mmap)
+ +              return -ENXIO;
+ +      return ops->mmap(dev, vma, cpu_addr, dma_addr, size, attrs);
   }
   EXPORT_SYMBOL(dma_mmap_attrs);
   
- -static u64 dma_default_get_required_mask(struct device *dev)
- -{
- -      u32 low_totalram = ((max_pfn - 1) << PAGE_SHIFT);
- -      u32 high_totalram = ((max_pfn - 1) >> (32 - PAGE_SHIFT));
- -      u64 mask;
- -
- -      if (!high_totalram) {
- -              /* convert to mask just covering totalram */
- -              low_totalram = (1 << (fls(low_totalram) - 1));
- -              low_totalram += low_totalram - 1;
- -              mask = low_totalram;
- -      } else {
- -              high_totalram = (1 << (fls(high_totalram) - 1));
- -              high_totalram += high_totalram - 1;
- -              mask = (((u64)high_totalram) << 32) + 0xffffffff;
- -      }
- -      return mask;
- -}
- -
   u64 dma_get_required_mask(struct device *dev)
   {
         const struct dma_map_ops *ops = get_dma_ops(dev);
@@@ -279,16 -234,7 +279,16 @@@
                 return dma_direct_get_required_mask(dev);
         if (ops->get_required_mask)
                 return ops->get_required_mask(dev);
- -      return dma_default_get_required_mask(dev);
+ +
+ +      /*
+ +       * We require every DMA ops implementation to at least support a 32-bit
+ +       * DMA mask (and use bounce buffering if that isn't supported in
+ +       * hardware).  As the direct mapping code has its own routine to
+ +       * actually report an optimal mask we default to 32-bit here as that
+ +       * is the right thing for most IOMMUs, and at least not actively
+ +       * harmful in general.
+ +       */
+ +      return DMA_BIT_MASK(32);
   }
   EXPORT_SYMBOL_GPL(dma_get_required_mask);
   
@@@ -345,12 -291,6 +345,6 @@@ void dma_free_attrs(struct device *dev
   }
   EXPORT_SYMBOL(dma_free_attrs);
   
- static inline void dma_check_mask(struct device *dev, u64 mask)
- {
-       if (sme_active() && (mask < (((u64)sme_get_me_mask() << 1) - 1)))
-               dev_warn(dev, "SME is active, device will require DMA bounce buffers\n");
- }
- 
   int dma_supported(struct device *dev, u64 mask)
   {
         const struct dma_map_ops *ops = get_dma_ops(dev);
@@@ -381,7 -321,6 +375,6 @@@ int dma_set_mask(struct device *dev, u6
                 return -EIO;
   
         arch_dma_set_mask(dev, mask);
-       dma_check_mask(dev, mask);
         *dev->dma_mask = mask;
         return 0;
   }
@@@ -399,7 -338,6 +392,6 @@@ int dma_set_coherent_mask(struct devic
         if (!dma_supported(dev, mask))
                 return -EIO;
   
-       dma_check_mask(dev, mask);
         dev->coherent_dma_mask = mask;
         return 0;
   }
@@@ -433,14 -371,3 +425,14 @@@ size_t dma_max_mapping_size(struct devi
         return size;
   }
   EXPORT_SYMBOL_GPL(dma_max_mapping_size);
+ +
+ +unsigned long dma_get_merge_boundary(struct device *dev)
+ +{
+ +      const struct dma_map_ops *ops = get_dma_ops(dev);
+ +
+ +      if (!ops || !ops->get_merge_boundary)
+ +              return 0;       /* can't merge */
+ +
+ +      return ops->get_merge_boundary(dev);
+ +}
+ +EXPORT_SYMBOL_GPL(dma_get_merge_boundary);
diff --combined kernel/dma/swiotlb.c

index 796a44f8ef5a9027bd70c3c89e01c44f1c01dd62,f29caad71e1302766ad0786e1ee7b68e2eac692e..673a2cdb2656b01ada63b12cd62d30a91b9aaa79
--- 1/kernel/dma/swiotlb.c
--- 2/kernel/dma/swiotlb.c
+++ b/kernel/dma/swiotlb.c
@@@ -444,9 -444,7 +444,9 @@@ static void swiotlb_bounce(phys_addr_t 
   
   phys_addr_t swiotlb_tbl_map_single(struct device *hwdev,
                                    dma_addr_t tbl_dma_addr,
- -                                 phys_addr_t orig_addr, size_t size,
+ +                                 phys_addr_t orig_addr,
+ +                                 size_t mapping_size,
+ +                                 size_t alloc_size,
                                    enum dma_data_direction dir,
                                    unsigned long attrs)
   {
@@@ -463,15 -461,8 +463,14 @@@
                 panic("Can not allocate SWIOTLB buffer earlier and can't now provide you with the DMA bounce buffer");
   
         if (mem_encrypt_active())
-               pr_warn_once("%s is active and system is using DMA bounce buffers\n",
-                            sme_active() ? "SME" : "SEV");
+               pr_warn_once("Memory encryption is active and system is using DMA bounce buffers\n");
   
+ +      if (mapping_size > alloc_size) {
+ +              dev_warn_once(hwdev, "Invalid sizes (mapping: %zd bytes, alloc: %zd bytes)",
+ +                            mapping_size, alloc_size);
+ +              return (phys_addr_t)DMA_MAPPING_ERROR;
+ +      }
+ +
         mask = dma_get_seg_boundary(hwdev);
   
         tbl_dma_addr &= mask;
@@@ -479,8 -470,8 +478,8 @@@
         offset_slots = ALIGN(tbl_dma_addr, 1 << IO_TLB_SHIFT) >> IO_TLB_SHIFT;
   
         /*
- -       * Carefully handle integer overflow which can occur when mask == ~0UL.
- -       */
+ +       * Carefully handle integer overflow which can occur when mask == ~0UL.
+ +       */
         max_slots = mask + 1
                     ? ALIGN(mask + 1, 1 << IO_TLB_SHIFT) >> IO_TLB_SHIFT
                     : 1UL << (BITS_PER_LONG - IO_TLB_SHIFT);
@@@ -489,8 -480,8 +488,8 @@@
          * For mappings greater than or equal to a page, we limit the stride
          * (and hence alignment) to a page size.
          */
- -      nslots = ALIGN(size, 1 << IO_TLB_SHIFT) >> IO_TLB_SHIFT;
- -      if (size >= PAGE_SIZE)
+ +      nslots = ALIGN(alloc_size, 1 << IO_TLB_SHIFT) >> IO_TLB_SHIFT;
+ +      if (alloc_size >= PAGE_SIZE)
                 stride = (1 << (PAGE_SHIFT - IO_TLB_SHIFT));
         else
                 stride = 1;
@@@ -555,7 -546,7 +554,7 @@@ not_found
         spin_unlock_irqrestore(&io_tlb_lock, flags);
         if (!(attrs & DMA_ATTR_NO_WARN) && printk_ratelimit())
                 dev_warn(hwdev, "swiotlb buffer is full (sz: %zd bytes), total %lu (slots), used %lu (slots)\n",
- -                       size, io_tlb_nslabs, tmp_io_tlb_used);
+ +                       alloc_size, io_tlb_nslabs, tmp_io_tlb_used);
         return (phys_addr_t)DMA_MAPPING_ERROR;
   found:
         io_tlb_used += nslots;
@@@ -570,7 -561,7 +569,7 @@@
                 io_tlb_orig_addr[index+i] = orig_addr + (i << IO_TLB_SHIFT);
         if (!(attrs & DMA_ATTR_SKIP_CPU_SYNC) &&
             (dir == DMA_TO_DEVICE || dir == DMA_BIDIRECTIONAL))
- -              swiotlb_bounce(orig_addr, tlb_addr, size, DMA_TO_DEVICE);
+ +              swiotlb_bounce(orig_addr, tlb_addr, mapping_size, DMA_TO_DEVICE);
   
         return tlb_addr;
   }
@@@ -579,11 -570,11 +578,11 @@@
    * tlb_addr is the physical address of the bounce buffer to unmap.
    */
   void swiotlb_tbl_unmap_single(struct device *hwdev, phys_addr_t tlb_addr,
- -                            size_t size, enum dma_data_direction dir,
- -                            unsigned long attrs)
+ +                            size_t mapping_size, size_t alloc_size,
+ +                            enum dma_data_direction dir, unsigned long attrs)
   {
         unsigned long flags;
- -      int i, count, nslots = ALIGN(size, 1 << IO_TLB_SHIFT) >> IO_TLB_SHIFT;
+ +      int i, count, nslots = ALIGN(alloc_size, 1 << IO_TLB_SHIFT) >> IO_TLB_SHIFT;
         int index = (tlb_addr - io_tlb_start) >> IO_TLB_SHIFT;
         phys_addr_t orig_addr = io_tlb_orig_addr[index];
   
@@@ -593,7 -584,7 +592,7 @@@
         if (orig_addr != INVALID_PHYS_ADDR &&
             !(attrs & DMA_ATTR_SKIP_CPU_SYNC) &&
             ((dir == DMA_FROM_DEVICE) || (dir == DMA_BIDIRECTIONAL)))
- -              swiotlb_bounce(orig_addr, tlb_addr, size, DMA_FROM_DEVICE);
+ +              swiotlb_bounce(orig_addr, tlb_addr, mapping_size, DMA_FROM_DEVICE);
   
         /*
          * Return the buffer to the free list by setting the corresponding
@@@ -673,14 -664,14 +672,14 @@@ bool swiotlb_map(struct device *dev, ph
   
         /* Oh well, have to allocate and map a bounce buffer. */
         *phys = swiotlb_tbl_map_single(dev, __phys_to_dma(dev, io_tlb_start),
- -                      *phys, size, dir, attrs);
+ +                      *phys, size, size, dir, attrs);
         if (*phys == (phys_addr_t)DMA_MAPPING_ERROR)
                 return false;
   
         /* Ensure that the address returned is DMA'ble */
         *dma_addr = __phys_to_dma(dev, *phys);
         if (unlikely(!dma_capable(dev, *dma_addr, size))) {
- -              swiotlb_tbl_unmap_single(dev, *phys, size, dir,
+ +              swiotlb_tbl_unmap_single(dev, *phys, size, size, dir,
                         attrs | DMA_ATTR_SKIP_CPU_SYNC);
                 return false;
         }
author	Linus Torvalds <[email protected]>
	Fri, 20 Sep 2019 18:48:06 +0000 (11:48 -0700)
committer	Linus Torvalds <[email protected]>
	Fri, 20 Sep 2019 18:48:06 +0000 (11:48 -0700)
		1	2
Documentation/admin-guide/kernel-parameters.txt	patch \|	diff1 \|	diff2 \|	blob \| history
arch/Kconfig	patch \|	diff1 \|	diff2 \|	blob \| history
arch/powerpc/Kconfig	patch \|	diff1 \|	diff2 \|	blob \| history
arch/powerpc/Makefile	patch \|	diff1 \|	diff2 \|	blob \| history
arch/powerpc/include/asm/kvm_host.h	patch \|	diff1 \|	diff2 \|	blob \| history
arch/powerpc/include/asm/xive.h	patch \|	diff1 \|	diff2 \|	blob \| history
arch/powerpc/kernel/Makefile	patch \|	diff1 \|	diff2 \|	blob \| history
arch/powerpc/kernel/dma-iommu.c	patch \|	diff1 \|	diff2 \|	blob \| history
arch/powerpc/kernel/process.c	patch \|	diff1 \|	diff2 \|	blob \| history
arch/powerpc/kernel/setup-common.c	patch \|	diff1 \|	diff2 \|	blob \| history
arch/powerpc/kvm/book3s_hv.c	patch \|	diff1 \|	diff2 \|	blob \| history
arch/powerpc/mm/dma-noncoherent.c	patch \|	diff1 \|	diff2 \|	blob \| history
arch/powerpc/platforms/pseries/vio.c	patch \|	diff1 \|	diff2 \|	blob \| history
arch/powerpc/sysdev/xive/native.c	patch \|	diff1 \|	diff2 \|	blob \| history
arch/s390/Kconfig	patch \|	diff1 \|	diff2 \|	blob \| history
arch/x86/Kconfig	patch \|	diff1 \|	diff2 \|	blob \| history
kernel/dma/mapping.c	patch \|	diff1 \|	diff2 \|	blob \| history
kernel/dma/swiotlb.c	patch \|	diff1 \|	diff2 \|	blob \| history