Merge tag 'v4.8-rc8' into drm-next
author    Dave Airlie <[email protected]>
          Wed, 28 Sep 2016 02:08:49 +0000 (12:08 +1000)
committer Dave Airlie <[email protected]>
          Wed, 28 Sep 2016 02:08:49 +0000 (12:08 +1000)
Linux 4.8-rc8

There was a lot of fallout in the imx/amdgpu/i915 drivers, so backmerge
it now to avoid troubles.
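
A minimal sketch of the backmerge operation described above (an assumed,
typical workflow shown only for illustration; the exact commands are not
recorded in this commit):

    git fetch origin tag v4.8-rc8   # fetch the upstream release tag
    git checkout drm-next           # switch to the DRM integration branch
    git merge v4.8-rc8              # backmerge the tag and resolve any conflicts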

* tag 'v4.8-rc8': (1442 commits)
  Linux 4.8-rc8
  fault_in_multipages_readable() throws set-but-unused error
  mm: check VMA flags to avoid invalid PROT_NONE NUMA balancing
  radix tree: fix sibling entry handling in radix_tree_descend()
  radix tree test suite: Test radix_tree_replace_slot() for multiorder entries
  fix memory leaks in tracing_buffers_splice_read()
  tracing: Move mutex to protect against resetting of seq data
  MIPS: Fix delay slot emulation count in debugfs
  MIPS: SMP: Fix possibility of deadlock when bringing CPUs online
  mm: delete unnecessary and unsafe init_tlb_ubc()
  huge tmpfs: fix Committed_AS leak
  shmem: fix tmpfs to handle the huge= option properly
  blk-mq: skip unmapped queues in blk_mq_alloc_request_hctx
  MIPS: Fix pre-r6 emulation FPU initialisation
  arm64: kgdb: handle read-only text / modules
  arm64: Call numa_store_cpu_info() earlier.
  locking/hung_task: Fix typo in CONFIG_DETECT_HUNG_TASK help text
  nvme-rdma: only clear queue flags after successful connect
  i2c: qup: skip qup_i2c_suspend if the device is already runtime suspended
  perf/core: Limit matching exclusive events to one PMU
  ...

25 files changed:
MAINTAINERS
drivers/gpu/drm/amd/amdgpu/amdgpu.h
drivers/gpu/drm/amd/amdgpu/amdgpu_atombios.c
drivers/gpu/drm/amd/amdgpu/amdgpu_atpx_handler.c
drivers/gpu/drm/amd/amdgpu/amdgpu_ib.c
drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c
drivers/gpu/drm/amd/amdgpu/amdgpu_uvd.c
drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c
drivers/gpu/drm/amd/amdgpu/cik_sdma.c
drivers/gpu/drm/amd/amdgpu/gfx_v7_0.c
drivers/gpu/drm/amd/amdgpu/sdma_v2_4.c
drivers/gpu/drm/amd/amdkfd/kfd_doorbell.c
drivers/gpu/drm/atmel-hlcdc/atmel_hlcdc_plane.c
drivers/gpu/drm/drm_atomic.c
drivers/gpu/drm/drm_fb_helper.c
drivers/gpu/drm/i915/i915_gem.c
drivers/gpu/drm/i915/intel_display.c
drivers/gpu/drm/imx/ipuv3-crtc.c
drivers/gpu/drm/msm/msm_gem.c
drivers/gpu/drm/msm/msm_gem_submit.c
drivers/gpu/drm/radeon/atombios_crtc.c
drivers/gpu/drm/radeon/radeon_atpx_handler.c
drivers/gpu/drm/radeon/radeon_ttm.c
drivers/gpu/drm/vc4/vc4_drv.c
drivers/gpu/drm/vc4/vc4_gem.c

diff --combined MAINTAINERS
index ad310ec42fe7f4c197385b4e838c72f77bc88219,01bff8ea28d88bf8a667100fa37dd376e049634e..703fcb51b7826ceef41f6d8fc8e9a46b8046a419
@@@ -798,6 -798,7 +798,7 @@@ M: Laura Abbott <[email protected]
  M:    Sumit Semwal <[email protected]>
  L:    [email protected]
  S:    Supported
+ F:    Documentation/devicetree/bindings/staging/ion/
  F:    drivers/staging/android/ion
  F:    drivers/staging/android/uapi/ion.h
  F:    drivers/staging/android/uapi/ion_test.h
@@@ -881,6 -882,15 +882,15 @@@ S:       Supporte
  F:    drivers/gpu/drm/arc/
  F:    Documentation/devicetree/bindings/display/snps,arcpgu.txt
  
+ ARM ARCHITECTED TIMER DRIVER
+ M:    Mark Rutland <[email protected]>
+ M:    Marc Zyngier <[email protected]>
+ L:    [email protected] (moderated for non-subscribers)
+ S:    Maintained
+ F:    arch/arm/include/asm/arch_timer.h
+ F:    arch/arm64/include/asm/arch_timer.h
+ F:    drivers/clocksource/arm_arch_timer.c
+
  ARM HDLCD DRM DRIVER
  M:    Liviu Dudau <[email protected]>
  S:    Supported
@@@ -1614,7 -1624,8 +1624,8 @@@ N:      rockchi
  
  ARM/SAMSUNG EXYNOS ARM ARCHITECTURES
  M:    Kukjin Kim <[email protected]>
- M:    Krzysztof Kozlowski <[email protected]>
+ M:    Krzysztof Kozlowski <[email protected]>
+ R:    Javier Martinez Canillas <[email protected]>
  L:    [email protected] (moderated for non-subscribers)
  L:    [email protected] (moderated for non-subscribers)
  S:    Maintained
@@@ -1634,7 -1645,6 +1645,6 @@@ F:      drivers/*/*s3c64xx
  F:    drivers/*/*s5pv210*
  F:    drivers/memory/samsung/*
  F:    drivers/soc/samsung/*
- F:    drivers/spi/spi-s3c*
  F:    Documentation/arm/Samsung/
  F:    Documentation/devicetree/bindings/arm/samsung/
  F:    Documentation/devicetree/bindings/sram/samsung-sram.txt
@@@ -1822,6 -1832,7 +1832,7 @@@ T:      git git://git.kernel.org/pub/scm/lin
  ARM/UNIPHIER ARCHITECTURE
  M:    Masahiro Yamada <[email protected]>
  L:    [email protected] (moderated for non-subscribers)
+ T:    git git://git.kernel.org/pub/scm/linux/kernel/git/masahiroy/linux-uniphier.git
  S:    Maintained
  F:    arch/arm/boot/dts/uniphier*
  F:    arch/arm/include/asm/hardware/cache-uniphier.h
@@@ -2475,7 -2486,7 +2486,7 @@@ F:      include/net/bluetooth
  BONDING DRIVER
  M:    Jay Vosburgh <[email protected]>
  M:    Veaceslav Falico <[email protected]>
- M:    Andy Gospodarek <[email protected]>
+ M:    Andy Gospodarek <[email protected]>
  L:    [email protected]
  W:    http://sourceforge.net/projects/bonding/
  S:    Supported
@@@ -2490,7 -2501,7 +2501,7 @@@ S:      Supporte
  F:    kernel/bpf/
  
  BROADCOM B44 10/100 ETHERNET DRIVER
- M:    Gary Zambrano <zambrano@broadcom.com>
+ M:    Michael Chan <michael.chan@broadcom.com>
  L:    [email protected]
  S:    Supported
  F:    drivers/net/ethernet/broadcom/b44.*
@@@ -3238,7 -3249,7 +3249,7 @@@ F:      kernel/cpuset.
  CONTROL GROUP - MEMORY RESOURCE CONTROLLER (MEMCG)
  M:    Johannes Weiner <[email protected]>
  M:    Michal Hocko <[email protected]>
- M:    Vladimir Davydov <vdavydov@virtuozzo.com>
+ M:    Vladimir Davydov <vdavydov.dev@gmail.com>
  L:    [email protected]
  L:    [email protected]
  S:    Maintained
@@@ -3259,7 -3270,7 +3270,7 @@@ S:      Maintaine
  F:    drivers/net/wan/cosa*
  
  CPMAC ETHERNET DRIVER
- M:    Florian Fainelli <f[email protected]>
+ M:    Florian Fainelli <f[email protected]>
  L:    [email protected]
  S:    Maintained
  F:    drivers/net/ethernet/ti/cpmac.c
@@@ -4064,14 -4075,6 +4075,14 @@@ S:    Orphan / Obsolet
  F:    drivers/gpu/drm/i810/
  F:    include/uapi/drm/i810_drm.h
  
 +DRM DRIVERS FOR MEDIATEK
 +M:    CK Hu <[email protected]>
 +M:    Philipp Zabel <[email protected]>
 +L:    [email protected]
 +S:    Supported
 +F:    drivers/gpu/drm/mediatek/
 +F:    Documentation/devicetree/bindings/display/mediatek/
 +
  DRM DRIVER FOR MSM ADRENO GPU
  M:    Rob Clark <[email protected]>
  L:    [email protected]
  S:    Maintained
  F:    drivers/edac/sb_edac.c
  
+ EDAC-SKYLAKE
+ M:    Tony Luck <[email protected]>
+ L:    [email protected]
+ S:    Maintained
+ F:    drivers/edac/skx_edac.c
+
  EDAC-XGENE
  APPLIED MICRO (APM) X-GENE SOC EDAC
  M:     Loc Ho <[email protected]>
@@@ -6094,7 -6103,7 +6111,7 @@@ S:      Supporte
  F:    drivers/cpufreq/intel_pstate.c
  
  INTEL FRAMEBUFFER DRIVER (excluding 810 and 815)
- M:    Maik Broemme <mbroemme@plusserver.de>
+ M:    Maik Broemme <mbroemme@libmpq.org>
  L:    [email protected]
  S:    Maintained
  F:    Documentation/fb/intelfb.txt
@@@ -7457,7 -7466,8 +7474,8 @@@ F:      Documentation/devicetree/bindings/so
  F:    sound/soc/codecs/max9860.*
  
  MAXIM MUIC CHARGER DRIVERS FOR EXYNOS BASED BOARDS
- M:    Krzysztof Kozlowski <[email protected]>
+ M:    Krzysztof Kozlowski <[email protected]>
+ M:    Bartlomiej Zolnierkiewicz <[email protected]>
  L:    [email protected]
  S:    Supported
  F:    drivers/power/max14577_charger.c
@@@ -7473,7 -7483,8 +7491,8 @@@ F:      include/dt-bindings/*/*max77802.
  
  MAXIM PMIC AND MUIC DRIVERS FOR EXYNOS BASED BOARDS
  M:    Chanwoo Choi <[email protected]>
- M:    Krzysztof Kozlowski <[email protected]>
+ M:    Krzysztof Kozlowski <[email protected]>
+ M:    Bartlomiej Zolnierkiewicz <[email protected]>
  L:    [email protected]
  S:    Supported
  F:    drivers/*/max14577*.c
@@@ -7663,7 -7674,7 +7682,7 @@@ L:      [email protected]
  S:    Supported
  W:    https://github.com/SoftRoCE/rxe-dev/wiki/rxe-dev:-Home
  Q:    http://patchwork.kernel.org/project/linux-rdma/list/
- F:    drivers/infiniband/hw/rxe/
+ F:    drivers/infiniband/sw/rxe/
  F:    include/uapi/rdma/rdma_user_rxe.h
  
  MEMBARRIER SUPPORT
@@@ -8150,6 -8161,15 +8169,15 @@@ S:    Maintaine
  W:    https://fedorahosted.org/dropwatch/
  F:    net/core/drop_monitor.c
  
+ NETWORKING [DSA]
+ M:    Andrew Lunn <[email protected]>
+ M:    Vivien Didelot <[email protected]>
+ M:    Florian Fainelli <[email protected]>
+ S:    Maintained
+ F:    net/dsa/
+ F:    include/net/dsa.h
+ F:    drivers/net/dsa/
+
  NETWORKING [GENERAL]
  M:    "David S. Miller" <[email protected]>
  L:    [email protected]
@@@ -9239,7 -9259,7 +9267,7 @@@ F:      drivers/pinctrl/sh-pfc
  
  PIN CONTROLLER - SAMSUNG
  M:    Tomasz Figa <[email protected]>
- M:    Krzysztof Kozlowski <k[email protected]>
+ M:    Krzysztof Kozlowski <k[email protected]>
  M:    Sylwester Nawrocki <[email protected]>
  L:    [email protected] (moderated for non-subscribers)
  L:    [email protected] (moderated for non-subscribers)
@@@ -10172,7 -10192,7 +10200,7 @@@ S:   Maintaine
  F:    drivers/platform/x86/samsung-laptop.c
  
  SAMSUNG AUDIO (ASoC) DRIVERS
- M:    Krzysztof Kozlowski <k[email protected]>
+ M:    Krzysztof Kozlowski <k[email protected]>
  M:    Sangbeom Kim <[email protected]>
  M:    Sylwester Nawrocki <[email protected]>
  L:    [email protected] (moderated for non-subscribers)
@@@ -10187,7 -10207,8 +10215,8 @@@ F:   drivers/video/fbdev/s3c-fb.
  
  SAMSUNG MULTIFUNCTION PMIC DEVICE DRIVERS
  M:    Sangbeom Kim <[email protected]>
- M:    Krzysztof Kozlowski <[email protected]>
+ M:    Krzysztof Kozlowski <[email protected]>
+ M:    Bartlomiej Zolnierkiewicz <[email protected]>
  L:    [email protected]
  L:    [email protected]
  S:    Supported
@@@ -10246,6 -10267,17 +10275,17 @@@ S: Supporte
  L:    [email protected] (moderated for non-subscribers)
  F:    drivers/clk/samsung/
  
+ SAMSUNG SPI DRIVERS
+ M:    Kukjin Kim <[email protected]>
+ M:    Krzysztof Kozlowski <[email protected]>
+ M:    Andi Shyti <[email protected]>
+ L:    [email protected]
+ L:    [email protected] (moderated for non-subscribers)
+ S:    Maintained
+ F:    Documentation/devicetree/bindings/spi/spi-samsung.txt
+ F:    drivers/spi/spi-s3c*
+ F:    include/linux/platform_data/spi-s3c64xx.h
+
  SAMSUNG SXGBE DRIVERS
  M:    Byungho An <[email protected]>
  M:    Girish K S <[email protected]>
@@@ -11225,12 -11257,8 +11265,8 @@@ S:  Odd Fixe
  F:    drivers/staging/vt665?/
  
  STAGING - WILC1000 WIFI DRIVER
- M:    Johnny Kim <[email protected]>
- M:    Austin Shin <[email protected]>
- M:    Chris Park <[email protected]>
- M:    Tony Cho <[email protected]>
- M:    Glen Lee <[email protected]>
- M:    Leo Kim <[email protected]>
+ M:    Aditya Shankar <[email protected]>
+ M:    Ganesh Krishna <[email protected]>
  L:    [email protected]
  S:    Supported
  F:    drivers/staging/wilc1000/
@@@ -12550,7 -12578,7 +12586,7 @@@ F:   include/linux/if_*vlan.
  F:    net/8021q/
  
  VLYNQ BUS
- M:    Florian Fainelli <f[email protected]>
+ M:    Florian Fainelli <f[email protected]>
  L:    [email protected] (subscribers-only)
  S:    Maintained
  F:    drivers/vlynq/vlynq.c
diff --combined drivers/gpu/drm/amd/amdgpu/amdgpu.h
index 9d79e4ba0213be8c85a60d4fb6ebb58a60a055b1,700c56baf2de7110fdab3dacf1bee91d1a82dbb7..72c68dbb982136b73f84ad881ca30a90ca2f8866
  #include "amdgpu_ih.h"
  #include "amdgpu_irq.h"
  #include "amdgpu_ucode.h"
 +#include "amdgpu_ttm.h"
  #include "amdgpu_gds.h"
  #include "amd_powerplay.h"
  #include "amdgpu_acp.h"
  
  #include "gpu_scheduler.h"
 +#include "amdgpu_virt.h"
  
  /*
   * Modules parameters.
@@@ -65,7 -63,6 +65,7 @@@
  extern int amdgpu_modeset;
  extern int amdgpu_vram_limit;
  extern int amdgpu_gart_size;
 +extern int amdgpu_moverate;
  extern int amdgpu_benchmarking;
  extern int amdgpu_testing;
  extern int amdgpu_audio;
@@@ -94,9 -91,6 +94,9 @@@ extern unsigned amdgpu_pcie_lane_cap
  extern unsigned amdgpu_cg_mask;
  extern unsigned amdgpu_pg_mask;
  extern char *amdgpu_disable_cu;
 +extern int amdgpu_sclk_deep_sleep_en;
 +extern char *amdgpu_virtual_display;
 +extern unsigned amdgpu_pp_feature_mask;
  
  #define AMDGPU_WAIT_IDLE_TIMEOUT_IN_MS                3000
  #define AMDGPU_MAX_USEC_TIMEOUT                       100000  /* 100 ms */
  #define AMDGPU_MAX_RINGS                      16
  #define AMDGPU_MAX_GFX_RINGS                  1
  #define AMDGPU_MAX_COMPUTE_RINGS              8
 -#define AMDGPU_MAX_VCE_RINGS                  2
 +#define AMDGPU_MAX_VCE_RINGS                  3
  
  /* max number of IP instances */
  #define AMDGPU_MAX_SDMA_INSTANCES             2
@@@ -254,9 -248,10 +254,9 @@@ struct amdgpu_vm_pte_funcs 
                         uint64_t pe, uint64_t src,
                         unsigned count);
        /* write pte one entry at a time with addr mapping */
 -      void (*write_pte)(struct amdgpu_ib *ib,
 -                        const dma_addr_t *pages_addr, uint64_t pe,
 -                        uint64_t addr, unsigned count,
 -                        uint32_t incr, uint32_t flags);
 +      void (*write_pte)(struct amdgpu_ib *ib, uint64_t pe,
 +                        uint64_t value, unsigned count,
 +                        uint32_t incr);
        /* for linear pte/pde updates without addr mapping */
        void (*set_pte_pde)(struct amdgpu_ib *ib,
                            uint64_t pe,
@@@ -321,10 -316,6 +321,10 @@@ struct amdgpu_ring_funcs 
        /* note usage for clock and power gating */
        void (*begin_use)(struct amdgpu_ring *ring);
        void (*end_use)(struct amdgpu_ring *ring);
 +      void (*emit_switch_buffer) (struct amdgpu_ring *ring);
 +      void (*emit_cntxcntl) (struct amdgpu_ring *ring, uint32_t flags);
 +      unsigned (*get_emit_ib_size) (struct amdgpu_ring *ring);
 +      unsigned (*get_dma_frame_size) (struct amdgpu_ring *ring);
  };
  
  /*
@@@ -405,9 -396,48 +405,8 @@@ int amdgpu_fence_wait_empty(struct amdg
  unsigned amdgpu_fence_count_emitted(struct amdgpu_ring *ring);
  
  /*
 - * TTM.
 + * BO.
   */
 -
 -#define AMDGPU_TTM_LRU_SIZE   20
 -
 -struct amdgpu_mman_lru {
 -      struct list_head                *lru[TTM_NUM_MEM_TYPES];
 -      struct list_head                *swap_lru;
 -};
 -
 -struct amdgpu_mman {
 -      struct ttm_bo_global_ref        bo_global_ref;
 -      struct drm_global_reference     mem_global_ref;
 -      struct ttm_bo_device            bdev;
 -      bool                            mem_global_referenced;
 -      bool                            initialized;
 -
 -#if defined(CONFIG_DEBUG_FS)
 -      struct dentry                   *vram;
 -      struct dentry                   *gtt;
 -#endif
 -
 -      /* buffer handling */
 -      const struct amdgpu_buffer_funcs        *buffer_funcs;
 -      struct amdgpu_ring                      *buffer_funcs_ring;
 -      /* Scheduler entity for buffer moves */
 -      struct amd_sched_entity                 entity;
 -
 -      /* custom LRU management */
 -      struct amdgpu_mman_lru                  log2_size[AMDGPU_TTM_LRU_SIZE];
 -      /* guard for log2_size array, don't add anything in between */
 -      struct amdgpu_mman_lru                  guard;
 -};
 -
 -int amdgpu_copy_buffer(struct amdgpu_ring *ring,
 -                     uint64_t src_offset,
 -                     uint64_t dst_offset,
 -                     uint32_t byte_count,
 -                     struct reservation_object *resv,
 -                     struct fence **fence);
 -int amdgpu_mmap(struct file *filp, struct vm_area_struct *vma);
--
  struct amdgpu_bo_list_entry {
        struct amdgpu_bo                *robj;
        struct ttm_validate_buffer      tv;
@@@ -470,12 -500,10 +469,12 @@@ struct amdgpu_bo 
        struct amdgpu_device            *adev;
        struct drm_gem_object           gem_base;
        struct amdgpu_bo                *parent;
 +      struct amdgpu_bo                *shadow;
  
        struct ttm_bo_kmap_obj          dma_buf_vmap;
        struct amdgpu_mn                *mn;
        struct list_head                mn_list;
 +      struct list_head                shadow_list;
  };
  #define gem_to_amdgpu_bo(gobj) container_of((gobj), struct amdgpu_bo, gem_base)
  
@@@ -620,12 -648,11 +619,12 @@@ int amdgpu_gart_table_vram_pin(struct a
  void amdgpu_gart_table_vram_unpin(struct amdgpu_device *adev);
  int amdgpu_gart_init(struct amdgpu_device *adev);
  void amdgpu_gart_fini(struct amdgpu_device *adev);
- void amdgpu_gart_unbind(struct amdgpu_device *adev, unsigned offset,
+ void amdgpu_gart_unbind(struct amdgpu_device *adev, uint64_t offset,
                        int pages);
- int amdgpu_gart_bind(struct amdgpu_device *adev, unsigned offset,
+ int amdgpu_gart_bind(struct amdgpu_device *adev, uint64_t offset,
                     int pages, struct page **pagelist,
                     dma_addr_t *dma_addr, uint32_t flags);
 +int amdgpu_ttm_recover_gart(struct amdgpu_device *adev);
  
  /*
   * GPU MC structures, functions & helpers
@@@ -652,8 -679,6 +651,8 @@@ struct amdgpu_mc 
        uint32_t                fw_version;
        struct amdgpu_irq_src   vm_fault;
        uint32_t                vram_type;
 +      uint32_t                srbm_soft_reset;
 +      struct amdgpu_mode_mc_save save;
  };
  
  /*
@@@ -698,11 -723,10 +697,11 @@@ void amdgpu_doorbell_get_kfd_info(struc
   */
  
  struct amdgpu_flip_work {
 -      struct work_struct              flip_work;
 +      struct delayed_work             flip_work;
        struct work_struct              unpin_work;
        struct amdgpu_device            *adev;
        int                             crtc_id;
 +      u32                             target_vblank;
        uint64_t                        base;
        struct drm_pending_vblank_event *event;
        struct amdgpu_bo                *old_rbo;
@@@ -793,17 -817,13 +792,17 @@@ struct amdgpu_ring 
  /* maximum number of VMIDs */
  #define AMDGPU_NUM_VM 16
  
 +/* Maximum number of PTEs the hardware can write with one command */
 +#define AMDGPU_VM_MAX_UPDATE_SIZE     0x3FFFF
 +
  /* number of entries in page table */
  #define AMDGPU_VM_PTE_COUNT (1 << amdgpu_vm_block_size)
  
  /* PTBs (Page Table Blocks) need to be aligned to 32K */
  #define AMDGPU_VM_PTB_ALIGN_SIZE   32768
 -#define AMDGPU_VM_PTB_ALIGN_MASK (AMDGPU_VM_PTB_ALIGN_SIZE - 1)
 -#define AMDGPU_VM_PTB_ALIGN(a) (((a) + AMDGPU_VM_PTB_ALIGN_MASK) & ~AMDGPU_VM_PTB_ALIGN_MASK)
 +
 +/* LOG2 number of continuous pages for the fragment field */
 +#define AMDGPU_LOG2_PAGES_PER_FRAG 4
  
  #define AMDGPU_PTE_VALID      (1 << 0)
  #define AMDGPU_PTE_SYSTEM     (1 << 1)
  #define AMDGPU_PTE_READABLE   (1 << 5)
  #define AMDGPU_PTE_WRITEABLE  (1 << 6)
  
 -/* PTE (Page Table Entry) fragment field for different page sizes */
 -#define AMDGPU_PTE_FRAG_4KB   (0 << 7)
 -#define AMDGPU_PTE_FRAG_64KB  (4 << 7)
 -#define AMDGPU_LOG2_PAGES_PER_FRAG 4
 +#define AMDGPU_PTE_FRAG(x)    ((x & 0x1f) << 7)
  
  /* How to program VM fault handling */
  #define AMDGPU_VM_FAULT_STOP_NEVER    0
  struct amdgpu_vm_pt {
        struct amdgpu_bo_list_entry     entry;
        uint64_t                        addr;
 +      uint64_t                        shadow_addr;
  };
  
  struct amdgpu_vm {
@@@ -928,6 -950,7 +927,6 @@@ int amdgpu_vm_grab_id(struct amdgpu_vm 
                      struct amdgpu_job *job);
  int amdgpu_vm_flush(struct amdgpu_ring *ring, struct amdgpu_job *job);
  void amdgpu_vm_reset_id(struct amdgpu_device *adev, unsigned vm_id);
 -uint64_t amdgpu_vm_map_gart(const dma_addr_t *pages_addr, uint64_t addr);
  int amdgpu_vm_update_page_directory(struct amdgpu_device *adev,
                                    struct amdgpu_vm *vm);
  int amdgpu_vm_clear_freed(struct amdgpu_device *adev,
@@@ -936,7 -959,7 +935,7 @@@ int amdgpu_vm_clear_invalids(struct amd
                             struct amdgpu_sync *sync);
  int amdgpu_vm_bo_update(struct amdgpu_device *adev,
                        struct amdgpu_bo_va *bo_va,
 -                      struct ttm_mem_reg *mem);
 +                      bool clear);
  void amdgpu_vm_bo_invalidate(struct amdgpu_device *adev,
                             struct amdgpu_bo *bo);
  struct amdgpu_bo_va *amdgpu_vm_bo_find(struct amdgpu_vm *vm,
@@@ -971,7 -994,6 +970,7 @@@ struct amdgpu_ctx 
        spinlock_t              ring_lock;
        struct fence            **fences;
        struct amdgpu_ctx_ring  rings[AMDGPU_MAX_RINGS];
 +      bool preamble_presented;
  };
  
  struct amdgpu_ctx_mgr {
@@@ -1175,10 -1197,6 +1174,10 @@@ struct amdgpu_gfx 
        unsigned                        ce_ram_size;
        struct amdgpu_cu_info           cu_info;
        const struct amdgpu_gfx_funcs   *funcs;
 +
 +      /* reset mask */
 +      uint32_t                        grbm_soft_reset;
 +      uint32_t                        srbm_soft_reset;
  };
  
  int amdgpu_ib_get(struct amdgpu_device *adev, struct amdgpu_vm *vm,
@@@ -1231,16 -1249,11 +1230,16 @@@ struct amdgpu_cs_parser 
        struct fence                    *fence;
        uint64_t                        bytes_moved_threshold;
        uint64_t                        bytes_moved;
 +      struct amdgpu_bo_list_entry     *evictable;
  
        /* user fence */
        struct amdgpu_bo_list_entry     uf_entry;
  };
  
 +#define AMDGPU_PREAMBLE_IB_PRESENT          (1 << 0) /* bit set means command submit involves a preamble IB */
 +#define AMDGPU_PREAMBLE_IB_PRESENT_FIRST    (1 << 1) /* bit set means preamble IB is first presented in belonging context */
 +#define AMDGPU_HAVE_CTX_SWITCH              (1 << 2) /* bit set means context switch occurred */
 +
  struct amdgpu_job {
        struct amd_sched_job    base;
        struct amdgpu_device    *adev;
        struct amdgpu_sync      sync;
        struct amdgpu_ib        *ibs;
        struct fence            *fence; /* the hw fence */
 +      uint32_t                preamble_status;
        uint32_t                num_ibs;
        void                    *owner;
 -      uint64_t                ctx;
 +      uint64_t                fence_ctx; /* the fence_context this job uses */
        bool                    vm_needs_flush;
        unsigned                vm_id;
        uint64_t                vm_pd_addr;
@@@ -1673,7 -1685,6 +1672,7 @@@ struct amdgpu_uvd 
        bool                    address_64_bit;
        bool                    use_ctx_buf;
        struct amd_sched_entity entity;
 +      uint32_t                srbm_soft_reset;
  };
  
  /*
@@@ -1700,8 -1711,6 +1699,8 @@@ struct amdgpu_vce 
        struct amdgpu_irq_src   irq;
        unsigned                harvest_config;
        struct amd_sched_entity entity;
 +      uint32_t                srbm_soft_reset;
 +      unsigned                num_rings;
  };
  
  /*
@@@ -1719,14 -1728,9 +1718,14 @@@ struct amdgpu_sdma_instance 
  
  struct amdgpu_sdma {
        struct amdgpu_sdma_instance instance[AMDGPU_MAX_SDMA_INSTANCES];
 +#ifdef CONFIG_DRM_AMDGPU_SI
 +      //SI DMA has a different trap irq number for the second engine
 +      struct amdgpu_irq_src   trap_irq_1;
 +#endif
        struct amdgpu_irq_src   trap_irq;
        struct amdgpu_irq_src   illegal_inst_irq;
        int                     num_instances;
 +      uint32_t                    srbm_soft_reset;
  };
  
  /*
@@@ -1828,7 -1832,6 +1827,7 @@@ struct amdgpu_asic_funcs 
        bool (*read_disabled_bios)(struct amdgpu_device *adev);
        bool (*read_bios_from_rom)(struct amdgpu_device *adev,
                                   u8 *bios, u32 length_bytes);
 +      void (*detect_hw_virtualization) (struct amdgpu_device *adev);
        int (*read_register)(struct amdgpu_device *adev, u32 se_num,
                             u32 sh_num, u32 reg_offset, u32 *value);
        void (*set_vga_state)(struct amdgpu_device *adev, bool state);
        /* MM block clocks */
        int (*set_uvd_clocks)(struct amdgpu_device *adev, u32 vclk, u32 dclk);
        int (*set_vce_clocks)(struct amdgpu_device *adev, u32 evclk, u32 ecclk);
 -      /* query virtual capabilities */
 -      u32 (*get_virtual_caps)(struct amdgpu_device *adev);
 +      /* static power management */
 +      int (*get_pcie_lanes)(struct amdgpu_device *adev);
 +      void (*set_pcie_lanes)(struct amdgpu_device *adev, int lanes);
  };
  
  /*
@@@ -1933,6 -1935,16 +1932,6 @@@ struct amdgpu_atcs 
  struct cgs_device *amdgpu_cgs_create_device(struct amdgpu_device *adev);
  void amdgpu_cgs_destroy_device(struct cgs_device *cgs_device);
  
 -
 -/* GPU virtualization */
 -#define AMDGPU_VIRT_CAPS_SRIOV_EN       (1 << 0)
 -#define AMDGPU_VIRT_CAPS_IS_VF          (1 << 1)
 -struct amdgpu_virtualization {
 -      bool supports_sr_iov;
 -      bool is_virtual;
 -      u32 caps;
 -};
 -
  /*
   * Core structure, functions and helpers.
   */
@@@ -1946,7 -1958,6 +1945,7 @@@ struct amdgpu_ip_block_status 
        bool valid;
        bool sw;
        bool hw;
 +      bool hang;
  };
  
  struct amdgpu_device {
        spinlock_t pcie_idx_lock;
        amdgpu_rreg_t                   pcie_rreg;
        amdgpu_wreg_t                   pcie_wreg;
 +      amdgpu_rreg_t                   pciep_rreg;
 +      amdgpu_wreg_t                   pciep_wreg;
        /* protects concurrent UVD register access */
        spinlock_t uvd_ctx_idx_lock;
        amdgpu_rreg_t                   uvd_ctx_rreg;
        atomic64_t                      num_evictions;
        atomic_t                        gpu_reset_counter;
  
 +      /* data for buffer migration throttling */
 +      struct {
 +              spinlock_t              lock;
 +              s64                     last_update_us;
 +              s64                     accum_us; /* accumulated microseconds */
 +              u32                     log2_max_MBps;
 +      } mm_stats;
 +
        /* display */
 +      bool                            enable_virtual_display;
        struct amdgpu_mode_info         mode_info;
        struct work_struct              hotplug_work;
        struct amdgpu_irq_src           crtc_irq;
        struct kfd_dev          *kfd;
  
        struct amdgpu_virtualization virtualization;
 +
 +      /* link all shadow bo */
 +      struct list_head                shadow_list;
 +      struct mutex                    shadow_list_lock;
 +      /* link all gtt */
 +      spinlock_t                      gtt_list_lock;
 +      struct list_head                gtt_list;
 +
  };
  
  bool amdgpu_device_is_px(struct drm_device *dev);
@@@ -2159,8 -2151,6 +2158,8 @@@ void amdgpu_mm_wdoorbell(struct amdgpu_
  #define REG_GET(FIELD, v) (((v) << FIELD##_SHIFT) & FIELD##_MASK)
  #define RREG32_PCIE(reg) adev->pcie_rreg(adev, (reg))
  #define WREG32_PCIE(reg, v) adev->pcie_wreg(adev, (reg), (v))
 +#define RREG32_PCIE_PORT(reg) adev->pciep_rreg(adev, (reg))
 +#define WREG32_PCIE_PORT(reg, v) adev->pciep_wreg(adev, (reg), (v))
  #define RREG32_SMC(reg) adev->smc_rreg(adev, (reg))
  #define WREG32_SMC(reg, v) adev->smc_wreg(adev, (reg), (v))
  #define RREG32_UVD_CTX(reg) adev->uvd_ctx_rreg(adev, (reg))
  #define REG_GET_FIELD(value, reg, field)                              \
        (((value) & REG_FIELD_MASK(reg, field)) >> REG_FIELD_SHIFT(reg, field))
  
 +#define WREG32_FIELD(reg, field, val) \
 +      WREG32(mm##reg, (RREG32(mm##reg) & ~REG_FIELD_MASK(reg, field)) | (val) << REG_FIELD_SHIFT(reg, field))
 +
  /*
   * BIOS helpers.
   */
@@@ -2250,17 -2237,14 +2249,17 @@@ amdgpu_get_sdma_instance(struct amdgpu_
  #define amdgpu_asic_get_xclk(adev) (adev)->asic_funcs->get_xclk((adev))
  #define amdgpu_asic_set_uvd_clocks(adev, v, d) (adev)->asic_funcs->set_uvd_clocks((adev), (v), (d))
  #define amdgpu_asic_set_vce_clocks(adev, ev, ec) (adev)->asic_funcs->set_vce_clocks((adev), (ev), (ec))
 -#define amdgpu_asic_get_virtual_caps(adev) ((adev)->asic_funcs->get_virtual_caps((adev)))
 +#define amdgpu_get_pcie_lanes(adev) (adev)->asic_funcs->get_pcie_lanes((adev))
 +#define amdgpu_set_pcie_lanes(adev, l) (adev)->asic_funcs->set_pcie_lanes((adev), (l))
 +#define amdgpu_asic_get_gpu_clock_counter(adev) (adev)->asic_funcs->get_gpu_clock_counter((adev))
  #define amdgpu_asic_read_disabled_bios(adev) (adev)->asic_funcs->read_disabled_bios((adev))
  #define amdgpu_asic_read_bios_from_rom(adev, b, l) (adev)->asic_funcs->read_bios_from_rom((adev), (b), (l))
 +#define amdgpu_asic_detect_hw_virtualization(adev) (adev)->asic_funcs->detect_hw_virtualization((adev))
  #define amdgpu_asic_read_register(adev, se, sh, offset, v)((adev)->asic_funcs->read_register((adev), (se), (sh), (offset), (v)))
  #define amdgpu_gart_flush_gpu_tlb(adev, vmid) (adev)->gart.gart_funcs->flush_gpu_tlb((adev), (vmid))
  #define amdgpu_gart_set_pte_pde(adev, pt, idx, addr, flags) (adev)->gart.gart_funcs->set_pte_pde((adev), (pt), (idx), (addr), (flags))
  #define amdgpu_vm_copy_pte(adev, ib, pe, src, count) ((adev)->vm_manager.vm_pte_funcs->copy_pte((ib), (pe), (src), (count)))
 -#define amdgpu_vm_write_pte(adev, ib, pa, pe, addr, count, incr, flags) ((adev)->vm_manager.vm_pte_funcs->write_pte((ib), (pa), (pe), (addr), (count), (incr), (flags)))
 +#define amdgpu_vm_write_pte(adev, ib, pe, value, count, incr) ((adev)->vm_manager.vm_pte_funcs->write_pte((ib), (pe), (value), (count), (incr)))
  #define amdgpu_vm_set_pte_pde(adev, ib, pe, addr, count, incr, flags) ((adev)->vm_manager.vm_pte_funcs->set_pte_pde((ib), (pe), (addr), (count), (incr), (flags)))
  #define amdgpu_ring_parse_cs(r, p, ib) ((r)->funcs->parse_cs((p), (ib)))
  #define amdgpu_ring_test_ring(r) (r)->funcs->test_ring((r))
  #define amdgpu_ring_emit_gds_switch(r, v, db, ds, wb, ws, ab, as) (r)->funcs->emit_gds_switch((r), (v), (db), (ds), (wb), (ws), (ab), (as))
  #define amdgpu_ring_emit_hdp_flush(r) (r)->funcs->emit_hdp_flush((r))
  #define amdgpu_ring_emit_hdp_invalidate(r) (r)->funcs->emit_hdp_invalidate((r))
 +#define amdgpu_ring_emit_switch_buffer(r) (r)->funcs->emit_switch_buffer((r))
 +#define amdgpu_ring_emit_cntxcntl(r, d) (r)->funcs->emit_cntxcntl((r), (d))
  #define amdgpu_ring_pad_ib(r, ib) ((r)->funcs->pad_ib((r), (ib)))
  #define amdgpu_ring_init_cond_exec(r) (r)->funcs->init_cond_exec((r))
  #define amdgpu_ring_patch_cond_exec(r,o) (r)->funcs->patch_cond_exec((r),(o))
 +#define amdgpu_ring_get_emit_ib_size(r) (r)->funcs->get_emit_ib_size((r))
 +#define amdgpu_ring_get_dma_frame_size(r) (r)->funcs->get_dma_frame_size((r))
  #define amdgpu_ih_get_wptr(adev) (adev)->irq.ih_funcs->get_wptr((adev))
  #define amdgpu_ih_decode_iv(adev, iv) (adev)->irq.ih_funcs->decode_iv((adev), (iv))
  #define amdgpu_ih_set_rptr(adev) (adev)->irq.ih_funcs->set_rptr((adev))
  #define amdgpu_gfx_get_gpu_clock_counter(adev) (adev)->gfx.funcs->get_gpu_clock_counter((adev))
  #define amdgpu_gfx_select_se_sh(adev, se, sh, instance) (adev)->gfx.funcs->select_se_sh((adev), (se), (sh), (instance))
  
 +#define amdgpu_dpm_read_sensor(adev, idx, value) \
 +      ((adev)->pp_enabled ? \
 +              (adev)->powerplay.pp_funcs->read_sensor(adev->powerplay.pp_handle, (idx), (value)) : \
 +              -EINVAL)
 +
  #define amdgpu_dpm_get_temperature(adev) \
        ((adev)->pp_enabled ?                                           \
              (adev)->powerplay.pp_funcs->get_temperature((adev)->powerplay.pp_handle) : \
              (adev)->powerplay.pp_funcs->powergate_vce((adev)->powerplay.pp_handle, (g)) : \
              (adev)->pm.funcs->powergate_vce((adev), (g)))
  
 -#define amdgpu_dpm_debugfs_print_current_performance_level(adev, m) \
 -      ((adev)->pp_enabled ?                                           \
 -            (adev)->powerplay.pp_funcs->print_current_performance_level((adev)->powerplay.pp_handle, (m)) : \
 -            (adev)->pm.funcs->debugfs_print_current_performance_level((adev), (m)))
 -
  #define amdgpu_dpm_get_current_power_state(adev) \
        (adev)->powerplay.pp_funcs->get_current_power_state((adev)->powerplay.pp_handle)
  
  
  /* Common functions */
  int amdgpu_gpu_reset(struct amdgpu_device *adev);
 +bool amdgpu_need_backup(struct amdgpu_device *adev);
  void amdgpu_pci_config_reset(struct amdgpu_device *adev);
  bool amdgpu_card_posted(struct amdgpu_device *adev);
  void amdgpu_update_display_priority(struct amdgpu_device *adev);
@@@ -2435,10 -2414,6 +2434,10 @@@ uint32_t amdgpu_ttm_tt_pte_flags(struc
  void amdgpu_vram_location(struct amdgpu_device *adev, struct amdgpu_mc *mc, u64 base);
  void amdgpu_gtt_location(struct amdgpu_device *adev, struct amdgpu_mc *mc);
  void amdgpu_ttm_set_active_vram_size(struct amdgpu_device *adev, u64 size);
 +u64 amdgpu_ttm_get_gtt_mem_size(struct amdgpu_device *adev);
 +int amdgpu_ttm_global_init(struct amdgpu_device *adev);
 +int amdgpu_ttm_init(struct amdgpu_device *adev);
 +void amdgpu_ttm_fini(struct amdgpu_device *adev);
  void amdgpu_program_register_sequence(struct amdgpu_device *adev,
                                             const u32 *registers,
                                             const u32 array_size);
@@@ -2450,13 -2425,11 +2449,13 @@@ void amdgpu_register_atpx_handler(void)
  void amdgpu_unregister_atpx_handler(void);
  bool amdgpu_has_atpx_dgpu_power_cntl(void);
  bool amdgpu_is_atpx_hybrid(void);
 +bool amdgpu_atpx_dgpu_req_power_for_displays(void);
  #else
  static inline void amdgpu_register_atpx_handler(void) {}
  static inline void amdgpu_unregister_atpx_handler(void) {}
  static inline bool amdgpu_has_atpx_dgpu_power_cntl(void) { return false; }
  static inline bool amdgpu_is_atpx_hybrid(void) { return false; }
 +static inline bool amdgpu_atpx_dgpu_req_power_for_displays(void) { return false; }
  #endif
  
  /*
@@@ -2473,8 -2446,8 +2472,8 @@@ void amdgpu_driver_postclose_kms(struc
                                 struct drm_file *file_priv);
  void amdgpu_driver_preclose_kms(struct drm_device *dev,
                                struct drm_file *file_priv);
 -int amdgpu_suspend_kms(struct drm_device *dev, bool suspend, bool fbcon);
 -int amdgpu_resume_kms(struct drm_device *dev, bool resume, bool fbcon);
 +int amdgpu_device_suspend(struct drm_device *dev, bool suspend, bool fbcon);
 +int amdgpu_device_resume(struct drm_device *dev, bool resume, bool fbcon);
  u32 amdgpu_get_vblank_counter_kms(struct drm_device *dev, unsigned int pipe);
  int amdgpu_enable_vblank_kms(struct drm_device *dev, unsigned int pipe);
  void amdgpu_disable_vblank_kms(struct drm_device *dev, unsigned int pipe);
@@@ -2520,7 -2493,6 +2519,7 @@@ static inline void amdgpu_acpi_fini(str
  struct amdgpu_bo_va_mapping *
  amdgpu_cs_find_mapping(struct amdgpu_cs_parser *parser,
                       uint64_t addr, struct amdgpu_bo **bo);
 +int amdgpu_cs_sysvm_access_required(struct amdgpu_cs_parser *parser);
  
  #include "amdgpu_object.h"
  #endif
diff --combined drivers/gpu/drm/amd/amdgpu/amdgpu_atombios.c
index 59961db9c390e5aea9725289ce4b2900fbab388c,fe872b82e6191046b526a250e3e11b05af1cdb3f..8e6bf548d68907f871952f0109186a03373cdd5f
@@@ -259,33 -259,6 +259,33 @@@ static const int object_connector_conve
        DRM_MODE_CONNECTOR_Unknown
  };
  
 +bool amdgpu_atombios_has_dce_engine_info(struct amdgpu_device *adev)
 +{
 +      struct amdgpu_mode_info *mode_info = &adev->mode_info;
 +      struct atom_context *ctx = mode_info->atom_context;
 +      int index = GetIndexIntoMasterTable(DATA, Object_Header);
 +      u16 size, data_offset;
 +      u8 frev, crev;
 +      ATOM_DISPLAY_OBJECT_PATH_TABLE *path_obj;
 +      ATOM_OBJECT_HEADER *obj_header;
 +
 +      if (!amdgpu_atom_parse_data_header(ctx, index, &size, &frev, &crev, &data_offset))
 +              return false;
 +
 +      if (crev < 2)
 +              return false;
 +
 +      obj_header = (ATOM_OBJECT_HEADER *) (ctx->bios + data_offset);
 +      path_obj = (ATOM_DISPLAY_OBJECT_PATH_TABLE *)
 +          (ctx->bios + data_offset +
 +           le16_to_cpu(obj_header->usDisplayPathTableOffset));
 +
 +      if (path_obj->ucNumOfDispPath)
 +              return true;
 +      else
 +              return false;
 +}
 +
  bool amdgpu_atombios_get_connector_info_from_object_table(struct amdgpu_device *adev)
  {
        struct amdgpu_mode_info *mode_info = &adev->mode_info;
                            (le16_to_cpu(path->usConnObjectId) &
                             OBJECT_TYPE_MASK) >> OBJECT_TYPE_SHIFT;
  
+                       /* Skip TV/CV support */
+                       if ((le16_to_cpu(path->usDeviceTag) ==
+                            ATOM_DEVICE_TV1_SUPPORT) ||
+                           (le16_to_cpu(path->usDeviceTag) ==
+                            ATOM_DEVICE_CV_SUPPORT))
+                               continue;
+                       if (con_obj_id >= ARRAY_SIZE(object_connector_convert)) {
+                               DRM_ERROR("invalid con_obj_id %d for device tag 0x%04x\n",
+                                         con_obj_id, le16_to_cpu(path->usDeviceTag));
+                               continue;
+                       }
                        connector_type =
                                object_connector_convert[con_obj_id];
                        connector_object_id = con_obj_id;
@@@ -978,48 -964,6 +991,48 @@@ int amdgpu_atombios_get_clock_dividers(
                return -EINVAL;
  
        switch (crev) {
 +      case 2:
 +      case 3:
 +      case 5:
 +              /* r6xx, r7xx, evergreen, ni, si.
 +               * TODO: add support for asic_type <= CHIP_RV770*/
 +              if (clock_type == COMPUTE_ENGINE_PLL_PARAM) {
 +                      args.v3.ulClockParams = cpu_to_le32((clock_type << 24) | clock);
 +
 +                      amdgpu_atom_execute_table(adev->mode_info.atom_context, index, (uint32_t *)&args);
 +
 +                      dividers->post_div = args.v3.ucPostDiv;
 +                      dividers->enable_post_div = (args.v3.ucCntlFlag &
 +                                                   ATOM_PLL_CNTL_FLAG_PLL_POST_DIV_EN) ? true : false;
 +                      dividers->enable_dithen = (args.v3.ucCntlFlag &
 +                                                 ATOM_PLL_CNTL_FLAG_FRACTION_DISABLE) ? false : true;
 +                      dividers->whole_fb_div = le16_to_cpu(args.v3.ulFbDiv.usFbDiv);
 +                      dividers->frac_fb_div = le16_to_cpu(args.v3.ulFbDiv.usFbDivFrac);
 +                      dividers->ref_div = args.v3.ucRefDiv;
 +                      dividers->vco_mode = (args.v3.ucCntlFlag &
 +                                            ATOM_PLL_CNTL_FLAG_MPLL_VCO_MODE) ? 1 : 0;
 +              } else {
 +                      /* for SI we use ComputeMemoryClockParam for memory plls */
 +                      if (adev->asic_type >= CHIP_TAHITI)
 +                              return -EINVAL;
 +                      args.v5.ulClockParams = cpu_to_le32((clock_type << 24) | clock);
 +                      if (strobe_mode)
 +                              args.v5.ucInputFlag = ATOM_PLL_INPUT_FLAG_PLL_STROBE_MODE_EN;
 +
 +                      amdgpu_atom_execute_table(adev->mode_info.atom_context, index, (uint32_t *)&args);
 +
 +                      dividers->post_div = args.v5.ucPostDiv;
 +                      dividers->enable_post_div = (args.v5.ucCntlFlag &
 +                                                   ATOM_PLL_CNTL_FLAG_PLL_POST_DIV_EN) ? true : false;
 +                      dividers->enable_dithen = (args.v5.ucCntlFlag &
 +                                                 ATOM_PLL_CNTL_FLAG_FRACTION_DISABLE) ? false : true;
 +                      dividers->whole_fb_div = le16_to_cpu(args.v5.ulFbDiv.usFbDiv);
 +                      dividers->frac_fb_div = le16_to_cpu(args.v5.ulFbDiv.usFbDivFrac);
 +                      dividers->ref_div = args.v5.ucRefDiv;
 +                      dividers->vco_mode = (args.v5.ucCntlFlag &
 +                                            ATOM_PLL_CNTL_FLAG_MPLL_VCO_MODE) ? 1 : 0;
 +              }
 +              break;
        case 4:
                /* fusion */
                args.v4.ulClock = cpu_to_le32(clock);   /* 10 khz */
@@@ -1164,32 -1108,6 +1177,32 @@@ void amdgpu_atombios_set_engine_dram_ti
        amdgpu_atom_execute_table(adev->mode_info.atom_context, index, (uint32_t *)&args);
  }
  
 +void amdgpu_atombios_get_default_voltages(struct amdgpu_device *adev,
 +                                        u16 *vddc, u16 *vddci, u16 *mvdd)
 +{
 +      struct amdgpu_mode_info *mode_info = &adev->mode_info;
 +      int index = GetIndexIntoMasterTable(DATA, FirmwareInfo);
 +      u8 frev, crev;
 +      u16 data_offset;
 +      union firmware_info *firmware_info;
 +
 +      *vddc = 0;
 +      *vddci = 0;
 +      *mvdd = 0;
 +
 +      if (amdgpu_atom_parse_data_header(mode_info->atom_context, index, NULL,
 +                                 &frev, &crev, &data_offset)) {
 +              firmware_info =
 +                      (union firmware_info *)(mode_info->atom_context->bios +
 +                                              data_offset);
 +              *vddc = le16_to_cpu(firmware_info->info_14.usBootUpVDDCVoltage);
 +              if ((frev == 2) && (crev >= 2)) {
 +                      *vddci = le16_to_cpu(firmware_info->info_22.usBootUpVDDCIVoltage);
 +                      *mvdd = le16_to_cpu(firmware_info->info_22.usBootUpMVDDCVoltage);
 +              }
 +      }
 +}
 +
  union set_voltage {
        struct _SET_VOLTAGE_PS_ALLOCATION alloc;
        struct _SET_VOLTAGE_PARAMETERS v1;
        struct _SET_VOLTAGE_PARAMETERS_V1_3 v3;
  };
  
 +int amdgpu_atombios_get_max_vddc(struct amdgpu_device *adev, u8 voltage_type,
 +                           u16 voltage_id, u16 *voltage)
 +{
 +      union set_voltage args;
 +      int index = GetIndexIntoMasterTable(COMMAND, SetVoltage);
 +      u8 frev, crev;
 +
 +      if (!amdgpu_atom_parse_cmd_header(adev->mode_info.atom_context, index, &frev, &crev))
 +              return -EINVAL;
 +
 +      switch (crev) {
 +      case 1:
 +              return -EINVAL;
 +      case 2:
 +              args.v2.ucVoltageType = SET_VOLTAGE_GET_MAX_VOLTAGE;
 +              args.v2.ucVoltageMode = 0;
 +              args.v2.usVoltageLevel = 0;
 +
 +              amdgpu_atom_execute_table(adev->mode_info.atom_context, index, (uint32_t *)&args);
 +
 +              *voltage = le16_to_cpu(args.v2.usVoltageLevel);
 +              break;
 +      case 3:
 +              args.v3.ucVoltageType = voltage_type;
 +              args.v3.ucVoltageMode = ATOM_GET_VOLTAGE_LEVEL;
 +              args.v3.usVoltageLevel = cpu_to_le16(voltage_id);
 +
 +              amdgpu_atom_execute_table(adev->mode_info.atom_context, index, (uint32_t *)&args);
 +
 +              *voltage = le16_to_cpu(args.v3.usVoltageLevel);
 +              break;
 +      default:
 +              DRM_ERROR("Unknown table version %d, %d\n", frev, crev);
 +              return -EINVAL;
 +      }
 +
 +      return 0;
 +}
 +
 +int amdgpu_atombios_get_leakage_vddc_based_on_leakage_idx(struct amdgpu_device *adev,
 +                                                    u16 *voltage,
 +                                                    u16 leakage_idx)
 +{
 +      return amdgpu_atombios_get_max_vddc(adev, VOLTAGE_TYPE_VDDC, leakage_idx, voltage);
 +}
 +
  void amdgpu_atombios_set_voltage(struct amdgpu_device *adev,
                                 u16 voltage_level,
                                 u8 voltage_type)
@@@ -1463,50 -1335,6 +1476,50 @@@ static ATOM_VOLTAGE_OBJECT_V3 *amdgpu_a
        return NULL;
  }
  
 +int amdgpu_atombios_get_svi2_info(struct amdgpu_device *adev,
 +                            u8 voltage_type,
 +                            u8 *svd_gpio_id, u8 *svc_gpio_id)
 +{
 +      int index = GetIndexIntoMasterTable(DATA, VoltageObjectInfo);
 +      u8 frev, crev;
 +      u16 data_offset, size;
 +      union voltage_object_info *voltage_info;
 +      union voltage_object *voltage_object = NULL;
 +
 +      if (amdgpu_atom_parse_data_header(adev->mode_info.atom_context, index, &size,
 +                                 &frev, &crev, &data_offset)) {
 +              voltage_info = (union voltage_object_info *)
 +                      (adev->mode_info.atom_context->bios + data_offset);
 +
 +              switch (frev) {
 +              case 3:
 +                      switch (crev) {
 +                      case 1:
 +                              voltage_object = (union voltage_object *)
 +                                      amdgpu_atombios_lookup_voltage_object_v3(&voltage_info->v3,
 +                                                                    voltage_type,
 +                                                                    VOLTAGE_OBJ_SVID2);
 +                              if (voltage_object) {
 +                                      *svd_gpio_id = voltage_object->v3.asSVID2Obj.ucSVDGpioId;
 +                                      *svc_gpio_id = voltage_object->v3.asSVID2Obj.ucSVCGpioId;
 +                              } else {
 +                                      return -EINVAL;
 +                              }
 +                              break;
 +                      default:
 +                              DRM_ERROR("unknown voltage object table\n");
 +                              return -EINVAL;
 +                      }
 +                      break;
 +              default:
 +                      DRM_ERROR("unknown voltage object table\n");
 +                      return -EINVAL;
 +              }
 +
 +      }
 +      return 0;
 +}
 +
  bool
  amdgpu_atombios_is_voltage_gpio(struct amdgpu_device *adev,
                                u8 voltage_type, u8 voltage_mode)
diff --combined drivers/gpu/drm/amd/amdgpu/amdgpu_atpx_handler.c
index 550c5ee704ec4af5af6a0f376f93b890c052c52c,10b5ddf2c5887c36fd7f5d03f4ff6038c8e3cb8a..dae35a96a694d0b6ffc3de5aae94e7344a69e565
@@@ -29,7 -29,6 +29,7 @@@ struct amdgpu_atpx 
        acpi_handle handle;
        struct amdgpu_atpx_functions functions;
        bool is_hybrid;
 +      bool dgpu_req_power_for_displays;
  };
  
  static struct amdgpu_atpx_priv {
@@@ -74,10 -73,6 +74,10 @@@ bool amdgpu_is_atpx_hybrid(void) 
        return amdgpu_atpx_priv.atpx.is_hybrid;
  }
  
 +bool amdgpu_atpx_dgpu_req_power_for_displays(void) {
 +      return amdgpu_atpx_priv.atpx.dgpu_req_power_for_displays;
 +}
 +
  /**
   * amdgpu_atpx_call - call an ATPX method
   *
@@@ -205,23 -200,10 +205,14 @@@ static int amdgpu_atpx_validate(struct 
        atpx->is_hybrid = false;
        if (valid_bits & ATPX_MS_HYBRID_GFX_SUPPORTED) {
                printk("ATPX Hybrid Graphics\n");
- #if 1
-               /* This is a temporary hack until the D3 cold support
-                * makes it upstream.  The ATPX power_control method seems
-                * to still work on even if the system should be using
-                * the new standardized hybrid D3 cold ACPI interface.
-                */
-               atpx->functions.power_cntl = true;
- #else
                atpx->functions.power_cntl = false;
- #endif
                atpx->is_hybrid = true;
        }
  
 +      atpx->dgpu_req_power_for_displays = false;
 +      if (valid_bits & ATPX_DGPU_REQ_POWER_FOR_DISPLAYS)
 +              atpx->dgpu_req_power_for_displays = true;
 +
        return 0;
  }
  
diff --combined drivers/gpu/drm/amd/amdgpu/amdgpu_ib.c
index 4127e7ceace0584e4f6d9052dcd2a8fa077e7e14,ec1282af2479594b9ce4cbf44ebdbfa452e06894..6a6c86c9c1694eb475b9671c47cfaa4738473ed8
@@@ -124,8 -124,7 +124,8 @@@ int amdgpu_ib_schedule(struct amdgpu_ri
        bool skip_preamble, need_ctx_switch;
        unsigned patch_offset = ~0;
        struct amdgpu_vm *vm;
 -      uint64_t ctx;
 +      uint64_t fence_ctx;
 +      uint32_t status = 0, alloc_size;
  
        unsigned i;
        int r = 0;
        /* ring tests don't use a job */
        if (job) {
                vm = job->vm;
 -              ctx = job->ctx;
 +              fence_ctx = job->fence_ctx;
        } else {
                vm = NULL;
 -              ctx = 0;
 +              fence_ctx = 0;
        }
  
        if (!ring->ready) {
 -              dev_err(adev->dev, "couldn't schedule ib\n");
 +              dev_err(adev->dev, "couldn't schedule ib on ring <%s>\n", ring->name);
                return -EINVAL;
        }
  
                return -EINVAL;
        }
  
 -      r = amdgpu_ring_alloc(ring, 256 * num_ibs);
 +      alloc_size = amdgpu_ring_get_dma_frame_size(ring) +
 +              num_ibs * amdgpu_ring_get_emit_ib_size(ring);
 +
 +      r = amdgpu_ring_alloc(ring, alloc_size);
        if (r) {
                dev_err(adev->dev, "scheduling IB failed (%d).\n", r);
                return r;
        /* always set cond_exec_polling to CONTINUE */
        *ring->cond_exe_cpu_addr = 1;
  
 -      skip_preamble = ring->current_ctx == ctx;
 -      need_ctx_switch = ring->current_ctx != ctx;
 +      skip_preamble = ring->current_ctx == fence_ctx;
 +      need_ctx_switch = ring->current_ctx != fence_ctx;
 +      if (job && ring->funcs->emit_cntxcntl) {
 +              if (need_ctx_switch)
 +                      status |= AMDGPU_HAVE_CTX_SWITCH;
 +              status |= job->preamble_status;
 +              amdgpu_ring_emit_cntxcntl(ring, status);
 +      }
 +
        for (i = 0; i < num_ibs; ++i) {
                ib = &ibs[i];
  
                /* drop preamble IBs if we don't have a context switch */
 -              if ((ib->flags & AMDGPU_IB_FLAG_PREAMBLE) && skip_preamble)
 +              if ((ib->flags & AMDGPU_IB_FLAG_PREAMBLE) &&
 +                      skip_preamble &&
 +                      !(status & AMDGPU_PREAMBLE_IB_PRESENT_FIRST))
                        continue;
  
                amdgpu_ring_emit_ib(ring, ib, job ? job->vm_id : 0,
        if (patch_offset != ~0 && ring->funcs->patch_cond_exec)
                amdgpu_ring_patch_cond_exec(ring, patch_offset);
  
 -      ring->current_ctx = ctx;
 +      ring->current_ctx = fence_ctx;
 +      if (ring->funcs->emit_switch_buffer)
 +              amdgpu_ring_emit_switch_buffer(ring);
        amdgpu_ring_commit(ring);
        return 0;
  }
@@@ -295,7 -280,7 +295,7 @@@ void amdgpu_ib_pool_fini(struct amdgpu_
  int amdgpu_ib_ring_tests(struct amdgpu_device *adev)
  {
        unsigned i;
-       int r;
+       int r, ret = 0;
  
        for (i = 0; i < AMDGPU_MAX_RINGS; ++i) {
                struct amdgpu_ring *ring = adev->rings[i];
                        } else {
                                /* still not good, but we can live with it */
                                DRM_ERROR("amdgpu: failed testing IB on ring %d (%d).\n", i, r);
+                               ret = r;
                        }
                }
        }
-       return 0;
+       return ret;
  }
  
  /*
diff --combined drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c
index b63969d7887caf4d575a35a94b1c9f05ff3c126e,716f2afeb6a9a4a403b56a5c28ddf9de489f8746..160a094e1a934e9269a32ccdadf71fdcae796d32
@@@ -34,7 -34,6 +34,7 @@@
  #include <ttm/ttm_placement.h>
  #include <ttm/ttm_module.h>
  #include <ttm/ttm_page_alloc.h>
 +#include <ttm/ttm_memory.h>
  #include <drm/drmP.h>
  #include <drm/amdgpu_drm.h>
  #include <linux/seq_file.h>
@@@ -75,7 -74,7 +75,7 @@@ static void amdgpu_ttm_mem_global_relea
        ttm_mem_global_release(ref->object);
  }
  
 -static int amdgpu_ttm_global_init(struct amdgpu_device *adev)
 +int amdgpu_ttm_global_init(struct amdgpu_device *adev)
  {
        struct drm_global_reference *global_ref;
        struct amdgpu_ring *ring;
        global_ref->init = &amdgpu_ttm_mem_global_init;
        global_ref->release = &amdgpu_ttm_mem_global_release;
        r = drm_global_item_ref(global_ref);
 -      if (r != 0) {
 +      if (r) {
                DRM_ERROR("Failed setting up TTM memory accounting "
                          "subsystem.\n");
 -              return r;
 +              goto error_mem;
        }
  
        adev->mman.bo_global_ref.mem_glob =
        global_ref->init = &ttm_bo_global_init;
        global_ref->release = &ttm_bo_global_release;
        r = drm_global_item_ref(global_ref);
 -      if (r != 0) {
 +      if (r) {
                DRM_ERROR("Failed setting up TTM BO subsystem.\n");
 -              drm_global_item_unref(&adev->mman.mem_global_ref);
 -              return r;
 +              goto error_bo;
        }
  
        ring = adev->mman.buffer_funcs_ring;
        rq = &ring->sched.sched_rq[AMD_SCHED_PRIORITY_KERNEL];
        r = amd_sched_entity_init(&ring->sched, &adev->mman.entity,
                                  rq, amdgpu_sched_jobs);
 -      if (r != 0) {
 +      if (r) {
                DRM_ERROR("Failed setting up TTM BO move run queue.\n");
 -              drm_global_item_unref(&adev->mman.mem_global_ref);
 -              drm_global_item_unref(&adev->mman.bo_global_ref.ref);
 -              return r;
 +              goto error_entity;
        }
  
        adev->mman.mem_global_referenced = true;
  
        return 0;
 +
 +error_entity:
 +      drm_global_item_unref(&adev->mman.bo_global_ref.ref);
 +error_bo:
 +      drm_global_item_unref(&adev->mman.mem_global_ref);
 +error_mem:
 +      return r;
  }
  
  static void amdgpu_ttm_global_fini(struct amdgpu_device *adev)
@@@ -201,7 -196,6 +201,7 @@@ static void amdgpu_evict_flags(struct t
                .lpfn = 0,
                .flags = TTM_PL_MASK_CACHING | TTM_PL_FLAG_SYSTEM
        };
 +      unsigned i;
  
        if (!amdgpu_ttm_bo_is_amdgpu_bo(bo)) {
                placement->placement = &placements;
        rbo = container_of(bo, struct amdgpu_bo, tbo);
        switch (bo->mem.mem_type) {
        case TTM_PL_VRAM:
 -              if (rbo->adev->mman.buffer_funcs_ring->ready == false)
 +              if (rbo->adev->mman.buffer_funcs_ring->ready == false) {
                        amdgpu_ttm_placement_from_domain(rbo, AMDGPU_GEM_DOMAIN_CPU);
 -              else
 +              } else {
                        amdgpu_ttm_placement_from_domain(rbo, AMDGPU_GEM_DOMAIN_GTT);
 +                      for (i = 0; i < rbo->placement.num_placement; ++i) {
 +                              if (!(rbo->placements[i].flags &
 +                                    TTM_PL_FLAG_TT))
 +                                      continue;
 +
 +                              if (rbo->placements[i].lpfn)
 +                                      continue;
 +
 +                              /* set an upper limit to force directly
 +                               * allocating address space for the BO.
 +                               */
 +                              rbo->placements[i].lpfn =
 +                                      rbo->adev->mc.gtt_size >> PAGE_SHIFT;
 +                      }
 +              }
                break;
        case TTM_PL_TT:
        default:
@@@ -246,8 -225,7 +246,8 @@@ static int amdgpu_verify_access(struct 
  
        if (amdgpu_ttm_tt_get_usermm(bo->ttm))
                return -EPERM;
 -      return drm_vma_node_verify_access(&rbo->gem_base.vma_node, filp);
 +      return drm_vma_node_verify_access(&rbo->gem_base.vma_node,
 +                                        filp->private_data);
  }
  
  static void amdgpu_move_null(struct ttm_buffer_object *bo,
@@@ -273,30 -251,26 +273,30 @@@ static int amdgpu_move_blit(struct ttm_
  
        adev = amdgpu_get_adev(bo->bdev);
        ring = adev->mman.buffer_funcs_ring;
-       old_start = old_mem->start << PAGE_SHIFT;
-       new_start = new_mem->start << PAGE_SHIFT;
+       old_start = (u64)old_mem->start << PAGE_SHIFT;
+       new_start = (u64)new_mem->start << PAGE_SHIFT;
  
        switch (old_mem->mem_type) {
 -      case TTM_PL_VRAM:
 -              old_start += adev->mc.vram_start;
 -              break;
        case TTM_PL_TT:
 -              old_start += adev->mc.gtt_start;
 +              r = amdgpu_ttm_bind(bo->ttm, old_mem);
 +              if (r)
 +                      return r;
 +
 +      case TTM_PL_VRAM:
 +              old_start += bo->bdev->man[old_mem->mem_type].gpu_offset;
                break;
        default:
                DRM_ERROR("Unknown placement %d\n", old_mem->mem_type);
                return -EINVAL;
        }
        switch (new_mem->mem_type) {
 -      case TTM_PL_VRAM:
 -              new_start += adev->mc.vram_start;
 -              break;
        case TTM_PL_TT:
 -              new_start += adev->mc.gtt_start;
 +              r = amdgpu_ttm_bind(bo->ttm, new_mem);
 +              if (r)
 +                      return r;
 +
 +      case TTM_PL_VRAM:
 +              new_start += bo->bdev->man[new_mem->mem_type].gpu_offset;
                break;
        default:
                DRM_ERROR("Unknown placement %d\n", old_mem->mem_type);
  
        r = amdgpu_copy_buffer(ring, old_start, new_start,
                               new_mem->num_pages * PAGE_SIZE, /* bytes */
 -                             bo->resv, &fence);
 +                             bo->resv, &fence, false);
        if (r)
                return r;
  
@@@ -340,7 -314,7 +340,7 @@@ static int amdgpu_move_vram_ram(struct 
        placement.num_busy_placement = 1;
        placement.busy_placement = &placements;
        placements.fpfn = 0;
 -      placements.lpfn = 0;
 +      placements.lpfn = adev->mc.gtt_size >> PAGE_SHIFT;
        placements.flags = TTM_PL_MASK_CACHING | TTM_PL_FLAG_TT;
        r = ttm_bo_mem_space(bo, &placement, &tmp_mem,
                             interruptible, no_wait_gpu);
        if (unlikely(r)) {
                goto out_cleanup;
        }
 -      r = ttm_bo_move_ttm(bo, true, interruptible, no_wait_gpu, new_mem);
 +      r = ttm_bo_move_ttm(bo, interruptible, no_wait_gpu, new_mem);
  out_cleanup:
        ttm_bo_mem_put(bo, &tmp_mem);
        return r;
@@@ -387,14 -361,14 +387,14 @@@ static int amdgpu_move_ram_vram(struct 
        placement.num_busy_placement = 1;
        placement.busy_placement = &placements;
        placements.fpfn = 0;
 -      placements.lpfn = 0;
 +      placements.lpfn = adev->mc.gtt_size >> PAGE_SHIFT;
        placements.flags = TTM_PL_MASK_CACHING | TTM_PL_FLAG_TT;
        r = ttm_bo_mem_space(bo, &placement, &tmp_mem,
                             interruptible, no_wait_gpu);
        if (unlikely(r)) {
                return r;
        }
 -      r = ttm_bo_move_ttm(bo, true, interruptible, no_wait_gpu, &tmp_mem);
 +      r = ttm_bo_move_ttm(bo, interruptible, no_wait_gpu, &tmp_mem);
        if (unlikely(r)) {
                goto out_cleanup;
        }
@@@ -461,7 -435,8 +461,7 @@@ static int amdgpu_bo_move(struct ttm_bu
  
        if (r) {
  memcpy:
 -              r = ttm_bo_move_memcpy(bo, evict, interruptible,
 -                                     no_wait_gpu, new_mem);
 +              r = ttm_bo_move_memcpy(bo, interruptible, no_wait_gpu, new_mem);
                if (r) {
                        return r;
                }
@@@ -549,7 -524,6 +549,7 @@@ struct amdgpu_ttm_tt 
        spinlock_t              guptasklock;
        struct list_head        guptasks;
        atomic_t                mmu_invalidations;
 +      struct list_head        list;
  };
  
  int amdgpu_ttm_tt_get_user_pages(struct ttm_tt *ttm, struct page **pages)
@@@ -667,6 -641,7 +667,6 @@@ static int amdgpu_ttm_backend_bind(stru
                                   struct ttm_mem_reg *bo_mem)
  {
        struct amdgpu_ttm_tt *gtt = (void*)ttm;
 -      uint32_t flags = amdgpu_ttm_tt_pte_flags(gtt->adev, ttm, bo_mem);
        int r;
  
        if (gtt->userptr) {
                        return r;
                }
        }
 -      gtt->offset = (unsigned long)(bo_mem->start << PAGE_SHIFT);
 +      gtt->offset = (u64)bo_mem->start << PAGE_SHIFT;
        if (!ttm->num_pages) {
                WARN(1, "nothing to bind %lu pages for mreg %p back %p!\n",
                     ttm->num_pages, bo_mem, ttm);
            bo_mem->mem_type == AMDGPU_PL_OA)
                return -EINVAL;
  
 +      return 0;
 +}
 +
 +bool amdgpu_ttm_is_bound(struct ttm_tt *ttm)
 +{
 +      struct amdgpu_ttm_tt *gtt = (void *)ttm;
 +
 +      return gtt && !list_empty(&gtt->list);
 +}
 +
 +int amdgpu_ttm_bind(struct ttm_tt *ttm, struct ttm_mem_reg *bo_mem)
 +{
 +      struct amdgpu_ttm_tt *gtt = (void *)ttm;
 +      uint32_t flags;
 +      int r;
 +
 +      if (!ttm || amdgpu_ttm_is_bound(ttm))
 +              return 0;
 +
 +      flags = amdgpu_ttm_tt_pte_flags(gtt->adev, ttm, bo_mem);
        r = amdgpu_gart_bind(gtt->adev, gtt->offset, ttm->num_pages,
                ttm->pages, gtt->ttm.dma_address, flags);
  
        if (r) {
 -              DRM_ERROR("failed to bind %lu pages at 0x%08X\n",
 -                        ttm->num_pages, (unsigned)gtt->offset);
 +              DRM_ERROR("failed to bind %lu pages at 0x%08llX\n",
 +                        ttm->num_pages, gtt->offset);
                return r;
        }
 +      spin_lock(&gtt->adev->gtt_list_lock);
 +      list_add_tail(&gtt->list, &gtt->adev->gtt_list);
 +      spin_unlock(&gtt->adev->gtt_list_lock);
 +      return 0;
 +}
 +
 +int amdgpu_ttm_recover_gart(struct amdgpu_device *adev)
 +{
 +      struct amdgpu_ttm_tt *gtt, *tmp;
 +      struct ttm_mem_reg bo_mem;
 +      uint32_t flags;
 +      int r;
 +
 +      bo_mem.mem_type = TTM_PL_TT;
 +      spin_lock(&adev->gtt_list_lock);
 +      list_for_each_entry_safe(gtt, tmp, &adev->gtt_list, list) {
 +              flags = amdgpu_ttm_tt_pte_flags(gtt->adev, &gtt->ttm.ttm, &bo_mem);
 +              r = amdgpu_gart_bind(adev, gtt->offset, gtt->ttm.ttm.num_pages,
 +                                   gtt->ttm.ttm.pages, gtt->ttm.dma_address,
 +                                   flags);
 +              if (r) {
 +                      spin_unlock(&adev->gtt_list_lock);
 +                      DRM_ERROR("failed to bind %lu pages at 0x%08llX\n",
 +                                gtt->ttm.ttm.num_pages, gtt->offset);
 +                      return r;
 +              }
 +      }
 +      spin_unlock(&adev->gtt_list_lock);
        return 0;
  }
  
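
amdgpu_ttm_bind() above threads every bound TT onto adev->gtt_list under gtt_list_lock, list_empty() doubles as the is-bound check, and amdgpu_ttm_recover_gart() walks the list to rebind everything after a reset. A standalone sketch of that intrusive-list pattern, with minimal stand-ins for the kernel's list helpers:

    #include <stdbool.h>
    #include <stdio.h>

    struct node { struct node *prev, *next; };      /* stand-in for struct list_head */

    static void node_init(struct node *n) { n->prev = n->next = n; }
    static bool node_empty(const struct node *n) { return n->next == n; }

    static void node_add_tail(struct node *n, struct node *head)
    {
            n->prev = head->prev;
            n->next = head;
            head->prev->next = n;
            head->prev = n;
    }

    static void node_del_init(struct node *n)
    {
            n->prev->next = n->next;
            n->next->prev = n->prev;
            node_init(n);
    }

    struct tt {                     /* stand-in for amdgpu_ttm_tt */
            struct node list;       /* linked into the per-device bound list */
    };

    int main(void)
    {
            struct node bound_list; /* stand-in for adev->gtt_list */
            struct tt a;

            node_init(&bound_list);
            node_init(&a.list);

            printf("bound? %d\n", !node_empty(&a.list));    /* 0: not bound yet */
            node_add_tail(&a.list, &bound_list);            /* bind */
            printf("bound? %d\n", !node_empty(&a.list));    /* 1: bound, recoverable */
            node_del_init(&a.list);                         /* unbind */
            printf("bound? %d\n", !node_empty(&a.list));    /* 0 again */
            return 0;
    }
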
@@@ -750,9 -677,6 +750,9 @@@ static int amdgpu_ttm_backend_unbind(st
  {
        struct amdgpu_ttm_tt *gtt = (void *)ttm;
  
 +      if (!amdgpu_ttm_is_bound(ttm))
 +              return 0;
 +
        /* unbind shouldn't be done for GDS/GWS/OA in ttm_bo_clean_mm */
        if (gtt->adev->gart.ready)
                amdgpu_gart_unbind(gtt->adev, gtt->offset, ttm->num_pages);
        if (gtt->userptr)
                amdgpu_ttm_tt_unpin_userptr(ttm);
  
 +      spin_lock(&gtt->adev->gtt_list_lock);
 +      list_del_init(&gtt->list);
 +      spin_unlock(&gtt->adev->gtt_list_lock);
 +
        return 0;
  }
  
@@@ -800,7 -720,6 +800,7 @@@ static struct ttm_tt *amdgpu_ttm_tt_cre
                kfree(gtt);
                return NULL;
        }
 +      INIT_LIST_HEAD(&gtt->list);
        return &gtt->ttm.ttm;
  }
  
@@@ -1072,6 -991,10 +1072,6 @@@ int amdgpu_ttm_init(struct amdgpu_devic
        unsigned i, j;
        int r;
  
 -      r = amdgpu_ttm_global_init(adev);
 -      if (r) {
 -              return r;
 -      }
        /* No other users of the address space, so set it to 0 */
        r = ttm_bo_device_init(&adev->mman.bdev,
                               adev->mman.bo_global_ref.ref.object,
@@@ -1236,7 -1159,7 +1236,7 @@@ int amdgpu_copy_buffer(struct amdgpu_ri
                       uint64_t dst_offset,
                       uint32_t byte_count,
                       struct reservation_object *resv,
 -                     struct fence **fence)
 +                     struct fence **fence, bool direct_submit)
  {
        struct amdgpu_device *adev = ring->adev;
        struct amdgpu_job *job;
                byte_count -= cur_size_in_bytes;
        }
  
 +      amdgpu_ring_pad_ib(ring, &job->ibs[0]);
 +      WARN_ON(job->ibs[0].length_dw > num_dw);
 +      if (direct_submit) {
 +              r = amdgpu_ib_schedule(ring, job->num_ibs, job->ibs,
 +                                     NULL, NULL, fence);
 +              job->fence = fence_get(*fence);
 +              if (r)
 +                      DRM_ERROR("Error scheduling IBs (%d)\n", r);
 +              amdgpu_job_free(job);
 +      } else {
 +              r = amdgpu_job_submit(job, ring, &adev->mman.entity,
 +                                    AMDGPU_FENCE_OWNER_UNDEFINED, fence);
 +              if (r)
 +                      goto error_free;
 +      }
 +
 +      return r;
 +
 +error_free:
 +      amdgpu_job_free(job);
 +      return r;
 +}
 +
 +int amdgpu_fill_buffer(struct amdgpu_bo *bo,
 +              uint32_t src_data,
 +              struct reservation_object *resv,
 +              struct fence **fence)
 +{
 +      struct amdgpu_device *adev = bo->adev;
 +      struct amdgpu_job *job;
 +      struct amdgpu_ring *ring = adev->mman.buffer_funcs_ring;
 +
 +      uint32_t max_bytes, byte_count;
 +      uint64_t dst_offset;
 +      unsigned int num_loops, num_dw;
 +      unsigned int i;
 +      int r;
 +
 +      byte_count = bo->tbo.num_pages << PAGE_SHIFT;
 +      max_bytes = adev->mman.buffer_funcs->fill_max_bytes;
 +      num_loops = DIV_ROUND_UP(byte_count, max_bytes);
 +      num_dw = num_loops * adev->mman.buffer_funcs->fill_num_dw;
 +
 +      /* for IB padding */
 +      while (num_dw & 0x7)
 +              num_dw++;
 +
 +      r = amdgpu_job_alloc_with_ib(adev, num_dw * 4, &job);
 +      if (r)
 +              return r;
 +
 +      if (resv) {
 +              r = amdgpu_sync_resv(adev, &job->sync, resv,
 +                              AMDGPU_FENCE_OWNER_UNDEFINED);
 +              if (r) {
 +                      DRM_ERROR("sync failed (%d).\n", r);
 +                      goto error_free;
 +              }
 +      }
 +
 +      dst_offset = bo->tbo.mem.start << PAGE_SHIFT;
 +      for (i = 0; i < num_loops; i++) {
 +              uint32_t cur_size_in_bytes = min(byte_count, max_bytes);
 +
 +              amdgpu_emit_fill_buffer(adev, &job->ibs[0], src_data,
 +                              dst_offset, cur_size_in_bytes);
 +
 +              dst_offset += cur_size_in_bytes;
 +              byte_count -= cur_size_in_bytes;
 +      }
 +
        amdgpu_ring_pad_ib(ring, &job->ibs[0]);
        WARN_ON(job->ibs[0].length_dw > num_dw);
        r = amdgpu_job_submit(job, ring, &adev->mman.entity,
 -                            AMDGPU_FENCE_OWNER_UNDEFINED, fence);
 +                      AMDGPU_FENCE_OWNER_UNDEFINED, fence);
        if (r)
                goto error_free;
  
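
amdgpu_fill_buffer() above sizes the job from DIV_ROUND_UP(byte_count, fill_max_bytes) and then emits one packet per chunk of at most max_bytes, while amdgpu_copy_buffer() gains a direct_submit path around the scheduler. The chunking arithmetic as a standalone sketch; the 0x1fffff limit is only an assumed value for illustration:

    #include <stdint.h>
    #include <stdio.h>

    #define DIV_ROUND_UP(n, d)      (((n) + (d) - 1) / (d))

    static uint32_t min_u32(uint32_t a, uint32_t b) { return a < b ? a : b; }

    int main(void)
    {
            uint32_t byte_count = 10 * 1024 * 1024 + 123;   /* odd-sized buffer */
            uint32_t max_bytes  = 0x1fffff;                 /* assumed per-packet limit */
            uint32_t num_loops  = DIV_ROUND_UP(byte_count, max_bytes);
            uint64_t dst = 0;
            uint32_t i;

            printf("emitting %u fill packets\n", num_loops);
            for (i = 0; i < num_loops; i++) {
                    uint32_t cur = min_u32(byte_count, max_bytes);

                    /* the real code emits one fill/copy packet covering cur bytes here */
                    dst += cur;
                    byte_count -= cur;
            }
            printf("covered %llu bytes, %u left\n",
                   (unsigned long long)dst, byte_count);    /* all covered, 0 left */
            return 0;
    }
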
@@@ -1543,8 -1395,3 +1543,8 @@@ static void amdgpu_ttm_debugfs_fini(str
  
  #endif
  }
 +
 +u64 amdgpu_ttm_get_gtt_mem_size(struct amdgpu_device *adev)
 +{
 +      return ttm_get_kernel_zone_memory_size(adev->mman.mem_global_ref.object);
 +}
index 25dd58a65905d8e4a2e5680842ba27fe36709f0e,4aa993d190189aff82ecc44d18898df8d5164708..cee7bc9a2314dce0c315f768c14dd498e67810dc
@@@ -201,14 -201,39 +201,14 @@@ int amdgpu_uvd_sw_init(struct amdgpu_de
        bo_size = AMDGPU_GPU_PAGE_ALIGN(le32_to_cpu(hdr->ucode_size_bytes) + 8)
                  +  AMDGPU_UVD_STACK_SIZE + AMDGPU_UVD_HEAP_SIZE
                  +  AMDGPU_UVD_SESSION_SIZE * adev->uvd.max_handles;
 -      r = amdgpu_bo_create(adev, bo_size, PAGE_SIZE, true,
 -                           AMDGPU_GEM_DOMAIN_VRAM,
 -                           AMDGPU_GEM_CREATE_CPU_ACCESS_REQUIRED,
 -                           NULL, NULL, &adev->uvd.vcpu_bo);
 +      r = amdgpu_bo_create_kernel(adev, bo_size, PAGE_SIZE,
 +                                  AMDGPU_GEM_DOMAIN_VRAM, &adev->uvd.vcpu_bo,
 +                                  &adev->uvd.gpu_addr, &adev->uvd.cpu_addr);
        if (r) {
                dev_err(adev->dev, "(%d) failed to allocate UVD bo\n", r);
                return r;
        }
  
 -      r = amdgpu_bo_reserve(adev->uvd.vcpu_bo, false);
 -      if (r) {
 -              amdgpu_bo_unref(&adev->uvd.vcpu_bo);
 -              dev_err(adev->dev, "(%d) failed to reserve UVD bo\n", r);
 -              return r;
 -      }
 -
 -      r = amdgpu_bo_pin(adev->uvd.vcpu_bo, AMDGPU_GEM_DOMAIN_VRAM,
 -                        &adev->uvd.gpu_addr);
 -      if (r) {
 -              amdgpu_bo_unreserve(adev->uvd.vcpu_bo);
 -              amdgpu_bo_unref(&adev->uvd.vcpu_bo);
 -              dev_err(adev->dev, "(%d) UVD bo pin failed\n", r);
 -              return r;
 -      }
 -
 -      r = amdgpu_bo_kmap(adev->uvd.vcpu_bo, &adev->uvd.cpu_addr);
 -      if (r) {
 -              dev_err(adev->dev, "(%d) UVD map failed\n", r);
 -              return r;
 -      }
 -
 -      amdgpu_bo_unreserve(adev->uvd.vcpu_bo);
 -
        ring = &adev->uvd.ring;
        rq = &ring->sched.sched_rq[AMD_SCHED_PRIORITY_NORMAL];
        r = amd_sched_entity_init(&ring->sched, &adev->uvd.entity,
  
  int amdgpu_uvd_sw_fini(struct amdgpu_device *adev)
  {
 -      int r;
 -
        kfree(adev->uvd.saved_bo);
  
        amd_sched_entity_fini(&adev->uvd.ring.sched, &adev->uvd.entity);
  
 -      if (adev->uvd.vcpu_bo) {
 -              r = amdgpu_bo_reserve(adev->uvd.vcpu_bo, false);
 -              if (!r) {
 -                      amdgpu_bo_kunmap(adev->uvd.vcpu_bo);
 -                      amdgpu_bo_unpin(adev->uvd.vcpu_bo);
 -                      amdgpu_bo_unreserve(adev->uvd.vcpu_bo);
 -              }
 -
 -              amdgpu_bo_unref(&adev->uvd.vcpu_bo);
 -      }
 +      amdgpu_bo_free_kernel(&adev->uvd.vcpu_bo,
 +                            &adev->uvd.gpu_addr,
 +                            (void **)&adev->uvd.cpu_addr);
  
        amdgpu_ring_fini(&adev->uvd.ring);
  
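
The UVD setup above replaces the open-coded create/reserve/pin/kmap sequence and its error unwinding with a single amdgpu_bo_create_kernel() call. A rough standalone sketch of what such a helper centralizes, using illustrative stand-in functions rather than the real driver calls:

    #include <stdio.h>

    struct bo { int created, pinned, mapped; };

    static int bo_create(struct bo *b)   { b->created = 1; return 0; }
    static int bo_pin(struct bo *b)      { b->pinned = 1;  return 0; }
    static int bo_kmap(struct bo *b)     { b->mapped = 1;  return 0; }
    static void bo_unpin(struct bo *b)   { b->pinned = 0; }
    static void bo_destroy(struct bo *b) { b->created = 0; }

    /* one place owns the sequence and the reverse-order unwinding */
    static int bo_create_kernel(struct bo *b)
    {
            int r;

            r = bo_create(b);
            if (r)
                    return r;
            r = bo_pin(b);
            if (r)
                    goto err_destroy;
            r = bo_kmap(b);
            if (r)
                    goto err_unpin;
            return 0;

    err_unpin:
            bo_unpin(b);
    err_destroy:
            bo_destroy(b);
            return r;
    }

    int main(void)
    {
            struct bo vcpu_bo = { 0 };

            printf("create_kernel -> %d (created=%d pinned=%d mapped=%d)\n",
                   bo_create_kernel(&vcpu_bo), vcpu_bo.created,
                   vcpu_bo.pinned, vcpu_bo.mapped);
            return 0;
    }
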
@@@ -289,7 -323,7 +289,7 @@@ int amdgpu_uvd_suspend(struct amdgpu_de
        if (!adev->uvd.saved_bo)
                return -ENOMEM;
  
 -      memcpy(adev->uvd.saved_bo, ptr, size);
 +      memcpy_fromio(adev->uvd.saved_bo, ptr, size);
  
        return 0;
  }
@@@ -306,7 -340,7 +306,7 @@@ int amdgpu_uvd_resume(struct amdgpu_dev
        ptr = adev->uvd.cpu_addr;
  
        if (adev->uvd.saved_bo != NULL) {
 -              memcpy(ptr, adev->uvd.saved_bo, size);
 +              memcpy_toio(ptr, adev->uvd.saved_bo, size);
                kfree(adev->uvd.saved_bo);
                adev->uvd.saved_bo = NULL;
        } else {
  
                hdr = (const struct common_firmware_header *)adev->uvd.fw->data;
                offset = le32_to_cpu(hdr->ucode_array_offset_bytes);
 -              memcpy(adev->uvd.cpu_addr, (adev->uvd.fw->data) + offset,
 -                      (adev->uvd.fw->size) - offset);
 +              memcpy_toio(adev->uvd.cpu_addr, adev->uvd.fw->data + offset,
 +                          le32_to_cpu(hdr->ucode_size_bytes));
                size -= le32_to_cpu(hdr->ucode_size_bytes);
                ptr += le32_to_cpu(hdr->ucode_size_bytes);
 -              memset(ptr, 0, size);
 +              memset_io(ptr, 0, size);
        }
  
        return 0;
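
The suspend/resume paths switch to memcpy_fromio()/memcpy_toio()/memset_io() because the UVD buffer sits in VRAM and is mapped as I/O memory, where plain memcpy() is not appropriate. The loop below is only a conceptual illustration of copying through explicit, non-elidable accesses; the real kernel helpers additionally go through the architecture's I/O accessors.

    #include <stddef.h>
    #include <stdio.h>

    /* copy byte by byte through a volatile pointer so every read is performed */
    static void copy_from_mmio(void *dst, const volatile void *src, size_t n)
    {
            unsigned char *d = dst;
            const volatile unsigned char *s = src;

            while (n--)
                    *d++ = *s++;
    }

    int main(void)
    {
            unsigned char fake_vram[16] = "uvd-firmware!!!";  /* pretend I/O-mapped VRAM */
            unsigned char saved[16];

            copy_from_mmio(saved, fake_vram, sizeof(saved));
            printf("%s\n", saved);
            return 0;
    }
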
@@@ -809,7 -843,6 +809,7 @@@ static int amdgpu_uvd_cs_reg(struct amd
                                return r;
                        break;
                case mmUVD_ENGINE_CNTL:
 +              case mmUVD_NO_OP:
                        break;
                default:
                        DRM_ERROR("Invalid reg 0x%X!\n", reg);
@@@ -882,10 -915,6 +882,10 @@@ int amdgpu_uvd_ring_parse_cs(struct amd
                return -EINVAL;
        }
  
 +      r = amdgpu_cs_sysvm_access_required(parser);
 +      if (r)
 +              return r;
 +
        ctx.parser = parser;
        ctx.buf_sizes = buf_sizes;
        ctx.ib_idx = ib_idx;
@@@ -952,10 -981,8 +952,10 @@@ static int amdgpu_uvd_send_msg(struct a
        ib->ptr[3] = addr >> 32;
        ib->ptr[4] = PACKET0(mmUVD_GPCOM_VCPU_CMD, 0);
        ib->ptr[5] = 0;
 -      for (i = 6; i < 16; ++i)
 -              ib->ptr[i] = PACKET2(0);
 +      for (i = 6; i < 16; i += 2) {
 +              ib->ptr[i] = PACKET0(mmUVD_NO_OP, 0);
 +              ib->ptr[i+1] = 0;
 +      }
        ib->length_dw = 16;
  
        if (direct) {
@@@ -1087,9 -1114,15 +1087,9 @@@ static void amdgpu_uvd_idle_work_handle
  {
        struct amdgpu_device *adev =
                container_of(work, struct amdgpu_device, uvd.idle_work.work);
 -      unsigned i, fences, handles = 0;
 -
 -      fences = amdgpu_fence_count_emitted(&adev->uvd.ring);
 -
 -      for (i = 0; i < adev->uvd.max_handles; ++i)
 -              if (atomic_read(&adev->uvd.handles[i]))
 -                      ++handles;
 +      unsigned fences = amdgpu_fence_count_emitted(&adev->uvd.ring);
  
 -      if (fences == 0 && handles == 0) {
 +      if (fences == 0) {
                if (adev->pm.dpm_enabled) {
                        amdgpu_dpm_enable_uvd(adev, false);
                } else {
@@@ -1154,7 -1187,8 +1154,8 @@@ int amdgpu_uvd_ring_test_ib(struct amdg
                r = 0;
        }
  
- error:
        fence_put(fence);
+ error:
        return r;
  }
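
Moving fence_put() above the error label means the failure paths that jump there no longer drop a reference they never took. A standalone sketch of that cleanup-ordering pattern, with stand-in types:

    #include <stdio.h>
    #include <stdlib.h>

    struct fence { int refcount; };

    static struct fence *get_fence(int fail)
    {
            struct fence *f;

            if (fail)
                    return NULL;
            f = malloc(sizeof(*f));
            f->refcount = 1;
            return f;
    }

    static void fence_put(struct fence *f)
    {
            if (f && --f->refcount == 0)
                    free(f);
    }

    static int run_test(int fail)
    {
            struct fence *fence = get_fence(fail);
            int r = 0;

            if (!fence) {
                    r = -1;
                    goto error;     /* no reference was taken, so skip the put */
            }

            /* ... wait on the fence and check the result here ... */

            fence_put(fence);       /* released only on the path that holds it */
    error:
            return r;
    }

    int main(void)
    {
            printf("%d %d\n", run_test(0), run_test(1));    /* 0 -1 */
            return 0;
    }
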
index bd5af328154f92525cea46ac8912f808d6200555,80120fa4092c76164f460ba5cef4de1bce1d6d83..a6a48ed9562e89cf2d229ba0153afc7907ac8dfa
   * SI supports 16.
   */
  
 -/* Special value that no flush is necessary */
 -#define AMDGPU_VM_NO_FLUSH (~0ll)
 -
  /* Local structure. Encapsulate some VM table update parameters to reduce
   * the number of function parameters
   */
 -struct amdgpu_vm_update_params {
 +struct amdgpu_pte_update_params {
 +      /* amdgpu device we do this update for */
 +      struct amdgpu_device *adev;
        /* address where to copy page table entries from */
        uint64_t src;
 -      /* DMA addresses to use for mapping */
 -      dma_addr_t *pages_addr;
        /* indirect buffer to fill with commands */
        struct amdgpu_ib *ib;
 +      /* Function which actually does the update */
 +      void (*func)(struct amdgpu_pte_update_params *params, uint64_t pe,
 +                   uint64_t addr, unsigned count, uint32_t incr,
 +                   uint32_t flags);
 +      /* indicate update pt or its shadow */
 +      bool shadow;
  };
  
  /**
@@@ -470,9 -467,10 +470,9 @@@ struct amdgpu_bo_va *amdgpu_vm_bo_find(
  }
  
  /**
 - * amdgpu_vm_update_pages - helper to call the right asic function
 + * amdgpu_vm_do_set_ptes - helper to call the right asic function
   *
 - * @adev: amdgpu_device pointer
 - * @vm_update_params: see amdgpu_vm_update_params definition
 + * @params: see amdgpu_pte_update_params definition
   * @pe: addr of the page entry
   * @addr: dst addr to write into pe
   * @count: number of page entries to update
   * Traces the parameters and calls the right asic functions
   * to setup the page table using the DMA.
   */
 -static void amdgpu_vm_update_pages(struct amdgpu_device *adev,
 -                                 struct amdgpu_vm_update_params
 -                                      *vm_update_params,
 -                                 uint64_t pe, uint64_t addr,
 -                                 unsigned count, uint32_t incr,
 -                                 uint32_t flags)
 +static void amdgpu_vm_do_set_ptes(struct amdgpu_pte_update_params *params,
 +                                uint64_t pe, uint64_t addr,
 +                                unsigned count, uint32_t incr,
 +                                uint32_t flags)
  {
        trace_amdgpu_vm_set_page(pe, addr, count, incr, flags);
  
 -      if (vm_update_params->src) {
 -              amdgpu_vm_copy_pte(adev, vm_update_params->ib,
 -                      pe, (vm_update_params->src + (addr >> 12) * 8), count);
 -
 -      } else if (vm_update_params->pages_addr) {
 -              amdgpu_vm_write_pte(adev, vm_update_params->ib,
 -                      vm_update_params->pages_addr,
 -                      pe, addr, count, incr, flags);
 -
 -      } else if (count < 3) {
 -              amdgpu_vm_write_pte(adev, vm_update_params->ib, NULL, pe, addr,
 -                                  count, incr, flags);
 +      if (count < 3) {
 +              amdgpu_vm_write_pte(params->adev, params->ib, pe,
 +                                  addr | flags, count, incr);
  
        } else {
 -              amdgpu_vm_set_pte_pde(adev, vm_update_params->ib, pe, addr,
 +              amdgpu_vm_set_pte_pde(params->adev, params->ib, pe, addr,
                                      count, incr, flags);
        }
  }
  
 +/**
 + * amdgpu_vm_do_copy_ptes - copy the PTEs from the GART
 + *
 + * @params: see amdgpu_pte_update_params definition
 + * @pe: addr of the page entry
 + * @addr: dst addr to write into pe
 + * @count: number of page entries to update
 + * @incr: increase next addr by incr bytes
 + * @flags: hw access flags
 + *
 + * Traces the parameters and calls the DMA function to copy the PTEs.
 + */
 +static void amdgpu_vm_do_copy_ptes(struct amdgpu_pte_update_params *params,
 +                                 uint64_t pe, uint64_t addr,
 +                                 unsigned count, uint32_t incr,
 +                                 uint32_t flags)
 +{
 +      trace_amdgpu_vm_set_page(pe, addr, count, incr, flags);
 +
 +      amdgpu_vm_copy_pte(params->adev, params->ib, pe,
 +                         (params->src + (addr >> 12) * 8), count);
 +}
 +
  /**
   * amdgpu_vm_clear_bo - initially clear the page dir/table
   *
@@@ -537,11 -523,12 +537,11 @@@ static int amdgpu_vm_clear_bo(struct am
        struct amdgpu_ring *ring;
        struct fence *fence = NULL;
        struct amdgpu_job *job;
 -      struct amdgpu_vm_update_params vm_update_params;
 +      struct amdgpu_pte_update_params params;
        unsigned entries;
        uint64_t addr;
        int r;
  
 -      memset(&vm_update_params, 0, sizeof(vm_update_params));
        ring = container_of(vm->entity.sched, struct amdgpu_ring, sched);
  
        r = reservation_object_reserve_shared(bo->tbo.resv);
        if (r)
                goto error;
  
 -      vm_update_params.ib = &job->ibs[0];
 -      amdgpu_vm_update_pages(adev, &vm_update_params, addr, 0, entries,
 -                             0, 0);
 +      memset(&params, 0, sizeof(params));
 +      params.adev = adev;
 +      params.ib = &job->ibs[0];
 +      amdgpu_vm_do_set_ptes(&params, addr, 0, entries, 0, 0);
        amdgpu_ring_pad_ib(ring, &job->ibs[0]);
  
        WARN_ON(job->ibs[0].length_dw > 64);
@@@ -591,41 -577,55 +591,41 @@@ error
   * Look up the physical address of the page that the pte resolves
   * to and return the pointer for the page table entry.
   */
 -uint64_t amdgpu_vm_map_gart(const dma_addr_t *pages_addr, uint64_t addr)
 +static uint64_t amdgpu_vm_map_gart(const dma_addr_t *pages_addr, uint64_t addr)
  {
        uint64_t result;
  
 -      if (pages_addr) {
 -              /* page table offset */
 -              result = pages_addr[addr >> PAGE_SHIFT];
 -
 -              /* in case cpu page size != gpu page size*/
 -              result |= addr & (~PAGE_MASK);
 +      /* page table offset */
 +      result = pages_addr[addr >> PAGE_SHIFT];
  
 -      } else {
 -              /* No mapping required */
 -              result = addr;
 -      }
 +      /* in case cpu page size != gpu page size */
 +      result |= addr & (~PAGE_MASK);
  
        result &= 0xFFFFFFFFFFFFF000ULL;
  
        return result;
  }
  
 -/**
 - * amdgpu_vm_update_pdes - make sure that page directory is valid
 - *
 - * @adev: amdgpu_device pointer
 - * @vm: requested vm
 - * @start: start of GPU address range
 - * @end: end of GPU address range
 - *
 - * Allocates new page tables if necessary
 - * and updates the page directory.
 - * Returns 0 for success, error for failure.
 - */
 -int amdgpu_vm_update_page_directory(struct amdgpu_device *adev,
 -                                  struct amdgpu_vm *vm)
 +static int amdgpu_vm_update_pd_or_shadow(struct amdgpu_device *adev,
 +                                       struct amdgpu_vm *vm,
 +                                       bool shadow)
  {
        struct amdgpu_ring *ring;
 -      struct amdgpu_bo *pd = vm->page_directory;
 -      uint64_t pd_addr = amdgpu_bo_gpu_offset(pd);
 +      struct amdgpu_bo *pd = shadow ? vm->page_directory->shadow :
 +              vm->page_directory;
 +      uint64_t pd_addr;
        uint32_t incr = AMDGPU_VM_PTE_COUNT * 8;
        uint64_t last_pde = ~0, last_pt = ~0;
        unsigned count = 0, pt_idx, ndw;
        struct amdgpu_job *job;
 -      struct amdgpu_vm_update_params vm_update_params;
 +      struct amdgpu_pte_update_params params;
        struct fence *fence = NULL;
  
        int r;
  
 -      memset(&vm_update_params, 0, sizeof(vm_update_params));
 +      if (!pd)
 +              return 0;
 +      pd_addr = amdgpu_bo_gpu_offset(pd);
        ring = container_of(vm->entity.sched, struct amdgpu_ring, sched);
  
        /* padding, etc. */
        if (r)
                return r;
  
 -      vm_update_params.ib = &job->ibs[0];
 +      memset(&params, 0, sizeof(params));
 +      params.adev = adev;
 +      params.ib = &job->ibs[0];
  
        /* walk over the address space and update the page directory */
        for (pt_idx = 0; pt_idx <= vm->max_pde_used; ++pt_idx) {
                        continue;
  
                pt = amdgpu_bo_gpu_offset(bo);
 -              if (vm->page_tables[pt_idx].addr == pt)
 -                      continue;
 -              vm->page_tables[pt_idx].addr = pt;
 +              if (!shadow) {
 +                      if (vm->page_tables[pt_idx].addr == pt)
 +                              continue;
 +                      vm->page_tables[pt_idx].addr = pt;
 +              } else {
 +                      if (vm->page_tables[pt_idx].shadow_addr == pt)
 +                              continue;
 +                      vm->page_tables[pt_idx].shadow_addr = pt;
 +              }
  
                pde = pd_addr + pt_idx * 8;
                if (((last_pde + 8 * count) != pde) ||
 -                  ((last_pt + incr * count) != pt)) {
 +                  ((last_pt + incr * count) != pt) ||
 +                  (count == AMDGPU_VM_MAX_UPDATE_SIZE)) {
  
                        if (count) {
 -                              amdgpu_vm_update_pages(adev, &vm_update_params,
 -                                                     last_pde, last_pt,
 -                                                     count, incr,
 -                                                     AMDGPU_PTE_VALID);
 +                              amdgpu_vm_do_set_ptes(&params, last_pde,
 +                                                    last_pt, count, incr,
 +                                                    AMDGPU_PTE_VALID);
                        }
  
                        count = 1;
        }
  
        if (count)
 -              amdgpu_vm_update_pages(adev, &vm_update_params,
 -                                      last_pde, last_pt,
 -                                      count, incr, AMDGPU_PTE_VALID);
 +              amdgpu_vm_do_set_ptes(&params, last_pde, last_pt,
 +                                    count, incr, AMDGPU_PTE_VALID);
  
 -      if (vm_update_params.ib->length_dw != 0) {
 -              amdgpu_ring_pad_ib(ring, vm_update_params.ib);
 +      if (params.ib->length_dw != 0) {
 +              amdgpu_ring_pad_ib(ring, params.ib);
                amdgpu_sync_resv(adev, &job->sync, pd->tbo.resv,
                                 AMDGPU_FENCE_OWNER_VM);
 -              WARN_ON(vm_update_params.ib->length_dw > ndw);
 +              WARN_ON(params.ib->length_dw > ndw);
                r = amdgpu_job_submit(job, ring, &vm->entity,
                                      AMDGPU_FENCE_OWNER_VM, &fence);
                if (r)
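
The new amdgpu_pte_update_params carries a function pointer so the shared walking code no longer branches per chunk between writing PTEs directly and copying them from a staging buffer. A standalone sketch of that pattern; the structures and callbacks below are illustrative stand-ins, not the kernel's:

    #include <stdint.h>
    #include <stdio.h>

    struct pte_update_params {
            uint64_t src;   /* staging buffer GPU address, 0 if unused */
            void (*func)(struct pte_update_params *p, uint64_t pe,
                         uint64_t addr, unsigned count);
    };

    static void do_set_ptes(struct pte_update_params *p, uint64_t pe,
                            uint64_t addr, unsigned count)
    {
            (void)p;
            printf("SET  %u PTEs at 0x%llx -> 0x%llx\n", count,
                   (unsigned long long)pe, (unsigned long long)addr);
    }

    static void do_copy_ptes(struct pte_update_params *p, uint64_t pe,
                             uint64_t addr, unsigned count)
    {
            printf("COPY %u PTEs at 0x%llx from staging 0x%llx\n", count,
                   (unsigned long long)pe,
                   (unsigned long long)(p->src + (addr >> 12) * 8));
    }

    /* the walker only ever calls params->func() */
    static void update_range(struct pte_update_params *p, uint64_t pe,
                             uint64_t addr, unsigned count)
    {
            p->func(p, pe, addr, count);
    }

    int main(void)
    {
            struct pte_update_params params = { .src = 0, .func = do_set_ptes };

            update_range(&params, 0x1000, 0x200000, 64);

            params.src = 0xdead0000;        /* pretend the PTEs were staged in an IB */
            params.func = do_copy_ptes;
            update_range(&params, 0x1000, 0x200000, 64);
            return 0;
    }
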
@@@ -710,33 -703,92 +710,33 @@@ error_free
        return r;
  }
  
 -/**
 - * amdgpu_vm_frag_ptes - add fragment information to PTEs
 +/*
 + * amdgpu_vm_update_pdes - make sure that page directory is valid
   *
   * @adev: amdgpu_device pointer
 - * @vm_update_params: see amdgpu_vm_update_params definition
 - * @pe_start: first PTE to handle
 - * @pe_end: last PTE to handle
 - * @addr: addr those PTEs should point to
 - * @flags: hw mapping flags
 + * @vm: requested vm
 + * @start: start of GPU address range
 + * @end: end of GPU address range
 + *
 + * Allocates new page tables if necessary
 + * and updates the page directory.
 + * Returns 0 for success, error for failure.
   */
 -static void amdgpu_vm_frag_ptes(struct amdgpu_device *adev,
 -                              struct amdgpu_vm_update_params
 -                                      *vm_update_params,
 -                              uint64_t pe_start, uint64_t pe_end,
 -                              uint64_t addr, uint32_t flags)
 +int amdgpu_vm_update_page_directory(struct amdgpu_device *adev,
 +                                   struct amdgpu_vm *vm)
  {
 -      /**
 -       * The MC L1 TLB supports variable sized pages, based on a fragment
 -       * field in the PTE. When this field is set to a non-zero value, page
 -       * granularity is increased from 4KB to (1 << (12 + frag)). The PTE
 -       * flags are considered valid for all PTEs within the fragment range
 -       * and corresponding mappings are assumed to be physically contiguous.
 -       *
 -       * The L1 TLB can store a single PTE for the whole fragment,
 -       * significantly increasing the space available for translation
 -       * caching. This leads to large improvements in throughput when the
 -       * TLB is under pressure.
 -       *
 -       * The L2 TLB distributes small and large fragments into two
 -       * asymmetric partitions. The large fragment cache is significantly
 -       * larger. Thus, we try to use large fragments wherever possible.
 -       * Userspace can support this by aligning virtual base address and
 -       * allocation size to the fragment size.
 -       */
 -
 -      /* SI and newer are optimized for 64KB */
 -      uint64_t frag_flags = AMDGPU_PTE_FRAG_64KB;
 -      uint64_t frag_align = 0x80;
 -
 -      uint64_t frag_start = ALIGN(pe_start, frag_align);
 -      uint64_t frag_end = pe_end & ~(frag_align - 1);
 -
 -      unsigned count;
 -
 -      /* Abort early if there isn't anything to do */
 -      if (pe_start == pe_end)
 -              return;
 -
 -      /* system pages are non continuously */
 -      if (vm_update_params->src || vm_update_params->pages_addr ||
 -              !(flags & AMDGPU_PTE_VALID) || (frag_start >= frag_end)) {
 -
 -              count = (pe_end - pe_start) / 8;
 -              amdgpu_vm_update_pages(adev, vm_update_params, pe_start,
 -                                     addr, count, AMDGPU_GPU_PAGE_SIZE,
 -                                     flags);
 -              return;
 -      }
 -
 -      /* handle the 4K area at the beginning */
 -      if (pe_start != frag_start) {
 -              count = (frag_start - pe_start) / 8;
 -              amdgpu_vm_update_pages(adev, vm_update_params, pe_start, addr,
 -                                     count, AMDGPU_GPU_PAGE_SIZE, flags);
 -              addr += AMDGPU_GPU_PAGE_SIZE * count;
 -      }
 -
 -      /* handle the area in the middle */
 -      count = (frag_end - frag_start) / 8;
 -      amdgpu_vm_update_pages(adev, vm_update_params, frag_start, addr, count,
 -                             AMDGPU_GPU_PAGE_SIZE, flags | frag_flags);
 +      int r;
  
 -      /* handle the 4K area at the end */
 -      if (frag_end != pe_end) {
 -              addr += AMDGPU_GPU_PAGE_SIZE * count;
 -              count = (pe_end - frag_end) / 8;
 -              amdgpu_vm_update_pages(adev, vm_update_params, frag_end, addr,
 -                                     count, AMDGPU_GPU_PAGE_SIZE, flags);
 -      }
 +      r = amdgpu_vm_update_pd_or_shadow(adev, vm, true);
 +      if (r)
 +              return r;
 +      return amdgpu_vm_update_pd_or_shadow(adev, vm, false);
  }
  
  /**
   * amdgpu_vm_update_ptes - make sure that page tables are valid
   *
 - * @adev: amdgpu_device pointer
 - * @vm_update_params: see amdgpu_vm_update_params definition
 + * @params: see amdgpu_pte_update_params definition
   * @vm: requested vm
   * @start: start of GPU address range
   * @end: end of GPU address range
   *
   * Update the page tables in the range @start - @end.
   */
 -static void amdgpu_vm_update_ptes(struct amdgpu_device *adev,
 -                                struct amdgpu_vm_update_params
 -                                      *vm_update_params,
 +static void amdgpu_vm_update_ptes(struct amdgpu_pte_update_params *params,
                                  struct amdgpu_vm *vm,
                                  uint64_t start, uint64_t end,
                                  uint64_t dst, uint32_t flags)
  {
        const uint64_t mask = AMDGPU_VM_PTE_COUNT - 1;
  
 -      uint64_t cur_pe_start, cur_pe_end, cur_dst;
 +      uint64_t cur_pe_start, cur_nptes, cur_dst;
        uint64_t addr; /* next GPU address to be updated */
        uint64_t pt_idx;
        struct amdgpu_bo *pt;
        addr = start;
        pt_idx = addr >> amdgpu_vm_block_size;
        pt = vm->page_tables[pt_idx].entry.robj;
 -
 +      if (params->shadow) {
 +              if (!pt->shadow)
 +                      return;
 +              pt = vm->page_tables[pt_idx].entry.robj->shadow;
 +      }
        if ((addr & ~mask) == (end & ~mask))
                nptes = end - addr;
        else
  
        cur_pe_start = amdgpu_bo_gpu_offset(pt);
        cur_pe_start += (addr & mask) * 8;
 -      cur_pe_end = cur_pe_start + 8 * nptes;
 +      cur_nptes = nptes;
        cur_dst = dst;
  
        /* for next ptb*/
        while (addr < end) {
                pt_idx = addr >> amdgpu_vm_block_size;
                pt = vm->page_tables[pt_idx].entry.robj;
 +              if (params->shadow) {
 +                      if (!pt->shadow)
 +                              return;
 +                      pt = vm->page_tables[pt_idx].entry.robj->shadow;
 +              }
  
                if ((addr & ~mask) == (end & ~mask))
                        nptes = end - addr;
                next_pe_start = amdgpu_bo_gpu_offset(pt);
                next_pe_start += (addr & mask) * 8;
  
 -              if (cur_pe_end == next_pe_start) {
 +              if ((cur_pe_start + 8 * cur_nptes) == next_pe_start &&
 +                  ((cur_nptes + nptes) <= AMDGPU_VM_MAX_UPDATE_SIZE)) {
                        /* The next ptb is consecutive to current ptb.
 -                       * Don't call amdgpu_vm_frag_ptes now.
 +                       * Don't call the update function now.
                         * Will update two ptbs together in future.
                        */
 -                      cur_pe_end += 8 * nptes;
 +                      cur_nptes += nptes;
                } else {
 -                      amdgpu_vm_frag_ptes(adev, vm_update_params,
 -                                          cur_pe_start, cur_pe_end,
 -                                          cur_dst, flags);
 +                      params->func(params, cur_pe_start, cur_dst, cur_nptes,
 +                                   AMDGPU_GPU_PAGE_SIZE, flags);
  
                        cur_pe_start = next_pe_start;
 -                      cur_pe_end = next_pe_start + 8 * nptes;
 +                      cur_nptes = nptes;
                        cur_dst = dst;
                }
  
                dst += nptes * AMDGPU_GPU_PAGE_SIZE;
        }
  
 -      amdgpu_vm_frag_ptes(adev, vm_update_params, cur_pe_start,
 -                          cur_pe_end, cur_dst, flags);
 +      params->func(params, cur_pe_start, cur_dst, cur_nptes,
 +                   AMDGPU_GPU_PAGE_SIZE, flags);
 +}
 +
 +/*
 + * amdgpu_vm_frag_ptes - add fragment information to PTEs
 + *
 + * @params: see amdgpu_pte_update_params definition
 + * @vm: requested vm
 + * @start: first PTE to handle
 + * @end: last PTE to handle
 + * @dst: addr those PTEs should point to
 + * @flags: hw mapping flags
 + */
 +static void amdgpu_vm_frag_ptes(struct amdgpu_pte_update_params *params,
 +                              struct amdgpu_vm *vm,
 +                              uint64_t start, uint64_t end,
 +                              uint64_t dst, uint32_t flags)
 +{
 +      /**
 +       * The MC L1 TLB supports variable sized pages, based on a fragment
 +       * field in the PTE. When this field is set to a non-zero value, page
 +       * granularity is increased from 4KB to (1 << (12 + frag)). The PTE
 +       * flags are considered valid for all PTEs within the fragment range
 +       * and corresponding mappings are assumed to be physically contiguous.
 +       *
 +       * The L1 TLB can store a single PTE for the whole fragment,
 +       * significantly increasing the space available for translation
 +       * caching. This leads to large improvements in throughput when the
 +       * TLB is under pressure.
 +       *
 +       * The L2 TLB distributes small and large fragments into two
 +       * asymmetric partitions. The large fragment cache is significantly
 +       * larger. Thus, we try to use large fragments wherever possible.
 +       * Userspace can support this by aligning virtual base address and
 +       * allocation size to the fragment size.
 +       */
 +
 +      const uint64_t frag_align = 1 << AMDGPU_LOG2_PAGES_PER_FRAG;
 +
 +      uint64_t frag_start = ALIGN(start, frag_align);
 +      uint64_t frag_end = end & ~(frag_align - 1);
 +
 +      uint32_t frag;
 +
 +      /* system pages are not contiguous */
 +      if (params->src || !(flags & AMDGPU_PTE_VALID) ||
 +          (frag_start >= frag_end)) {
 +
 +              amdgpu_vm_update_ptes(params, vm, start, end, dst, flags);
 +              return;
 +      }
 +
 +      /* use more than 64KB fragment size if possible */
 +      frag = lower_32_bits(frag_start | frag_end);
 +      frag = likely(frag) ? __ffs(frag) : 31;
 +
 +      /* handle the 4K area at the beginning */
 +      if (start != frag_start) {
 +              amdgpu_vm_update_ptes(params, vm, start, frag_start,
 +                                    dst, flags);
 +              dst += (frag_start - start) * AMDGPU_GPU_PAGE_SIZE;
 +      }
 +
 +      /* handle the area in the middle */
 +      amdgpu_vm_update_ptes(params, vm, frag_start, frag_end, dst,
 +                            flags | AMDGPU_PTE_FRAG(frag));
 +
 +      /* handle the 4K area at the end */
 +      if (frag_end != end) {
 +              dst += (frag_end - frag_start) * AMDGPU_GPU_PAGE_SIZE;
 +              amdgpu_vm_update_ptes(params, vm, frag_end, end, dst, flags);
 +      }
  }
  
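
The reworked amdgpu_vm_frag_ptes() derives the fragment size from the alignment of both ends of the aligned middle region, taking the lowest set bit of (frag_start | frag_end). A standalone sketch of that selection, assuming AMDGPU_LOG2_PAGES_PER_FRAG is 4 (64KB fragments as the minimum); the PTE indices are made up:

    #include <stdint.h>
    #include <stdio.h>

    #define LOG2_PAGES_PER_FRAG 4   /* assumed: 16 pages, i.e. 64KB */

    static unsigned lowest_set_bit(uint32_t v)
    {
            return v ? (unsigned)__builtin_ctz(v) : 31;  /* mirrors likely(frag) ? __ffs(frag) : 31 */
    }

    int main(void)
    {
            const uint64_t frag_align = 1ULL << LOG2_PAGES_PER_FRAG;

            uint64_t start = 19, end = 1037;        /* PTE index range of a mapping */
            uint64_t frag_start = (start + frag_align - 1) & ~(frag_align - 1);
            uint64_t frag_end = end & ~(frag_align - 1);
            unsigned frag = lowest_set_bit((uint32_t)(frag_start | frag_end));

            printf("head  : [%llu, %llu) in 4K pages\n",
                   (unsigned long long)start, (unsigned long long)frag_start);
            printf("middle: [%llu, %llu) with 2^%u-page fragments\n",
                   (unsigned long long)frag_start, (unsigned long long)frag_end, frag);
            printf("tail  : [%llu, %llu) in 4K pages\n",
                   (unsigned long long)frag_end, (unsigned long long)end);
            return 0;
    }
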
  /**
@@@ -926,19 -900,14 +926,19 @@@ static int amdgpu_vm_bo_update_mapping(
        void *owner = AMDGPU_FENCE_OWNER_VM;
        unsigned nptes, ncmds, ndw;
        struct amdgpu_job *job;
 -      struct amdgpu_vm_update_params vm_update_params;
 +      struct amdgpu_pte_update_params params;
        struct fence *f = NULL;
        int r;
  
 +      memset(&params, 0, sizeof(params));
 +      params.adev = adev;
 +      params.src = src;
 +
        ring = container_of(vm->entity.sched, struct amdgpu_ring, sched);
 -      memset(&vm_update_params, 0, sizeof(vm_update_params));
 -      vm_update_params.src = src;
 -      vm_update_params.pages_addr = pages_addr;
 +
 +      memset(&params, 0, sizeof(params));
 +      params.adev = adev;
 +      params.src = src;
  
        /* sync to everything on unmapping */
        if (!(flags & AMDGPU_PTE_VALID))
        /* padding, etc. */
        ndw = 64;
  
 -      if (vm_update_params.src) {
 +      if (src) {
                /* only copy commands needed */
                ndw += ncmds * 7;
  
 -      } else if (vm_update_params.pages_addr) {
 -              /* header for write data commands */
 -              ndw += ncmds * 4;
 +              params.func = amdgpu_vm_do_copy_ptes;
 +
 +      } else if (pages_addr) {
 +              /* copy commands needed */
 +              ndw += ncmds * 7;
  
 -              /* body of write data command */
 +              /* and also PTEs */
                ndw += nptes * 2;
  
 +              params.func = amdgpu_vm_do_copy_ptes;
 +
        } else {
                /* set page commands needed */
                ndw += ncmds * 10;
  
                /* two extra commands for begin/end of fragment */
                ndw += 2 * 10;
 +
 +              params.func = amdgpu_vm_do_set_ptes;
        }
  
        r = amdgpu_job_alloc_with_ib(adev, ndw * 4, &job);
        if (r)
                return r;
  
 -      vm_update_params.ib = &job->ibs[0];
 +      params.ib = &job->ibs[0];
 +
 +      if (!src && pages_addr) {
 +              uint64_t *pte;
 +              unsigned i;
 +
 +              /* Put the PTEs at the end of the IB. */
 +              i = ndw - nptes * 2;
 +              pte = (uint64_t *)&(job->ibs->ptr[i]);
 +              params.src = job->ibs->gpu_addr + i * 4;
 +
 +              for (i = 0; i < nptes; ++i) {
 +                      pte[i] = amdgpu_vm_map_gart(pages_addr, addr + i *
 +                                                  AMDGPU_GPU_PAGE_SIZE);
 +                      pte[i] |= flags;
 +              }
 +      }
  
        r = amdgpu_sync_fence(adev, &job->sync, exclusive);
        if (r)
        if (r)
                goto error_free;
  
 -      amdgpu_vm_update_ptes(adev, &vm_update_params, vm, start,
 -                            last + 1, addr, flags);
 +      params.shadow = true;
 +      amdgpu_vm_frag_ptes(&params, vm, start, last + 1, addr, flags);
 +      params.shadow = false;
 +      amdgpu_vm_frag_ptes(&params, vm, start, last + 1, addr, flags);
  
 -      amdgpu_ring_pad_ib(ring, vm_update_params.ib);
 -      WARN_ON(vm_update_params.ib->length_dw > ndw);
 +      amdgpu_ring_pad_ib(ring, params.ib);
 +      WARN_ON(params.ib->length_dw > ndw);
        r = amdgpu_job_submit(job, ring, &vm->entity,
                              AMDGPU_FENCE_OWNER_VM, &f);
        if (r)
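
When pages_addr is set and there is no external source, the PTE values are now staged by the CPU at the tail of the IB itself and params.src points at that GPU address, so the copy path can be reused. A standalone sketch of the trick; the buffer size and addresses are made up:

    #include <stdint.h>
    #include <stdio.h>
    #include <string.h>

    int main(void)
    {
            uint32_t ib[64] = { 0 };         /* stand-in for the IB's dword array */
            uint64_t ib_gpu_addr = 0x100000; /* assumed GPU address of the IB */
            unsigned ndw = 64, nptes = 8, i;

            /* reserve nptes * 2 dwords at the tail for the 64-bit PTE values */
            unsigned tail = ndw - nptes * 2;
            uint64_t src = ib_gpu_addr + tail * 4;  /* GPU address of that staging area */

            for (i = 0; i < nptes; i++) {
                    uint64_t pte = ((uint64_t)i << 12) | 1;        /* fake address | valid bit */
                    memcpy(&ib[tail + i * 2], &pte, sizeof(pte));  /* staged by the CPU */
            }

            printf("copy command reads %u PTEs from 0x%llx\n",
                   nptes, (unsigned long long)src);
            return 0;
    }
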
@@@ -1117,32 -1062,28 +1117,32 @@@ static int amdgpu_vm_bo_split_mapping(s
   *
   * @adev: amdgpu_device pointer
   * @bo_va: requested BO and VM object
 - * @mem: ttm mem
 + * @clear: if true clear the entries
   *
   * Fill in the page table entries for @bo_va.
   * Returns 0 for success, -EINVAL for failure.
 - *
 - * Object have to be reserved and mutex must be locked!
   */
  int amdgpu_vm_bo_update(struct amdgpu_device *adev,
                        struct amdgpu_bo_va *bo_va,
 -                      struct ttm_mem_reg *mem)
 +                      bool clear)
  {
        struct amdgpu_vm *vm = bo_va->vm;
        struct amdgpu_bo_va_mapping *mapping;
        dma_addr_t *pages_addr = NULL;
        uint32_t gtt_flags, flags;
 +      struct ttm_mem_reg *mem;
        struct fence *exclusive;
        uint64_t addr;
        int r;
  
 -      if (mem) {
 +      if (clear) {
 +              mem = NULL;
 +              addr = 0;
 +              exclusive = NULL;
 +      } else {
                struct ttm_dma_tt *ttm;
  
 +              mem = &bo_va->bo->tbo.mem;
                addr = (u64)mem->start << PAGE_SHIFT;
                switch (mem->mem_type) {
                case TTM_PL_TT:
                }
  
                exclusive = reservation_object_get_excl(bo_va->bo->tbo.resv);
 -      } else {
 -              addr = 0;
 -              exclusive = NULL;
        }
  
        flags = amdgpu_ttm_tt_pte_flags(adev, bo_va->bo->tbo.ttm, mem);
 -      gtt_flags = (adev == bo_va->bo->adev) ? flags : 0;
 +      gtt_flags = (amdgpu_ttm_is_bound(bo_va->bo->tbo.ttm) &&
 +              adev == bo_va->bo->adev) ? flags : 0;
  
        spin_lock(&vm->status_lock);
        if (!list_empty(&bo_va->vm_status))
        spin_lock(&vm->status_lock);
        list_splice_init(&bo_va->invalids, &bo_va->valids);
        list_del_init(&bo_va->vm_status);
 -      if (!mem)
 +      if (clear)
                list_add(&bo_va->vm_status, &vm->cleared);
        spin_unlock(&vm->status_lock);
  
@@@ -1254,7 -1197,7 +1254,7 @@@ int amdgpu_vm_clear_invalids(struct amd
                        struct amdgpu_bo_va, vm_status);
                spin_unlock(&vm->status_lock);
  
 -              r = amdgpu_vm_bo_update(adev, bo_va, NULL);
 +              r = amdgpu_vm_bo_update(adev, bo_va, true);
                if (r)
                        return r;
  
@@@ -1399,8 -1342,7 +1399,8 @@@ int amdgpu_vm_bo_map(struct amdgpu_devi
                r = amdgpu_bo_create(adev, AMDGPU_VM_PTE_COUNT * 8,
                                     AMDGPU_GPU_PAGE_SIZE, true,
                                     AMDGPU_GEM_DOMAIN_VRAM,
 -                                   AMDGPU_GEM_CREATE_NO_CPU_ACCESS,
 +                                   AMDGPU_GEM_CREATE_NO_CPU_ACCESS |
 +                                   AMDGPU_GEM_CREATE_SHADOW,
                                     NULL, resv, &pt);
                if (r)
                        goto error_free;
@@@ -1593,14 -1535,13 +1593,14 @@@ int amdgpu_vm_init(struct amdgpu_devic
        r = amd_sched_entity_init(&ring->sched, &vm->entity,
                                  rq, amdgpu_sched_jobs);
        if (r)
-               return r;
+               goto err;
  
        vm->page_directory_fence = NULL;
  
        r = amdgpu_bo_create(adev, pd_size, align, true,
                             AMDGPU_GEM_DOMAIN_VRAM,
 -                           AMDGPU_GEM_CREATE_NO_CPU_ACCESS,
 +                           AMDGPU_GEM_CREATE_NO_CPU_ACCESS |
 +                           AMDGPU_GEM_CREATE_SHADOW,
                             NULL, NULL, &vm->page_directory);
        if (r)
                goto error_free_sched_entity;
@@@ -1624,6 -1565,9 +1624,9 @@@ error_free_page_directory
  error_free_sched_entity:
        amd_sched_entity_fini(&ring->sched, &vm->entity);
  
+ err:
+       drm_free_large(vm->page_tables);
        return r;
  }
  
@@@ -1656,16 -1600,10 +1659,16 @@@ void amdgpu_vm_fini(struct amdgpu_devic
                kfree(mapping);
        }
  
 -      for (i = 0; i < amdgpu_vm_num_pdes(adev); i++)
 +      for (i = 0; i < amdgpu_vm_num_pdes(adev); i++) {
 +              if (vm->page_tables[i].entry.robj &&
 +                  vm->page_tables[i].entry.robj->shadow)
 +                      amdgpu_bo_unref(&vm->page_tables[i].entry.robj->shadow);
                amdgpu_bo_unref(&vm->page_tables[i].entry.robj);
 +      }
        drm_free_large(vm->page_tables);
  
 +      if (vm->page_directory->shadow)
 +              amdgpu_bo_unref(&vm->page_directory->shadow);
        amdgpu_bo_unref(&vm->page_directory);
        fence_put(vm->page_directory_fence);
  }
index e6d7bf9520a0f6ddbf6829eb5cccacc8cf4300fe,77fdd9911c3cbe1268bb0c841edd6089ee058e18..cb952acc71339e31ac613a896fd1c268b0f327dc
@@@ -52,6 -52,7 +52,7 @@@ static void cik_sdma_set_ring_funcs(str
  static void cik_sdma_set_irq_funcs(struct amdgpu_device *adev);
  static void cik_sdma_set_buffer_funcs(struct amdgpu_device *adev);
  static void cik_sdma_set_vm_pte_funcs(struct amdgpu_device *adev);
+ static int cik_sdma_soft_reset(void *handle);
  
  MODULE_FIRMWARE("radeon/bonaire_sdma.bin");
  MODULE_FIRMWARE("radeon/bonaire_sdma1.bin");
@@@ -694,16 -695,24 +695,16 @@@ static void cik_sdma_vm_copy_pte(struc
                                 uint64_t pe, uint64_t src,
                                 unsigned count)
  {
 -      while (count) {
 -              unsigned bytes = count * 8;
 -              if (bytes > 0x1FFFF8)
 -                      bytes = 0x1FFFF8;
 -
 -              ib->ptr[ib->length_dw++] = SDMA_PACKET(SDMA_OPCODE_COPY,
 -                      SDMA_WRITE_SUB_OPCODE_LINEAR, 0);
 -              ib->ptr[ib->length_dw++] = bytes;
 -              ib->ptr[ib->length_dw++] = 0; /* src/dst endian swap */
 -              ib->ptr[ib->length_dw++] = lower_32_bits(src);
 -              ib->ptr[ib->length_dw++] = upper_32_bits(src);
 -              ib->ptr[ib->length_dw++] = lower_32_bits(pe);
 -              ib->ptr[ib->length_dw++] = upper_32_bits(pe);
 -
 -              pe += bytes;
 -              src += bytes;
 -              count -= bytes / 8;
 -      }
 +      unsigned bytes = count * 8;
 +
 +      ib->ptr[ib->length_dw++] = SDMA_PACKET(SDMA_OPCODE_COPY,
 +              SDMA_WRITE_SUB_OPCODE_LINEAR, 0);
 +      ib->ptr[ib->length_dw++] = bytes;
 +      ib->ptr[ib->length_dw++] = 0; /* src/dst endian swap */
 +      ib->ptr[ib->length_dw++] = lower_32_bits(src);
 +      ib->ptr[ib->length_dw++] = upper_32_bits(src);
 +      ib->ptr[ib->length_dw++] = lower_32_bits(pe);
 +      ib->ptr[ib->length_dw++] = upper_32_bits(pe);
  }
  
  /**
   *
   * @ib: indirect buffer to fill with commands
   * @pe: addr of the page entry
 - * @addr: dst addr to write into pe
 + * @value: dst addr to write into pe
   * @count: number of page entries to update
   * @incr: increase next addr by incr bytes
 - * @flags: access flags
   *
   * Update PTEs by writing them manually using sDMA (CIK).
   */
 -static void cik_sdma_vm_write_pte(struct amdgpu_ib *ib,
 -                                const dma_addr_t *pages_addr, uint64_t pe,
 -                                uint64_t addr, unsigned count,
 -                                uint32_t incr, uint32_t flags)
 +static void cik_sdma_vm_write_pte(struct amdgpu_ib *ib, uint64_t pe,
 +                                uint64_t value, unsigned count,
 +                                uint32_t incr)
  {
 -      uint64_t value;
 -      unsigned ndw;
 -
 -      while (count) {
 -              ndw = count * 2;
 -              if (ndw > 0xFFFFE)
 -                      ndw = 0xFFFFE;
 -
 -              /* for non-physically contiguous pages (system) */
 -              ib->ptr[ib->length_dw++] = SDMA_PACKET(SDMA_OPCODE_WRITE,
 -                      SDMA_WRITE_SUB_OPCODE_LINEAR, 0);
 -              ib->ptr[ib->length_dw++] = pe;
 -              ib->ptr[ib->length_dw++] = upper_32_bits(pe);
 -              ib->ptr[ib->length_dw++] = ndw;
 -              for (; ndw > 0; ndw -= 2, --count, pe += 8) {
 -                      value = amdgpu_vm_map_gart(pages_addr, addr);
 -                      addr += incr;
 -                      value |= flags;
 -                      ib->ptr[ib->length_dw++] = value;
 -                      ib->ptr[ib->length_dw++] = upper_32_bits(value);
 -              }
 +      unsigned ndw = count * 2;
 +
 +      ib->ptr[ib->length_dw++] = SDMA_PACKET(SDMA_OPCODE_WRITE,
 +              SDMA_WRITE_SUB_OPCODE_LINEAR, 0);
 +      ib->ptr[ib->length_dw++] = lower_32_bits(pe);
 +      ib->ptr[ib->length_dw++] = upper_32_bits(pe);
 +      ib->ptr[ib->length_dw++] = ndw;
 +      for (; ndw > 0; ndw -= 2) {
 +              ib->ptr[ib->length_dw++] = lower_32_bits(value);
 +              ib->ptr[ib->length_dw++] = upper_32_bits(value);
 +              value += incr;
        }
  }
  
   *
   * Update the page tables using sDMA (CIK).
   */
 -static void cik_sdma_vm_set_pte_pde(struct amdgpu_ib *ib,
 -                                  uint64_t pe,
 +static void cik_sdma_vm_set_pte_pde(struct amdgpu_ib *ib, uint64_t pe,
                                    uint64_t addr, unsigned count,
                                    uint32_t incr, uint32_t flags)
  {
 -      uint64_t value;
 -      unsigned ndw;
 -
 -      while (count) {
 -              ndw = count;
 -              if (ndw > 0x7FFFF)
 -                      ndw = 0x7FFFF;
 -
 -              if (flags & AMDGPU_PTE_VALID)
 -                      value = addr;
 -              else
 -                      value = 0;
 -
 -              /* for physically contiguous pages (vram) */
 -              ib->ptr[ib->length_dw++] = SDMA_PACKET(SDMA_OPCODE_GENERATE_PTE_PDE, 0, 0);
 -              ib->ptr[ib->length_dw++] = pe; /* dst addr */
 -              ib->ptr[ib->length_dw++] = upper_32_bits(pe);
 -              ib->ptr[ib->length_dw++] = flags; /* mask */
 -              ib->ptr[ib->length_dw++] = 0;
 -              ib->ptr[ib->length_dw++] = value; /* value */
 -              ib->ptr[ib->length_dw++] = upper_32_bits(value);
 -              ib->ptr[ib->length_dw++] = incr; /* increment size */
 -              ib->ptr[ib->length_dw++] = 0;
 -              ib->ptr[ib->length_dw++] = ndw; /* number of entries */
 -
 -              pe += ndw * 8;
 -              addr += ndw * incr;
 -              count -= ndw;
 -      }
 +      /* for physically contiguous pages (vram) */
 +      ib->ptr[ib->length_dw++] = SDMA_PACKET(SDMA_OPCODE_GENERATE_PTE_PDE, 0, 0);
 +      ib->ptr[ib->length_dw++] = lower_32_bits(pe); /* dst addr */
 +      ib->ptr[ib->length_dw++] = upper_32_bits(pe);
 +      ib->ptr[ib->length_dw++] = flags; /* mask */
 +      ib->ptr[ib->length_dw++] = 0;
 +      ib->ptr[ib->length_dw++] = lower_32_bits(addr); /* value */
 +      ib->ptr[ib->length_dw++] = upper_32_bits(addr);
 +      ib->ptr[ib->length_dw++] = incr; /* increment size */
 +      ib->ptr[ib->length_dw++] = 0;
 +      ib->ptr[ib->length_dw++] = count; /* number of entries */
  }
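
The sDMA PTE helpers lose their internal while (count) splitting because the VM code now flushes a run once it reaches AMDGPU_VM_MAX_UPDATE_SIZE, so one packet per call is always enough. A standalone sketch of moving the split to the caller; MAX_UPDATE is an assumed stand-in value:

    #include <assert.h>
    #include <stdio.h>

    #define MAX_UPDATE 128  /* assumed cap enforced by the caller */

    static void emit_set_ptes(unsigned count)
    {
            assert(count <= MAX_UPDATE);    /* a single packet, no internal loop */
            printf("one packet covering %u PTEs\n", count);
    }

    static void walk_and_update(unsigned total)
    {
            unsigned run = 0, i;

            for (i = 0; i < total; i++) {
                    if (++run == MAX_UPDATE) {      /* caller-side flush at the cap */
                            emit_set_ptes(run);
                            run = 0;
                    }
            }
            if (run)
                    emit_set_ptes(run);
    }

    int main(void)
    {
            walk_and_update(300);   /* 128 + 128 + 44 */
            return 0;
    }
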
  
  /**
@@@ -847,22 -887,6 +848,22 @@@ static void cik_sdma_ring_emit_vm_flush
        amdgpu_ring_write(ring, (0xfff << 16) | 10); /* retry count, poll interval */
  }
  
 +static unsigned cik_sdma_ring_get_emit_ib_size(struct amdgpu_ring *ring)
 +{
 +      return
 +              7 + 4; /* cik_sdma_ring_emit_ib */
 +}
 +
 +static unsigned cik_sdma_ring_get_dma_frame_size(struct amdgpu_ring *ring)
 +{
 +      return
 +              6 + /* cik_sdma_ring_emit_hdp_flush */
 +              3 + /* cik_sdma_ring_emit_hdp_invalidate */
 +              6 + /* cik_sdma_ring_emit_pipeline_sync */
 +              12 + /* cik_sdma_ring_emit_vm_flush */
 +              9 + 9 + 9; /* cik_sdma_ring_emit_fence x3 for user fence, vm fence */
 +}
 +
  static void cik_enable_sdma_mgcg(struct amdgpu_device *adev,
                                 bool enable)
  {
@@@ -1014,6 -1038,8 +1015,8 @@@ static int cik_sdma_resume(void *handle
  {
        struct amdgpu_device *adev = (struct amdgpu_device *)handle;
  
+       cik_sdma_soft_reset(handle);
        return cik_sdma_hw_init(adev);
  }
  
@@@ -1236,8 -1262,6 +1239,8 @@@ static const struct amdgpu_ring_funcs c
        .test_ib = cik_sdma_ring_test_ib,
        .insert_nop = cik_sdma_ring_insert_nop,
        .pad_ib = cik_sdma_ring_pad_ib,
 +      .get_emit_ib_size = cik_sdma_ring_get_emit_ib_size,
 +      .get_dma_frame_size = cik_sdma_ring_get_dma_frame_size,
  };
  
  static void cik_sdma_set_ring_funcs(struct amdgpu_device *adev)
index 32a676291e67e14759c5d5dff522167f5a28d9ac,425413fcaf02ffb79436ab2f15f6f7389e8766dc..71116da9e782d52597837b2dfa95a17b2007998d
@@@ -1645,147 -1645,6 +1645,147 @@@ static u32 gfx_v7_0_get_rb_active_bitma
        return (~data) & mask;
  }
  
 +static void
 +gfx_v7_0_raster_config(struct amdgpu_device *adev, u32 *rconf, u32 *rconf1)
 +{
 +      switch (adev->asic_type) {
 +      case CHIP_BONAIRE:
 +              *rconf |= RB_MAP_PKR0(2) | RB_XSEL2(1) | SE_MAP(2) |
 +                        SE_XSEL(1) | SE_YSEL(1);
 +              *rconf1 |= 0x0;
 +              break;
 +      case CHIP_HAWAII:
 +              *rconf |= RB_MAP_PKR0(2) | RB_MAP_PKR1(2) |
 +                        RB_XSEL2(1) | PKR_MAP(2) | PKR_XSEL(1) |
 +                        PKR_YSEL(1) | SE_MAP(2) | SE_XSEL(2) |
 +                        SE_YSEL(3);
 +              *rconf1 |= SE_PAIR_MAP(2) | SE_PAIR_XSEL(3) |
 +                         SE_PAIR_YSEL(2);
 +              break;
 +      case CHIP_KAVERI:
 +              *rconf |= RB_MAP_PKR0(2);
 +              *rconf1 |= 0x0;
 +              break;
 +      case CHIP_KABINI:
 +      case CHIP_MULLINS:
 +              *rconf |= 0x0;
 +              *rconf1 |= 0x0;
 +              break;
 +      default:
 +              DRM_ERROR("unknown asic: 0x%x\n", adev->asic_type);
 +              break;
 +      }
 +}
 +
 +static void
 +gfx_v7_0_write_harvested_raster_configs(struct amdgpu_device *adev,
 +                                      u32 raster_config, u32 raster_config_1,
 +                                      unsigned rb_mask, unsigned num_rb)
 +{
 +      unsigned sh_per_se = max_t(unsigned, adev->gfx.config.max_sh_per_se, 1);
 +      unsigned num_se = max_t(unsigned, adev->gfx.config.max_shader_engines, 1);
 +      unsigned rb_per_pkr = min_t(unsigned, num_rb / num_se / sh_per_se, 2);
 +      unsigned rb_per_se = num_rb / num_se;
 +      unsigned se_mask[4];
 +      unsigned se;
 +
 +      se_mask[0] = ((1 << rb_per_se) - 1) & rb_mask;
 +      se_mask[1] = (se_mask[0] << rb_per_se) & rb_mask;
 +      se_mask[2] = (se_mask[1] << rb_per_se) & rb_mask;
 +      se_mask[3] = (se_mask[2] << rb_per_se) & rb_mask;
 +
 +      WARN_ON(!(num_se == 1 || num_se == 2 || num_se == 4));
 +      WARN_ON(!(sh_per_se == 1 || sh_per_se == 2));
 +      WARN_ON(!(rb_per_pkr == 1 || rb_per_pkr == 2));
 +
 +      if ((num_se > 2) && ((!se_mask[0] && !se_mask[1]) ||
 +                           (!se_mask[2] && !se_mask[3]))) {
 +              raster_config_1 &= ~SE_PAIR_MAP_MASK;
 +
 +              if (!se_mask[0] && !se_mask[1]) {
 +                      raster_config_1 |=
 +                              SE_PAIR_MAP(RASTER_CONFIG_SE_PAIR_MAP_3);
 +              } else {
 +                      raster_config_1 |=
 +                              SE_PAIR_MAP(RASTER_CONFIG_SE_PAIR_MAP_0);
 +              }
 +      }
 +
 +      for (se = 0; se < num_se; se++) {
 +              unsigned raster_config_se = raster_config;
 +              unsigned pkr0_mask = ((1 << rb_per_pkr) - 1) << (se * rb_per_se);
 +              unsigned pkr1_mask = pkr0_mask << rb_per_pkr;
 +              int idx = (se / 2) * 2;
 +
 +              if ((num_se > 1) && (!se_mask[idx] || !se_mask[idx + 1])) {
 +                      raster_config_se &= ~SE_MAP_MASK;
 +
 +                      if (!se_mask[idx]) {
 +                              raster_config_se |= SE_MAP(RASTER_CONFIG_SE_MAP_3);
 +                      } else {
 +                              raster_config_se |= SE_MAP(RASTER_CONFIG_SE_MAP_0);
 +                      }
 +              }
 +
 +              pkr0_mask &= rb_mask;
 +              pkr1_mask &= rb_mask;
 +              if (rb_per_se > 2 && (!pkr0_mask || !pkr1_mask)) {
 +                      raster_config_se &= ~PKR_MAP_MASK;
 +
 +                      if (!pkr0_mask) {
 +                              raster_config_se |= PKR_MAP(RASTER_CONFIG_PKR_MAP_3);
 +                      } else {
 +                              raster_config_se |= PKR_MAP(RASTER_CONFIG_PKR_MAP_0);
 +                      }
 +              }
 +
 +              if (rb_per_se >= 2) {
 +                      unsigned rb0_mask = 1 << (se * rb_per_se);
 +                      unsigned rb1_mask = rb0_mask << 1;
 +
 +                      rb0_mask &= rb_mask;
 +                      rb1_mask &= rb_mask;
 +                      if (!rb0_mask || !rb1_mask) {
 +                              raster_config_se &= ~RB_MAP_PKR0_MASK;
 +
 +                              if (!rb0_mask) {
 +                                      raster_config_se |=
 +                                              RB_MAP_PKR0(RASTER_CONFIG_RB_MAP_3);
 +                              } else {
 +                                      raster_config_se |=
 +                                              RB_MAP_PKR0(RASTER_CONFIG_RB_MAP_0);
 +                              }
 +                      }
 +
 +                      if (rb_per_se > 2) {
 +                              rb0_mask = 1 << (se * rb_per_se + rb_per_pkr);
 +                              rb1_mask = rb0_mask << 1;
 +                              rb0_mask &= rb_mask;
 +                              rb1_mask &= rb_mask;
 +                              if (!rb0_mask || !rb1_mask) {
 +                                      raster_config_se &= ~RB_MAP_PKR1_MASK;
 +
 +                                      if (!rb0_mask) {
 +                                              raster_config_se |=
 +                                                      RB_MAP_PKR1(RASTER_CONFIG_RB_MAP_3);
 +                                      } else {
 +                                              raster_config_se |=
 +                                                      RB_MAP_PKR1(RASTER_CONFIG_RB_MAP_0);
 +                                      }
 +                              }
 +                      }
 +              }
 +
 +              /* GRBM_GFX_INDEX has a different offset on CI+ */
 +              gfx_v7_0_select_se_sh(adev, se, 0xffffffff, 0xffffffff);
 +              WREG32(mmPA_SC_RASTER_CONFIG, raster_config_se);
 +              WREG32(mmPA_SC_RASTER_CONFIG_1, raster_config_1);
 +      }
 +
 +      /* GRBM_GFX_INDEX has a different offset on CI+ */
 +      gfx_v7_0_select_se_sh(adev, 0xffffffff, 0xffffffff, 0xffffffff);
 +}
 +
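The per-SE masks above come from slicing rb_mask into rb_per_se-wide chunks, one chunk per shader engine; an empty chunk (or an empty SE pair) is what triggers the SE_MAP/SE_PAIR_MAP and RB_MAP_PKR* re-steering. A minimal stand-alone sketch of just the slicing step, using made-up values for num_se, rb_per_se and rb_mask rather than a real ASIC configuration:

    #include <stdio.h>

    int main(void)
    {
            /* hypothetical example: 2 SEs, 2 RBs per SE, RB2 harvested */
            unsigned num_se = 2, rb_per_se = 2;
            unsigned rb_mask = 0xb;
            unsigned se_mask[4], se;

            se_mask[0] = ((1u << rb_per_se) - 1) & rb_mask;
            for (se = 1; se < 4; se++)
                    se_mask[se] = (se_mask[se - 1] << rb_per_se) & rb_mask;

            for (se = 0; se < num_se; se++)
                    printf("se_mask[%u] = 0x%x\n", se, se_mask[se]);
            return 0;
    }
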
  /**
   * gfx_v7_0_setup_rb - setup the RBs on the asic
   *
@@@ -1799,11 -1658,9 +1799,11 @@@ static void gfx_v7_0_setup_rb(struct am
  {
        int i, j;
        u32 data;
 +      u32 raster_config = 0, raster_config_1 = 0;
        u32 active_rbs = 0;
        u32 rb_bitmap_width_per_sh = adev->gfx.config.max_backends_per_se /
                                        adev->gfx.config.max_sh_per_se;
 +      unsigned num_rb_pipes;
  
        mutex_lock(&adev->grbm_idx_mutex);
        for (i = 0; i < adev->gfx.config.max_shader_engines; i++) {
                }
        }
        gfx_v7_0_select_se_sh(adev, 0xffffffff, 0xffffffff, 0xffffffff);
 -      mutex_unlock(&adev->grbm_idx_mutex);
  
        adev->gfx.config.backend_enable_mask = active_rbs;
        adev->gfx.config.num_rbs = hweight32(active_rbs);
 +
 +      num_rb_pipes = min_t(unsigned, adev->gfx.config.max_backends_per_se *
 +                           adev->gfx.config.max_shader_engines, 16);
 +
 +      gfx_v7_0_raster_config(adev, &raster_config, &raster_config_1);
 +
 +      if (!adev->gfx.config.backend_enable_mask ||
 +                      adev->gfx.config.num_rbs >= num_rb_pipes) {
 +              WREG32(mmPA_SC_RASTER_CONFIG, raster_config);
 +              WREG32(mmPA_SC_RASTER_CONFIG_1, raster_config_1);
 +      } else {
 +              gfx_v7_0_write_harvested_raster_configs(adev, raster_config, raster_config_1,
 +                                                      adev->gfx.config.backend_enable_mask,
 +                                                      num_rb_pipes);
 +      }
 +      mutex_unlock(&adev->grbm_idx_mutex);
  }
  
  /**
@@@ -2254,25 -2096,6 +2254,25 @@@ static void gfx_v7_0_ring_emit_ib_compu
        amdgpu_ring_write(ring, control);
  }
  
 +static void gfx_v7_ring_emit_cntxcntl(struct amdgpu_ring *ring, uint32_t flags)
 +{
 +      uint32_t dw2 = 0;
 +
 +      dw2 |= 0x80000000; /* set load_enable otherwise this package is just NOPs */
 +      if (flags & AMDGPU_HAVE_CTX_SWITCH) {
 +              /* set load_global_config & load_global_uconfig */
 +              dw2 |= 0x8001;
 +              /* set load_cs_sh_regs */
 +              dw2 |= 0x01000000;
 +              /* set load_per_context_state & load_gfx_sh_regs */
 +              dw2 |= 0x10002;
 +      }
 +
 +      amdgpu_ring_write(ring, PACKET3(PACKET3_CONTEXT_CONTROL, 1));
 +      amdgpu_ring_write(ring, dw2);
 +      amdgpu_ring_write(ring, 0);
 +}
 +
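When AMDGPU_HAVE_CTX_SWITCH is set, the individual load_* bits above collapse into a single dw2 value; the throwaway sketch below only reproduces the OR-ing from the function (no ring or hardware access):

    #include <stdio.h>
    #include <stdint.h>

    int main(void)
    {
            uint32_t dw2 = 0x80000000;      /* load_enable */

            dw2 |= 0x8001;                  /* load_global_config, load_global_uconfig */
            dw2 |= 0x01000000;              /* load_cs_sh_regs */
            dw2 |= 0x10002;                 /* load_per_context_state, load_gfx_sh_regs */

            printf("dw2 = 0x%08x\n", dw2);  /* prints 0x81018003 */
            return 0;
    }
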
  /**
   * gfx_v7_0_ring_test_ib - basic ring IB test
   *
@@@ -2620,7 -2443,7 +2620,7 @@@ static int gfx_v7_0_cp_gfx_resume(struc
        return 0;
  }
  
 -static u32 gfx_v7_0_ring_get_rptr_gfx(struct amdgpu_ring *ring)
 +static u32 gfx_v7_0_ring_get_rptr(struct amdgpu_ring *ring)
  {
        return ring->adev->wb.wb[ring->rptr_offs];
  }
@@@ -2640,6 -2463,11 +2640,6 @@@ static void gfx_v7_0_ring_set_wptr_gfx(
        (void)RREG32(mmCP_RB0_WPTR);
  }
  
 -static u32 gfx_v7_0_ring_get_rptr_compute(struct amdgpu_ring *ring)
 -{
 -      return ring->adev->wb.wb[ring->rptr_offs];
 -}
 -
  static u32 gfx_v7_0_ring_get_wptr_compute(struct amdgpu_ring *ring)
  {
        /* XXX check if swapping is necessary on BE */
@@@ -2927,8 -2755,7 +2927,7 @@@ static int gfx_v7_0_cp_compute_resume(s
        u64 wb_gpu_addr;
        u32 *buf;
        struct bonaire_mqd *mqd;
-       gfx_v7_0_cp_compute_enable(adev, true);
+       struct amdgpu_ring *ring;
  
        /* fix up chicken bits */
        tmp = RREG32(mmCP_CPF_DEBUG);
  
        /* init the queues.  Just two for now. */
        for (i = 0; i < adev->gfx.num_compute_rings; i++) {
-               struct amdgpu_ring *ring = &adev->gfx.compute_ring[i];
+               ring = &adev->gfx.compute_ring[i];
  
                if (ring->mqd_obj == NULL) {
                        r = amdgpu_bo_create(adev,
                amdgpu_bo_unreserve(ring->mqd_obj);
  
                ring->ready = true;
+       }
+       gfx_v7_0_cp_compute_enable(adev, true);
+       for (i = 0; i < adev->gfx.num_compute_rings; i++) {
+               ring = &adev->gfx.compute_ring[i];
                r = amdgpu_ring_test_ring(ring);
                if (r)
                        ring->ready = false;
@@@ -4348,41 -4182,6 +4354,41 @@@ static void gfx_v7_0_ring_emit_gds_swit
        amdgpu_ring_write(ring, (1 << (oa_size + oa_base)) - (1 << oa_base));
  }
  
 +static unsigned gfx_v7_0_ring_get_emit_ib_size_gfx(struct amdgpu_ring *ring)
 +{
 +      return
 +              4; /* gfx_v7_0_ring_emit_ib_gfx */
 +}
 +
 +static unsigned gfx_v7_0_ring_get_dma_frame_size_gfx(struct amdgpu_ring *ring)
 +{
 +      return
 +              20 + /* gfx_v7_0_ring_emit_gds_switch */
 +              7 + /* gfx_v7_0_ring_emit_hdp_flush */
 +              5 + /* gfx_v7_0_ring_emit_hdp_invalidate */
 +              12 + 12 + 12 + /* gfx_v7_0_ring_emit_fence_gfx x3 for user fence, vm fence */
 +              7 + 4 + /* gfx_v7_0_ring_emit_pipeline_sync */
 +              17 + 6 + /* gfx_v7_0_ring_emit_vm_flush */
 +              3; /* gfx_v7_ring_emit_cntxcntl */
 +}
 +
 +static unsigned gfx_v7_0_ring_get_emit_ib_size_compute(struct amdgpu_ring *ring)
 +{
 +      return
 +              4; /* gfx_v7_0_ring_emit_ib_compute */
 +}
 +
 +static unsigned gfx_v7_0_ring_get_dma_frame_size_compute(struct amdgpu_ring *ring)
 +{
 +      return
 +              20 + /* gfx_v7_0_ring_emit_gds_switch */
 +              7 + /* gfx_v7_0_ring_emit_hdp_flush */
 +              5 + /* gfx_v7_0_ring_emit_hdp_invalidate */
 +              7 + /* gfx_v7_0_ring_emit_pipeline_sync */
 +              17 + /* gfx_v7_0_ring_emit_vm_flush */
 +              7 + 7 + 7; /* gfx_v7_0_ring_emit_fence_compute x3 for user fence, vm fence */
 +}
 +
  static const struct amdgpu_gfx_funcs gfx_v7_0_gfx_funcs = {
        .get_gpu_clock_counter = &gfx_v7_0_get_gpu_clock_counter,
        .select_se_sh = &gfx_v7_0_select_se_sh,
@@@ -4672,21 -4471,24 +4678,21 @@@ static int gfx_v7_0_sw_init(void *handl
        }
  
        /* reserve GDS, GWS and OA resource for gfx */
 -      r = amdgpu_bo_create(adev, adev->gds.mem.gfx_partition_size,
 -                      PAGE_SIZE, true,
 -                      AMDGPU_GEM_DOMAIN_GDS, 0,
 -                      NULL, NULL, &adev->gds.gds_gfx_bo);
 +      r = amdgpu_bo_create_kernel(adev, adev->gds.mem.gfx_partition_size,
 +                                  PAGE_SIZE, AMDGPU_GEM_DOMAIN_GDS,
 +                                  &adev->gds.gds_gfx_bo, NULL, NULL);
        if (r)
                return r;
  
 -      r = amdgpu_bo_create(adev, adev->gds.gws.gfx_partition_size,
 -              PAGE_SIZE, true,
 -              AMDGPU_GEM_DOMAIN_GWS, 0,
 -              NULL, NULL, &adev->gds.gws_gfx_bo);
 +      r = amdgpu_bo_create_kernel(adev, adev->gds.gws.gfx_partition_size,
 +                                  PAGE_SIZE, AMDGPU_GEM_DOMAIN_GWS,
 +                                  &adev->gds.gws_gfx_bo, NULL, NULL);
        if (r)
                return r;
  
 -      r = amdgpu_bo_create(adev, adev->gds.oa.gfx_partition_size,
 -                      PAGE_SIZE, true,
 -                      AMDGPU_GEM_DOMAIN_OA, 0,
 -                      NULL, NULL, &adev->gds.oa_gfx_bo);
 +      r = amdgpu_bo_create_kernel(adev, adev->gds.oa.gfx_partition_size,
 +                                  PAGE_SIZE, AMDGPU_GEM_DOMAIN_OA,
 +                                  &adev->gds.oa_gfx_bo, NULL, NULL);
        if (r)
                return r;
  
@@@ -4702,9 -4504,9 +4708,9 @@@ static int gfx_v7_0_sw_fini(void *handl
        int i;
        struct amdgpu_device *adev = (struct amdgpu_device *)handle;
  
 -      amdgpu_bo_unref(&adev->gds.oa_gfx_bo);
 -      amdgpu_bo_unref(&adev->gds.gws_gfx_bo);
 -      amdgpu_bo_unref(&adev->gds.gds_gfx_bo);
 +      amdgpu_bo_free_kernel(&adev->gds.oa_gfx_bo, NULL, NULL);
 +      amdgpu_bo_free_kernel(&adev->gds.gws_gfx_bo, NULL, NULL);
 +      amdgpu_bo_free_kernel(&adev->gds.gds_gfx_bo, NULL, NULL);
  
        for (i = 0; i < adev->gfx.num_gfx_rings; i++)
                amdgpu_ring_fini(&adev->gfx.gfx_ring[i]);
@@@ -5135,7 -4937,7 +5141,7 @@@ const struct amd_ip_funcs gfx_v7_0_ip_f
  };
  
  static const struct amdgpu_ring_funcs gfx_v7_0_ring_funcs_gfx = {
 -      .get_rptr = gfx_v7_0_ring_get_rptr_gfx,
 +      .get_rptr = gfx_v7_0_ring_get_rptr,
        .get_wptr = gfx_v7_0_ring_get_wptr_gfx,
        .set_wptr = gfx_v7_0_ring_set_wptr_gfx,
        .parse_cs = NULL,
        .test_ib = gfx_v7_0_ring_test_ib,
        .insert_nop = amdgpu_ring_insert_nop,
        .pad_ib = amdgpu_ring_generic_pad_ib,
 +      .emit_cntxcntl = gfx_v7_ring_emit_cntxcntl,
 +      .get_emit_ib_size = gfx_v7_0_ring_get_emit_ib_size_gfx,
 +      .get_dma_frame_size = gfx_v7_0_ring_get_dma_frame_size_gfx,
  };
  
  static const struct amdgpu_ring_funcs gfx_v7_0_ring_funcs_compute = {
 -      .get_rptr = gfx_v7_0_ring_get_rptr_compute,
 +      .get_rptr = gfx_v7_0_ring_get_rptr,
        .get_wptr = gfx_v7_0_ring_get_wptr_compute,
        .set_wptr = gfx_v7_0_ring_set_wptr_compute,
        .parse_cs = NULL,
        .test_ib = gfx_v7_0_ring_test_ib,
        .insert_nop = amdgpu_ring_insert_nop,
        .pad_ib = amdgpu_ring_generic_pad_ib,
 +      .get_emit_ib_size = gfx_v7_0_ring_get_emit_ib_size_compute,
 +      .get_dma_frame_size = gfx_v7_0_ring_get_dma_frame_size_compute,
  };
  
  static void gfx_v7_0_set_ring_funcs(struct amdgpu_device *adev)
index 9ae307505190ec0afd81bc0b049d7b2b76bff3b0,a64715d90503a87e02478d6fe718eb7c90047b37..565dab3c72186704542b3763fbcdcecb9a6e9cf9
@@@ -190,8 -190,12 +190,8 @@@ out
   */
  static uint32_t sdma_v2_4_ring_get_rptr(struct amdgpu_ring *ring)
  {
 -      u32 rptr;
 -
        /* XXX check if swapping is necessary on BE */
 -      rptr = ring->adev->wb.wb[ring->rptr_offs] >> 2;
 -
 -      return rptr;
 +      return ring->adev->wb.wb[ring->rptr_offs] >> 2;
  }
  
  /**
@@@ -710,7 -714,7 +710,7 @@@ static int sdma_v2_4_ring_test_ib(struc
                DRM_ERROR("amdgpu: IB test timed out\n");
                r = -ETIMEDOUT;
                goto err1;
-       } else if (r) {
+       } else if (r < 0) {
                DRM_ERROR("amdgpu: fence wait failed (%ld).\n", r);
                goto err1;
        }
@@@ -745,16 -749,24 +745,16 @@@ static void sdma_v2_4_vm_copy_pte(struc
                                  uint64_t pe, uint64_t src,
                                  unsigned count)
  {
 -      while (count) {
 -              unsigned bytes = count * 8;
 -              if (bytes > 0x1FFFF8)
 -                      bytes = 0x1FFFF8;
 -
 -              ib->ptr[ib->length_dw++] = SDMA_PKT_HEADER_OP(SDMA_OP_COPY) |
 -                      SDMA_PKT_HEADER_SUB_OP(SDMA_SUBOP_COPY_LINEAR);
 -              ib->ptr[ib->length_dw++] = bytes;
 -              ib->ptr[ib->length_dw++] = 0; /* src/dst endian swap */
 -              ib->ptr[ib->length_dw++] = lower_32_bits(src);
 -              ib->ptr[ib->length_dw++] = upper_32_bits(src);
 -              ib->ptr[ib->length_dw++] = lower_32_bits(pe);
 -              ib->ptr[ib->length_dw++] = upper_32_bits(pe);
 -
 -              pe += bytes;
 -              src += bytes;
 -              count -= bytes / 8;
 -      }
 +      unsigned bytes = count * 8;
 +
 +      ib->ptr[ib->length_dw++] = SDMA_PKT_HEADER_OP(SDMA_OP_COPY) |
 +              SDMA_PKT_HEADER_SUB_OP(SDMA_SUBOP_COPY_LINEAR);
 +      ib->ptr[ib->length_dw++] = bytes;
 +      ib->ptr[ib->length_dw++] = 0; /* src/dst endian swap */
 +      ib->ptr[ib->length_dw++] = lower_32_bits(src);
 +      ib->ptr[ib->length_dw++] = upper_32_bits(src);
 +      ib->ptr[ib->length_dw++] = lower_32_bits(pe);
 +      ib->ptr[ib->length_dw++] = upper_32_bits(pe);
  }
  
  /**
   *
   * @ib: indirect buffer to fill with commands
   * @pe: addr of the page entry
 - * @addr: dst addr to write into pe
 + * @value: dst addr to write into pe
   * @count: number of page entries to update
   * @incr: increase next addr by incr bytes
 - * @flags: access flags
   *
   * Update PTEs by writing them manually using sDMA (CIK).
   */
 -static void sdma_v2_4_vm_write_pte(struct amdgpu_ib *ib,
 -                                 const dma_addr_t *pages_addr, uint64_t pe,
 -                                 uint64_t addr, unsigned count,
 -                                 uint32_t incr, uint32_t flags)
 +static void sdma_v2_4_vm_write_pte(struct amdgpu_ib *ib, uint64_t pe,
 +                                 uint64_t value, unsigned count,
 +                                 uint32_t incr)
  {
 -      uint64_t value;
 -      unsigned ndw;
 -
 -      while (count) {
 -              ndw = count * 2;
 -              if (ndw > 0xFFFFE)
 -                      ndw = 0xFFFFE;
 -
 -              /* for non-physically contiguous pages (system) */
 -              ib->ptr[ib->length_dw++] = SDMA_PKT_HEADER_OP(SDMA_OP_WRITE) |
 -                      SDMA_PKT_HEADER_SUB_OP(SDMA_SUBOP_COPY_LINEAR);
 -              ib->ptr[ib->length_dw++] = pe;
 -              ib->ptr[ib->length_dw++] = upper_32_bits(pe);
 -              ib->ptr[ib->length_dw++] = ndw;
 -              for (; ndw > 0; ndw -= 2, --count, pe += 8) {
 -                      value = amdgpu_vm_map_gart(pages_addr, addr);
 -                      addr += incr;
 -                      value |= flags;
 -                      ib->ptr[ib->length_dw++] = value;
 -                      ib->ptr[ib->length_dw++] = upper_32_bits(value);
 -              }
 +      unsigned ndw = count * 2;
 +
 +      ib->ptr[ib->length_dw++] = SDMA_PKT_HEADER_OP(SDMA_OP_WRITE) |
 +              SDMA_PKT_HEADER_SUB_OP(SDMA_SUBOP_COPY_LINEAR);
 +      ib->ptr[ib->length_dw++] = pe;
 +      ib->ptr[ib->length_dw++] = upper_32_bits(pe);
 +      ib->ptr[ib->length_dw++] = ndw;
 +      for (; ndw > 0; ndw -= 2, --count, pe += 8) {
 +              ib->ptr[ib->length_dw++] = lower_32_bits(value);
 +              ib->ptr[ib->length_dw++] = upper_32_bits(value);
 +              value += incr;
        }
  }
  
   *
   * Update the page tables using sDMA (CIK).
   */
 -static void sdma_v2_4_vm_set_pte_pde(struct amdgpu_ib *ib,
 -                                   uint64_t pe,
 +static void sdma_v2_4_vm_set_pte_pde(struct amdgpu_ib *ib, uint64_t pe,
                                     uint64_t addr, unsigned count,
                                     uint32_t incr, uint32_t flags)
  {
 -      uint64_t value;
 -      unsigned ndw;
 -
 -      while (count) {
 -              ndw = count;
 -              if (ndw > 0x7FFFF)
 -                      ndw = 0x7FFFF;
 -
 -              if (flags & AMDGPU_PTE_VALID)
 -                      value = addr;
 -              else
 -                      value = 0;
 -
 -              /* for physically contiguous pages (vram) */
 -              ib->ptr[ib->length_dw++] = SDMA_PKT_HEADER_OP(SDMA_OP_GEN_PTEPDE);
 -              ib->ptr[ib->length_dw++] = pe; /* dst addr */
 -              ib->ptr[ib->length_dw++] = upper_32_bits(pe);
 -              ib->ptr[ib->length_dw++] = flags; /* mask */
 -              ib->ptr[ib->length_dw++] = 0;
 -              ib->ptr[ib->length_dw++] = value; /* value */
 -              ib->ptr[ib->length_dw++] = upper_32_bits(value);
 -              ib->ptr[ib->length_dw++] = incr; /* increment size */
 -              ib->ptr[ib->length_dw++] = 0;
 -              ib->ptr[ib->length_dw++] = ndw; /* number of entries */
 -
 -              pe += ndw * 8;
 -              addr += ndw * incr;
 -              count -= ndw;
 -      }
 +      /* for physically contiguous pages (vram) */
 +      ib->ptr[ib->length_dw++] = SDMA_PKT_HEADER_OP(SDMA_OP_GEN_PTEPDE);
 +      ib->ptr[ib->length_dw++] = lower_32_bits(pe); /* dst addr */
 +      ib->ptr[ib->length_dw++] = upper_32_bits(pe);
 +      ib->ptr[ib->length_dw++] = flags; /* mask */
 +      ib->ptr[ib->length_dw++] = 0;
 +      ib->ptr[ib->length_dw++] = lower_32_bits(addr); /* value */
 +      ib->ptr[ib->length_dw++] = upper_32_bits(addr);
 +      ib->ptr[ib->length_dw++] = incr; /* increment size */
 +      ib->ptr[ib->length_dw++] = 0;
 +      ib->ptr[ib->length_dw++] = count; /* number of entries */
  }
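With the splitting loop gone, each call now emits exactly one ten-dword GEN_PTEPDE packet and the engine walks 'count' entries on its own. A user-space sketch of that dword layout, with a placeholder for the header word (the real value comes from the driver's SDMA_PKT_HEADER_OP macro):

    #include <stdio.h>
    #include <stdint.h>

    /* placeholder, NOT the real SDMA_PKT_HEADER_OP(SDMA_OP_GEN_PTEPDE) encoding */
    #define FAKE_GEN_PTEPDE_HEADER 0x000000aau

    static unsigned emit_pte_pde(uint32_t *buf, uint64_t pe, uint64_t addr,
                                 unsigned count, uint32_t incr, uint32_t flags)
    {
            unsigned dw = 0;

            buf[dw++] = FAKE_GEN_PTEPDE_HEADER;
            buf[dw++] = (uint32_t)pe;               /* dst addr, low  */
            buf[dw++] = (uint32_t)(pe >> 32);       /* dst addr, high */
            buf[dw++] = flags;                      /* mask */
            buf[dw++] = 0;
            buf[dw++] = (uint32_t)addr;             /* value, low  */
            buf[dw++] = (uint32_t)(addr >> 32);     /* value, high */
            buf[dw++] = incr;                       /* increment size */
            buf[dw++] = 0;
            buf[dw++] = count;                      /* number of entries */

            return dw;                              /* always 10 dwords */
    }

    int main(void)
    {
            uint32_t buf[10];
            unsigned n = emit_pte_pde(buf, 0x100000, 0x200000, 512, 0x1000, 0x1);

            printf("emitted %u dwords, header 0x%08x\n", n, buf[0]);
            return 0;
    }
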
  
  /**
@@@ -902,22 -945,6 +902,22 @@@ static void sdma_v2_4_ring_emit_vm_flus
                          SDMA_PKT_POLL_REGMEM_DW5_INTERVAL(10)); /* retry count, poll interval */
  }
  
 +static unsigned sdma_v2_4_ring_get_emit_ib_size(struct amdgpu_ring *ring)
 +{
 +      return
 +              7 + 6; /* sdma_v2_4_ring_emit_ib */
 +}
 +
 +static unsigned sdma_v2_4_ring_get_dma_frame_size(struct amdgpu_ring *ring)
 +{
 +      return
 +              6 + /* sdma_v2_4_ring_emit_hdp_flush */
 +              3 + /* sdma_v2_4_ring_emit_hdp_invalidate */
 +              6 + /* sdma_v2_4_ring_emit_pipeline_sync */
 +              12 + /* sdma_v2_4_ring_emit_vm_flush */
 +              10 + 10 + 10; /* sdma_v2_4_ring_emit_fence x3 for user fence, vm fence */
 +}
 +
  static int sdma_v2_4_early_init(void *handle)
  {
        struct amdgpu_device *adev = (struct amdgpu_device *)handle;
@@@ -1236,8 -1263,6 +1236,8 @@@ static const struct amdgpu_ring_funcs s
        .test_ib = sdma_v2_4_ring_test_ib,
        .insert_nop = sdma_v2_4_ring_insert_nop,
        .pad_ib = sdma_v2_4_ring_pad_ib,
 +      .get_emit_ib_size = sdma_v2_4_ring_get_emit_ib_size,
 +      .get_dma_frame_size = sdma_v2_4_ring_get_dma_frame_size,
  };
  
  static void sdma_v2_4_set_ring_funcs(struct amdgpu_device *adev)
index ad494b875311fcdacc0994afd76ad8be8761513b,a7d3cb3fead0f6c63536996c1112f4c76a540e19..453c5d66e5c34abf0e6e3f24bbebcacdf2fcca49
@@@ -142,15 -142,13 +142,15 @@@ int kfd_doorbell_mmap(struct kfd_proces
  
        vma->vm_page_prot = pgprot_noncached(vma->vm_page_prot);
  
 -      pr_debug("mapping doorbell page:\n");
 -      pr_debug("     target user address == 0x%08llX\n",
 -                      (unsigned long long) vma->vm_start);
 -      pr_debug("     physical address    == 0x%08llX\n", address);
 -      pr_debug("     vm_flags            == 0x%04lX\n", vma->vm_flags);
 -      pr_debug("     size                == 0x%04lX\n",
 -                       doorbell_process_allocation());
 +      pr_debug("kfd: mapping doorbell page in %s\n"
 +               "     target user address == 0x%08llX\n"
 +               "     physical address    == 0x%08llX\n"
 +               "     vm_flags            == 0x%04lX\n"
 +               "     size                == 0x%04lX\n",
 +               __func__,
 +               (unsigned long long) vma->vm_start, address, vma->vm_flags,
 +               doorbell_process_allocation());
 +
  
        return io_remap_pfn_range(vma,
                                vma->vm_start,
@@@ -186,7 -184,7 +186,7 @@@ u32 __iomem *kfd_get_kernel_doorbell(st
                                                        sizeof(u32)) + inx;
  
        pr_debug("kfd: get kernel queue doorbell\n"
-                        "     doorbell offset   == 0x%08d\n"
+                        "     doorbell offset   == 0x%08X\n"
                         "     kernel address    == 0x%08lX\n",
                *doorbell_off, (uintptr_t)(kfd->doorbell_kernel_ptr + inx));
  
index 72e6b7dd457b180f85c53438e32f1fc0e804297c,52c527f6642a45efa0d1881b38256bdd75380bfb..9d4c030672f0ff8ae8a830e63afe26a7948707c0
@@@ -320,19 -320,19 +320,19 @@@ atmel_hlcdc_plane_update_pos_and_size(s
                        u32 *coeff_tab = heo_upscaling_ycoef;
                        u32 max_memsize;
  
-                       if (state->crtc_w < state->src_w)
+                       if (state->crtc_h < state->src_h)
                                coeff_tab = heo_downscaling_ycoef;
                        for (i = 0; i < ARRAY_SIZE(heo_upscaling_ycoef); i++)
                                atmel_hlcdc_layer_update_cfg(&plane->layer,
                                                             33 + i,
                                                             0xffffffff,
                                                             coeff_tab[i]);
-                       factor = ((8 * 256 * state->src_w) - (256 * 4)) /
-                                state->crtc_w;
+                       factor = ((8 * 256 * state->src_h) - (256 * 4)) /
+                                state->crtc_h;
                        factor++;
-                       max_memsize = ((factor * state->crtc_w) + (256 * 4)) /
+                       max_memsize = ((factor * state->crtc_h) + (256 * 4)) /
                                      2048;
-                       if (max_memsize > state->src_w)
+                       if (max_memsize > state->src_h)
                                factor--;
                        factor_reg |= (factor << 16) | 0x80000000;
                }
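The factor/max_memsize computation above now runs on the vertical axis; a stand-alone sketch of the same arithmetic, with made-up source and CRTC heights rather than values from a real panel:

    #include <stdio.h>

    int main(void)
    {
            /* hypothetical: 1080 source lines scaled down onto 720 CRTC lines */
            unsigned src_h = 1080, crtc_h = 720;
            unsigned factor, max_memsize;

            factor = ((8 * 256 * src_h) - (256 * 4)) / crtc_h;
            factor++;
            max_memsize = ((factor * crtc_h) + (256 * 4)) / 2048;
            if (max_memsize > src_h)
                    factor--;

            printf("factor = %u, max_memsize = %u\n", factor, max_memsize);
            return 0;
    }
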
@@@ -393,7 -393,7 +393,7 @@@ static void atmel_hlcdc_plane_update_fo
  
        if ((state->base.fb->pixel_format == DRM_FORMAT_YUV422 ||
             state->base.fb->pixel_format == DRM_FORMAT_NV61) &&
 -          (state->base.rotation & (BIT(DRM_ROTATE_90) | BIT(DRM_ROTATE_270))))
 +          (state->base.rotation & (DRM_ROTATE_90 | DRM_ROTATE_270)))
                cfg |= ATMEL_HLCDC_YUV422ROT;
  
        atmel_hlcdc_layer_update_cfg(&plane->layer,
@@@ -628,7 -628,7 +628,7 @@@ static int atmel_hlcdc_plane_atomic_che
        /*
         * Swap width and size in case of 90 or 270 degrees rotation
         */
 -      if (state->base.rotation & (BIT(DRM_ROTATE_90) | BIT(DRM_ROTATE_270))) {
 +      if (state->base.rotation & (DRM_ROTATE_90 | DRM_ROTATE_270)) {
                tmp = state->crtc_w;
                state->crtc_w = state->crtc_h;
                state->crtc_h = tmp;
                        return -EINVAL;
  
                switch (state->base.rotation & DRM_ROTATE_MASK) {
 -              case BIT(DRM_ROTATE_90):
 +              case DRM_ROTATE_90:
                        offset = ((y_offset + state->src_y + patched_src_w - 1) /
                                  ydiv) * fb->pitches[i];
                        offset += ((x_offset + state->src_x) / xdiv) *
                                          fb->pitches[i];
                        state->pstride[i] = -fb->pitches[i] - state->bpp[i];
                        break;
 -              case BIT(DRM_ROTATE_180):
 +              case DRM_ROTATE_180:
                        offset = ((y_offset + state->src_y + patched_src_h - 1) /
                                  ydiv) * fb->pitches[i];
                        offset += ((x_offset + state->src_x + patched_src_w - 1) /
                                           state->bpp[i]) - fb->pitches[i];
                        state->pstride[i] = -2 * state->bpp[i];
                        break;
 -              case BIT(DRM_ROTATE_270):
 +              case DRM_ROTATE_270:
                        offset = ((y_offset + state->src_y) / ydiv) *
                                 fb->pitches[i];
                        offset += ((x_offset + state->src_x + patched_src_h - 1) /
                                          (2 * state->bpp[i]);
                        state->pstride[i] = fb->pitches[i] - state->bpp[i];
                        break;
 -              case BIT(DRM_ROTATE_0):
 +              case DRM_ROTATE_0:
                default:
                        offset = ((y_offset + state->src_y) / ydiv) *
                                 fb->pitches[i];
  }
  
  static int atmel_hlcdc_plane_prepare_fb(struct drm_plane *p,
 -                                      const struct drm_plane_state *new_state)
 +                                      struct drm_plane_state *new_state)
  {
        /*
         * FIXME: we should avoid this const -> non-const cast but it's
  }
  
  static void atmel_hlcdc_plane_cleanup_fb(struct drm_plane *p,
 -                              const struct drm_plane_state *old_state)
 +                                       struct drm_plane_state *old_state)
  {
        /*
         * FIXME: we should avoid this const -> non-const cast but it's
@@@ -905,7 -905,7 +905,7 @@@ static void atmel_hlcdc_plane_init_prop
        if (desc->layout.xstride && desc->layout.pstride)
                drm_object_attach_property(&plane->base.base,
                                plane->base.dev->mode_config.rotation_property,
 -                              BIT(DRM_ROTATE_0));
 +                              DRM_ROTATE_0);
  
        if (desc->layout.csc) {
                /*
@@@ -1056,10 -1056,10 +1056,10 @@@ atmel_hlcdc_plane_create_properties(str
  
        dev->mode_config.rotation_property =
                        drm_mode_create_rotation_property(dev,
 -                                                        BIT(DRM_ROTATE_0) |
 -                                                        BIT(DRM_ROTATE_90) |
 -                                                        BIT(DRM_ROTATE_180) |
 -                                                        BIT(DRM_ROTATE_270));
 +                                                        DRM_ROTATE_0 |
 +                                                        DRM_ROTATE_90 |
 +                                                        DRM_ROTATE_180 |
 +                                                        DRM_ROTATE_270);
        if (!dev->mode_config.rotation_property)
                return ERR_PTR(-ENOMEM);
  
index 904d29c012ad7713e7207ca2d49cfc76552d062d,2a3ded44cf2a689c66ecfd48ce9362004a9a5964..23739609427d86b9cd64d81ddad719bf5fc2bd78
@@@ -475,7 -475,7 +475,7 @@@ int drm_atomic_crtc_set_property(struc
                                        val,
                                        -1,
                                        &replaced);
-               state->color_mgmt_changed = replaced;
+               state->color_mgmt_changed |= replaced;
                return ret;
        } else if (property == config->ctm_property) {
                ret = drm_atomic_replace_property_blob_from_id(crtc,
                                        val,
                                        sizeof(struct drm_color_ctm),
                                        &replaced);
-               state->color_mgmt_changed = replaced;
+               state->color_mgmt_changed |= replaced;
                return ret;
        } else if (property == config->gamma_lut_property) {
                ret = drm_atomic_replace_property_blob_from_id(crtc,
                                        val,
                                        -1,
                                        &replaced);
-               state->color_mgmt_changed = replaced;
+               state->color_mgmt_changed |= replaced;
                return ret;
        } else if (crtc->funcs->atomic_set_property)
                return crtc->funcs->atomic_set_property(crtc, state, property, val);
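The '=' to '|=' change above matters when more than one color-management blob is replaced in a single commit: a later replace that happens to be a no-op must not clear the flag set by an earlier one. A trivial stand-alone illustration of the difference (nothing DRM-specific, just the flag accumulation):

    #include <stdbool.h>
    #include <stdio.h>

    int main(void)
    {
            bool replaced_gamma = true;     /* first blob really changed */
            bool replaced_ctm = false;      /* second blob was identical */
            bool changed;

            changed = replaced_gamma;
            changed = replaced_ctm;         /* '='  : the earlier change is lost */
            printf("with '=' : %d\n", changed);

            changed = false;
            changed |= replaced_gamma;
            changed |= replaced_ctm;        /* '|=' : the change sticks */
            printf("with '|=': %d\n", changed);
            return 0;
    }
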
@@@ -837,9 -837,8 +837,9 @@@ static int drm_atomic_plane_check(struc
        /* Check whether this plane supports the fb pixel format. */
        ret = drm_plane_check_pixel_format(plane, state->fb->pixel_format);
        if (ret) {
 -              DRM_DEBUG_ATOMIC("Invalid pixel format %s\n",
 -                               drm_get_format_name(state->fb->pixel_format));
 +              char *format_name = drm_get_format_name(state->fb->pixel_format);
 +              DRM_DEBUG_ATOMIC("Invalid pixel format %s\n", format_name);
 +              kfree(format_name);
                return ret;
        }
  
@@@ -1691,7 -1690,7 +1691,7 @@@ retry
                                goto out;
                        }
  
 -                      prop = drm_property_find(dev, prop_id);
 +                      prop = drm_mode_obj_find_prop_id(obj, prop_id);
                        if (!prop) {
                                drm_mode_object_unreference(obj);
                                ret = -ENOENT;
index 5e830281bebdba810580a307d9c3c50d7d216151,0a06f9120b5a61b56f0a8db9c77a3374bd20e37d..03414bde1f152637a7ed6002ed8a88e30611fec8
   */
  #define pr_fmt(fmt) KBUILD_MODNAME ": " fmt
  
 +#include <linux/console.h>
  #include <linux/kernel.h>
  #include <linux/sysrq.h>
  #include <linux/slab.h>
 -#include <linux/fb.h>
  #include <linux/module.h>
  #include <drm/drmP.h>
  #include <drm/drm_crtc.h>
@@@ -41,8 -41,6 +41,8 @@@
  #include <drm/drm_atomic.h>
  #include <drm/drm_atomic_helper.h>
  
 +#include "drm_crtc_helper_internal.h"
 +
  static bool drm_fbdev_emulation = true;
  module_param_named(fbdev_emulation, drm_fbdev_emulation, bool, 0600);
  MODULE_PARM_DESC(fbdev_emulation,
@@@ -337,7 -335,7 +337,7 @@@ retry
                        goto fail;
                }
  
 -              plane_state->rotation = BIT(DRM_ROTATE_0);
 +              plane_state->rotation = DRM_ROTATE_0;
  
                plane->old_fb = plane->fb;
                plane_mask |= 1 << drm_plane_index(plane);
@@@ -397,7 -395,7 +397,7 @@@ static int restore_fbdev_mode(struct dr
                if (dev->mode_config.rotation_property) {
                        drm_mode_plane_set_obj_prop(plane,
                                                    dev->mode_config.rotation_property,
 -                                                  BIT(DRM_ROTATE_0));
 +                                                  DRM_ROTATE_0);
                }
        }
  
@@@ -466,7 -464,7 +466,7 @@@ static bool drm_fb_helper_is_bound(stru
  
        /* Sometimes user space wants everything disabled, so don't steal the
         * display if there's a master. */
-       if (lockless_dereference(dev->master))
+       if (READ_ONCE(dev->master))
                return false;
  
        drm_for_each_crtc(crtc, dev) {
@@@ -620,16 -618,6 +620,16 @@@ static void drm_fb_helper_crtc_free(str
        kfree(helper->crtc_info);
  }
  
 +static void drm_fb_helper_resume_worker(struct work_struct *work)
 +{
 +      struct drm_fb_helper *helper = container_of(work, struct drm_fb_helper,
 +                                                  resume_work);
 +
 +      console_lock();
 +      fb_set_suspend(helper->fbdev, 0);
 +      console_unlock();
 +}
 +
  static void drm_fb_helper_dirty_work(struct work_struct *work)
  {
        struct drm_fb_helper *helper = container_of(work, struct drm_fb_helper,
@@@ -661,7 -649,6 +661,7 @@@ void drm_fb_helper_prepare(struct drm_d
  {
        INIT_LIST_HEAD(&helper->kernel_fb_list);
        spin_lock_init(&helper->dirty_lock);
 +      INIT_WORK(&helper->resume_work, drm_fb_helper_resume_worker);
        INIT_WORK(&helper->dirty_work, drm_fb_helper_dirty_work);
        helper->dirty_clip.x1 = helper->dirty_clip.y1 = ~0;
        helper->funcs = funcs;
@@@ -1037,65 -1024,17 +1037,65 @@@ EXPORT_SYMBOL(drm_fb_helper_cfb_imagebl
  /**
   * drm_fb_helper_set_suspend - wrapper around fb_set_suspend
   * @fb_helper: driver-allocated fbdev helper
 - * @state: desired state, zero to resume, non-zero to suspend
 + * @suspend: whether to suspend or resume
   *
 - * A wrapper around fb_set_suspend implemented by fbdev core
 + * A wrapper around fb_set_suspend implemented by fbdev core.
 + * Use drm_fb_helper_set_suspend_unlocked() if you don't need to take
 + * the lock yourself
   */
 -void drm_fb_helper_set_suspend(struct drm_fb_helper *fb_helper, int state)
 +void drm_fb_helper_set_suspend(struct drm_fb_helper *fb_helper, bool suspend)
  {
        if (fb_helper && fb_helper->fbdev)
 -              fb_set_suspend(fb_helper->fbdev, state);
 +              fb_set_suspend(fb_helper->fbdev, suspend);
  }
  EXPORT_SYMBOL(drm_fb_helper_set_suspend);
  
 +/**
 + * drm_fb_helper_set_suspend_unlocked - wrapper around fb_set_suspend that also
 + *                                      takes the console lock
 + * @fb_helper: driver-allocated fbdev helper
 + * @suspend: whether to suspend or resume
 + *
 + * A wrapper around fb_set_suspend() that takes the console lock. If the lock
 + * isn't available on resume, a worker is tasked with waiting for the lock
 + * to become available. The console lock can be pretty contented on resume
 + * due to all the printk activity.
 + *
 + * This function can be called multiple times with the same state since
 + * &fb_info->state is checked to see if fbdev is running or not before locking.
 + *
 + * Use drm_fb_helper_set_suspend() if you need to take the lock yourself.
 + */
 +void drm_fb_helper_set_suspend_unlocked(struct drm_fb_helper *fb_helper,
 +                                      bool suspend)
 +{
 +      if (!fb_helper || !fb_helper->fbdev)
 +              return;
 +
 +      /* make sure there's no pending/ongoing resume */
 +      flush_work(&fb_helper->resume_work);
 +
 +      if (suspend) {
 +              if (fb_helper->fbdev->state != FBINFO_STATE_RUNNING)
 +                      return;
 +
 +              console_lock();
 +
 +      } else {
 +              if (fb_helper->fbdev->state == FBINFO_STATE_RUNNING)
 +                      return;
 +
 +              if (!console_trylock()) {
 +                      schedule_work(&fb_helper->resume_work);
 +                      return;
 +              }
 +      }
 +
 +      fb_set_suspend(fb_helper->fbdev, suspend);
 +      console_unlock();
 +}
 +EXPORT_SYMBOL(drm_fb_helper_set_suspend_unlocked);
 +
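Drivers that previously open-coded console_lock()/fb_set_suspend()/console_unlock() in their PM paths can defer to this helper instead. A hedged sketch of how suspend/resume callbacks might use it; the driver structure and callbacks here are hypothetical, and only the drm_fb_helper call is taken from the code above:

    #include <drm/drm_fb_helper.h>

    /* hypothetical driver private data; only the embedded fb_helper matters */
    struct foo_drm_private {
            struct drm_fb_helper fb_helper;
    };

    static int foo_pm_suspend(struct foo_drm_private *priv)
    {
            /* suspend path: the helper simply takes the console lock */
            drm_fb_helper_set_suspend_unlocked(&priv->fb_helper, true);
            return 0;
    }

    static int foo_pm_resume(struct foo_drm_private *priv)
    {
            /* resume path: if the console lock is contended, the helper
             * hands the fb_set_suspend() call off to its resume worker */
            drm_fb_helper_set_suspend_unlocked(&priv->fb_helper, false);
            return 0;
    }
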
  static int setcolreg(struct drm_crtc *crtc, u16 red, u16 green,
                     u16 blue, u16 regno, struct fb_info *info)
  {
@@@ -2255,7 -2194,7 +2255,7 @@@ EXPORT_SYMBOL(drm_fb_helper_initial_con
   * @fb_helper: the drm_fb_helper
   *
   * Scan the connectors attached to the fb_helper and try to put together a
 - * setup after *notification of a change in output configuration.
 + * setup after notification of a change in output configuration.
   *
   * Called at runtime, takes the mode config locks to be able to check/change the
   * modeset configuration. Must be run from process context (which usually means
index c8bd02277b7d4ef66a51dafee058dad742a93c49,a77ce9983f69c9965725f806a008fc06a129935f..2c81067589225a89762a4129fac7eeee0abebed9
  #include <drm/drm_vma_manager.h>
  #include <drm/i915_drm.h>
  #include "i915_drv.h"
 +#include "i915_gem_dmabuf.h"
  #include "i915_vgpu.h"
  #include "i915_trace.h"
  #include "intel_drv.h"
 +#include "intel_frontbuffer.h"
  #include "intel_mocs.h"
 +#include <linux/reservation.h>
  #include <linux/shmem_fs.h>
  #include <linux/slab.h>
  #include <linux/swap.h>
  
  static void i915_gem_object_flush_gtt_write_domain(struct drm_i915_gem_object *obj);
  static void i915_gem_object_flush_cpu_write_domain(struct drm_i915_gem_object *obj);
 -static void
 -i915_gem_object_retire__write(struct drm_i915_gem_object *obj);
 -static void
 -i915_gem_object_retire__read(struct drm_i915_gem_object *obj, int ring);
  
  static bool cpu_cache_is_coherent(struct drm_device *dev,
                                  enum i915_cache_level level)
@@@ -138,6 -139,7 +138,6 @@@ int i915_mutex_lock_interruptible(struc
        if (ret)
                return ret;
  
 -      WARN_ON(i915_verify_lists(dev));
        return 0;
  }
  
@@@ -154,10 -156,10 +154,10 @@@ i915_gem_get_aperture_ioctl(struct drm_
        pinned = 0;
        mutex_lock(&dev->struct_mutex);
        list_for_each_entry(vma, &ggtt->base.active_list, vm_link)
 -              if (vma->pin_count)
 +              if (i915_vma_is_pinned(vma))
                        pinned += vma->node.size;
        list_for_each_entry(vma, &ggtt->base.inactive_list, vm_link)
 -              if (vma->pin_count)
 +              if (i915_vma_is_pinned(vma))
                        pinned += vma->node.size;
        mutex_unlock(&dev->struct_mutex);
  
@@@ -279,129 -281,23 +279,129 @@@ static const struct drm_i915_gem_object
        .release = i915_gem_object_release_phys,
  };
  
 -static int
 -drop_pages(struct drm_i915_gem_object *obj)
 +int i915_gem_object_unbind(struct drm_i915_gem_object *obj)
  {
 -      struct i915_vma *vma, *next;
 +      struct i915_vma *vma;
 +      LIST_HEAD(still_in_list);
        int ret;
  
 -      drm_gem_object_reference(&obj->base);
 -      list_for_each_entry_safe(vma, next, &obj->vma_list, obj_link)
 -              if (i915_vma_unbind(vma))
 -                      break;
 +      lockdep_assert_held(&obj->base.dev->struct_mutex);
  
 -      ret = i915_gem_object_put_pages(obj);
 -      drm_gem_object_unreference(&obj->base);
 +      /* Closed vma are removed from the obj->vma_list - but they may
 +       * still have an active binding on the object. To remove those we
 +       * must wait for all rendering to complete to the object (as unbinding
 +       * must anyway), and retire the requests.
 +       */
 +      ret = i915_gem_object_wait_rendering(obj, false);
 +      if (ret)
 +              return ret;
 +
 +      i915_gem_retire_requests(to_i915(obj->base.dev));
 +
 +      while ((vma = list_first_entry_or_null(&obj->vma_list,
 +                                             struct i915_vma,
 +                                             obj_link))) {
 +              list_move_tail(&vma->obj_link, &still_in_list);
 +              ret = i915_vma_unbind(vma);
 +              if (ret)
 +                      break;
 +      }
 +      list_splice(&still_in_list, &obj->vma_list);
  
        return ret;
  }
  
 +/**
 + * Ensures that all rendering to the object has completed and the object is
 + * safe to unbind from the GTT or access from the CPU.
 + * @obj: i915 gem object
 + * @readonly: waiting for just read access or read-write access
 + */
 +int
 +i915_gem_object_wait_rendering(struct drm_i915_gem_object *obj,
 +                             bool readonly)
 +{
 +      struct reservation_object *resv;
 +      struct i915_gem_active *active;
 +      unsigned long active_mask;
 +      int idx;
 +
 +      lockdep_assert_held(&obj->base.dev->struct_mutex);
 +
 +      if (!readonly) {
 +              active = obj->last_read;
 +              active_mask = i915_gem_object_get_active(obj);
 +      } else {
 +              active_mask = 1;
 +              active = &obj->last_write;
 +      }
 +
 +      for_each_active(active_mask, idx) {
 +              int ret;
 +
 +              ret = i915_gem_active_wait(&active[idx],
 +                                         &obj->base.dev->struct_mutex);
 +              if (ret)
 +                      return ret;
 +      }
 +
 +      resv = i915_gem_object_get_dmabuf_resv(obj);
 +      if (resv) {
 +              long err;
 +
 +              err = reservation_object_wait_timeout_rcu(resv, !readonly, true,
 +                                                        MAX_SCHEDULE_TIMEOUT);
 +              if (err < 0)
 +                      return err;
 +      }
 +
 +      return 0;
 +}
 +
 +/* A nonblocking variant of the above wait. Must be called prior to
 + * acquiring the mutex for the object, as the object state may change
 + * during this call. A reference must be held by the caller for the object.
 + */
 +static __must_check int
 +__unsafe_wait_rendering(struct drm_i915_gem_object *obj,
 +                      struct intel_rps_client *rps,
 +                      bool readonly)
 +{
 +      struct i915_gem_active *active;
 +      unsigned long active_mask;
 +      int idx;
 +
 +      active_mask = __I915_BO_ACTIVE(obj);
 +      if (!active_mask)
 +              return 0;
 +
 +      if (!readonly) {
 +              active = obj->last_read;
 +      } else {
 +              active_mask = 1;
 +              active = &obj->last_write;
 +      }
 +
 +      for_each_active(active_mask, idx) {
 +              int ret;
 +
 +              ret = i915_gem_active_wait_unlocked(&active[idx],
 +                                                  I915_WAIT_INTERRUPTIBLE,
 +                                                  NULL, rps);
 +              if (ret)
 +                      return ret;
 +      }
 +
 +      return 0;
 +}
 +
 +static struct intel_rps_client *to_rps_client(struct drm_file *file)
 +{
 +      struct drm_i915_file_private *fpriv = file->driver_priv;
 +
 +      return &fpriv->rps;
 +}
 +
  int
  i915_gem_object_attach_phys(struct drm_i915_gem_object *obj,
                            int align)
        if (obj->base.filp == NULL)
                return -EINVAL;
  
 -      ret = drop_pages(obj);
 +      ret = i915_gem_object_unbind(obj);
 +      if (ret)
 +              return ret;
 +
 +      ret = i915_gem_object_put_pages(obj);
        if (ret)
                return ret;
  
@@@ -516,7 -408,7 +516,7 @@@ i915_gem_create(struct drm_file *file
  
        ret = drm_gem_handle_create(file, &obj->base, &handle);
        /* drop reference from allocate - handle holds it now */
 -      drm_gem_object_unreference_unlocked(&obj->base);
 +      i915_gem_object_put_unlocked(obj);
        if (ret)
                return ret;
  
@@@ -610,106 -502,33 +610,106 @@@ __copy_from_user_swizzled(char *gpu_vad
   * flush the object from the CPU cache.
   */
  int i915_gem_obj_prepare_shmem_read(struct drm_i915_gem_object *obj,
 -                                  int *needs_clflush)
 +                                  unsigned int *needs_clflush)
  {
        int ret;
  
        *needs_clflush = 0;
  
 -      if (WARN_ON(!i915_gem_object_has_struct_page(obj)))
 -              return -EINVAL;
 +      if (!i915_gem_object_has_struct_page(obj))
 +              return -ENODEV;
 +
 +      ret = i915_gem_object_wait_rendering(obj, true);
 +      if (ret)
 +              return ret;
 +
 +      ret = i915_gem_object_get_pages(obj);
 +      if (ret)
 +              return ret;
 +
 +      i915_gem_object_pin_pages(obj);
 +
 +      i915_gem_object_flush_gtt_write_domain(obj);
  
 -      if (!(obj->base.read_domains & I915_GEM_DOMAIN_CPU)) {
 -              /* If we're not in the cpu read domain, set ourself into the gtt
 -               * read domain and manually flush cachelines (if required). This
 -               * optimizes for the case when the gpu will dirty the data
 -               * anyway again before the next pread happens. */
 +      /* If we're not in the cpu read domain, set ourself into the gtt
 +       * read domain and manually flush cachelines (if required). This
 +       * optimizes for the case when the gpu will dirty the data
 +       * anyway again before the next pread happens.
 +       */
 +      if (!(obj->base.read_domains & I915_GEM_DOMAIN_CPU))
                *needs_clflush = !cpu_cache_is_coherent(obj->base.dev,
                                                        obj->cache_level);
 -              ret = i915_gem_object_wait_rendering(obj, true);
 +
 +      if (*needs_clflush && !static_cpu_has(X86_FEATURE_CLFLUSH)) {
 +              ret = i915_gem_object_set_to_cpu_domain(obj, false);
                if (ret)
 -                      return ret;
 +                      goto err_unpin;
 +
 +              *needs_clflush = 0;
        }
  
 +      /* return with the pages pinned */
 +      return 0;
 +
 +err_unpin:
 +      i915_gem_object_unpin_pages(obj);
 +      return ret;
 +}
 +
 +int i915_gem_obj_prepare_shmem_write(struct drm_i915_gem_object *obj,
 +                                   unsigned int *needs_clflush)
 +{
 +      int ret;
 +
 +      *needs_clflush = 0;
 +      if (!i915_gem_object_has_struct_page(obj))
 +              return -ENODEV;
 +
 +      ret = i915_gem_object_wait_rendering(obj, false);
 +      if (ret)
 +              return ret;
 +
        ret = i915_gem_object_get_pages(obj);
        if (ret)
                return ret;
  
        i915_gem_object_pin_pages(obj);
  
 +      i915_gem_object_flush_gtt_write_domain(obj);
 +
 +      /* If we're not in the cpu write domain, set ourself into the
 +       * gtt write domain and manually flush cachelines (as required).
 +       * This optimizes for the case when the gpu will use the data
 +       * right away and we therefore have to clflush anyway.
 +       */
 +      if (obj->base.write_domain != I915_GEM_DOMAIN_CPU)
 +              *needs_clflush |= cpu_write_needs_clflush(obj) << 1;
 +
 +      /* Same trick applies to invalidate partially written cachelines read
 +       * before writing.
 +       */
 +      if (!(obj->base.read_domains & I915_GEM_DOMAIN_CPU))
 +              *needs_clflush |= !cpu_cache_is_coherent(obj->base.dev,
 +                                                       obj->cache_level);
 +
 +      if (*needs_clflush && !static_cpu_has(X86_FEATURE_CLFLUSH)) {
 +              ret = i915_gem_object_set_to_cpu_domain(obj, true);
 +              if (ret)
 +                      goto err_unpin;
 +
 +              *needs_clflush = 0;
 +      }
 +
 +      if ((*needs_clflush & CLFLUSH_AFTER) == 0)
 +              obj->cache_dirty = true;
 +
 +      intel_fb_obj_invalidate(obj, ORIGIN_CPU);
 +      obj->dirty = 1;
 +      /* return with the pages pinned */
 +      return 0;
 +
 +err_unpin:
 +      i915_gem_object_unpin_pages(obj);
        return ret;
  }
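Both prepare helpers return with the object's pages pinned and the CLFLUSH_BEFORE/CLFLUSH_AFTER bits encoded in *needs_clflush; callers drop the pin with i915_gem_obj_finish_shmem_access() once the CPU access is done, as the reworked pread/pwrite paths further down do. A rough sketch of that calling pattern (hypothetical caller, copy step elided):

    static int foo_cpu_write(struct drm_i915_gem_object *obj)
    {
            unsigned int needs_clflush;
            int ret;

            ret = i915_gem_obj_prepare_shmem_write(obj, &needs_clflush);
            if (ret)
                    return ret;

            /*
             * ... copy data into the object's pages here, clflushing
             * before and/or after as dictated by needs_clflush &
             * CLFLUSH_BEFORE and needs_clflush & CLFLUSH_AFTER ...
             */

            i915_gem_obj_finish_shmem_access(obj);  /* unpins the pages */
            return 0;
    }
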
  
@@@ -819,24 -638,14 +819,24 @@@ i915_gem_gtt_pread(struct drm_device *d
  {
        struct drm_i915_private *dev_priv = to_i915(dev);
        struct i915_ggtt *ggtt = &dev_priv->ggtt;
 +      struct i915_vma *vma;
        struct drm_mm_node node;
        char __user *user_data;
        uint64_t remain;
        uint64_t offset;
        int ret;
  
 -      ret = i915_gem_obj_ggtt_pin(obj, 0, PIN_MAPPABLE);
 -      if (ret) {
 +      vma = i915_gem_object_ggtt_pin(obj, NULL, 0, 0, PIN_MAPPABLE);
 +      if (!IS_ERR(vma)) {
 +              node.start = i915_ggtt_offset(vma);
 +              node.allocated = false;
 +              ret = i915_vma_put_fence(vma);
 +              if (ret) {
 +                      i915_vma_unpin(vma);
 +                      vma = ERR_PTR(ret);
 +              }
 +      }
 +      if (IS_ERR(vma)) {
                ret = insert_mappable_node(dev_priv, &node, PAGE_SIZE);
                if (ret)
                        goto out;
                }
  
                i915_gem_object_pin_pages(obj);
 -      } else {
 -              node.start = i915_gem_obj_ggtt_offset(obj);
 -              node.allocated = false;
 -              ret = i915_gem_object_put_fence(obj);
 -              if (ret)
 -                      goto out_unpin;
        }
  
        ret = i915_gem_object_set_to_gtt_domain(obj, false);
                 * and write to user memory which may result into page
                 * faults, and so we cannot perform this under struct_mutex.
                 */
 -              if (slow_user_access(ggtt->mappable, page_base,
 +              if (slow_user_access(&ggtt->mappable, page_base,
                                     page_offset, user_data,
                                     page_length, false)) {
                        ret = -EFAULT;
@@@ -924,7 -739,7 +924,7 @@@ out_unpin
                i915_gem_object_unpin_pages(obj);
                remove_mappable_node(&node);
        } else {
 -              i915_gem_object_ggtt_unpin(obj);
 +              i915_vma_unpin(vma);
        }
  out:
        return ret;
@@@ -945,14 -760,19 +945,14 @@@ i915_gem_shmem_pread(struct drm_device 
        int needs_clflush = 0;
        struct sg_page_iter sg_iter;
  
 -      if (!i915_gem_object_has_struct_page(obj))
 -              return -ENODEV;
 -
 -      user_data = u64_to_user_ptr(args->data_ptr);
 -      remain = args->size;
 -
 -      obj_do_bit17_swizzling = i915_gem_object_needs_bit17_swizzle(obj);
 -
        ret = i915_gem_obj_prepare_shmem_read(obj, &needs_clflush);
        if (ret)
                return ret;
  
 +      obj_do_bit17_swizzling = i915_gem_object_needs_bit17_swizzle(obj);
 +      user_data = u64_to_user_ptr(args->data_ptr);
        offset = args->offset;
 +      remain = args->size;
  
        for_each_sg_page(obj->pages->sgl, &sg_iter, obj->pages->nents,
                         offset >> PAGE_SHIFT) {
@@@ -1008,7 -828,7 +1008,7 @@@ next_page
        }
  
  out:
 -      i915_gem_object_unpin_pages(obj);
 +      i915_gem_obj_finish_shmem_access(obj);
  
        return ret;
  }
@@@ -1037,27 -857,25 +1037,27 @@@ i915_gem_pread_ioctl(struct drm_device 
                       args->size))
                return -EFAULT;
  
 -      ret = i915_mutex_lock_interruptible(dev);
 -      if (ret)
 -              return ret;
 -
 -      obj = to_intel_bo(drm_gem_object_lookup(file, args->handle));
 -      if (&obj->base == NULL) {
 -              ret = -ENOENT;
 -              goto unlock;
 -      }
 +      obj = i915_gem_object_lookup(file, args->handle);
 +      if (!obj)
 +              return -ENOENT;
  
        /* Bounds check source.  */
        if (args->offset > obj->base.size ||
            args->size > obj->base.size - args->offset) {
                ret = -EINVAL;
 -              goto out;
 +              goto err;
        }
  
        trace_i915_gem_object_pread(obj, args->offset, args->size);
  
 +      ret = __unsafe_wait_rendering(obj, to_rps_client(file), true);
 +      if (ret)
 +              goto err;
 +
 +      ret = i915_mutex_lock_interruptible(dev);
 +      if (ret)
 +              goto err;
 +
        ret = i915_gem_shmem_pread(dev, obj, args, file);
  
        /* pread for non shmem backed objects */
                intel_runtime_pm_put(to_i915(dev));
        }
  
 -out:
 -      drm_gem_object_unreference(&obj->base);
 -unlock:
 +      i915_gem_object_put(obj);
        mutex_unlock(&dev->struct_mutex);
 +
 +      return ret;
 +
 +err:
 +      i915_gem_object_put_unlocked(obj);
        return ret;
  }
  
@@@ -1104,7 -919,7 +1104,7 @@@ fast_user_write(struct io_mapping *mapp
  /**
   * This is the fast pwrite path, where we copy the data directly from the
   * user into the GTT, uncached.
 - * @dev: drm device pointer
 + * @i915: i915 device private data
   * @obj: i915 gem object
   * @args: pwrite arguments structure
   * @file: drm file pointer
@@@ -1117,28 -932,17 +1117,28 @@@ i915_gem_gtt_pwrite_fast(struct drm_i91
  {
        struct i915_ggtt *ggtt = &i915->ggtt;
        struct drm_device *dev = obj->base.dev;
 +      struct i915_vma *vma;
        struct drm_mm_node node;
        uint64_t remain, offset;
        char __user *user_data;
        int ret;
        bool hit_slow_path = false;
  
 -      if (obj->tiling_mode != I915_TILING_NONE)
 +      if (i915_gem_object_is_tiled(obj))
                return -EFAULT;
  
 -      ret = i915_gem_obj_ggtt_pin(obj, 0, PIN_MAPPABLE | PIN_NONBLOCK);
 -      if (ret) {
 +      vma = i915_gem_object_ggtt_pin(obj, NULL, 0, 0,
 +                                     PIN_MAPPABLE | PIN_NONBLOCK);
 +      if (!IS_ERR(vma)) {
 +              node.start = i915_ggtt_offset(vma);
 +              node.allocated = false;
 +              ret = i915_vma_put_fence(vma);
 +              if (ret) {
 +                      i915_vma_unpin(vma);
 +                      vma = ERR_PTR(ret);
 +              }
 +      }
 +      if (IS_ERR(vma)) {
                ret = insert_mappable_node(i915, &node, PAGE_SIZE);
                if (ret)
                        goto out;
                }
  
                i915_gem_object_pin_pages(obj);
 -      } else {
 -              node.start = i915_gem_obj_ggtt_offset(obj);
 -              node.allocated = false;
 -              ret = i915_gem_object_put_fence(obj);
 -              if (ret)
 -                      goto out_unpin;
        }
  
        ret = i915_gem_object_set_to_gtt_domain(obj, true);
        if (ret)
                goto out_unpin;
  
 -      intel_fb_obj_invalidate(obj, ORIGIN_GTT);
 +      intel_fb_obj_invalidate(obj, ORIGIN_CPU);
        obj->dirty = true;
  
        user_data = u64_to_user_ptr(args->data_ptr);
                 * If the object is non-shmem backed, we retry again with the
                 * path that handles page fault.
                 */
 -              if (fast_user_write(ggtt->mappable, page_base,
 +              if (fast_user_write(&ggtt->mappable, page_base,
                                    page_offset, user_data, page_length)) {
                        hit_slow_path = true;
                        mutex_unlock(&dev->struct_mutex);
 -                      if (slow_user_access(ggtt->mappable,
 +                      if (slow_user_access(&ggtt->mappable,
                                             page_base,
                                             page_offset, user_data,
                                             page_length, true)) {
@@@ -1223,7 -1033,7 +1223,7 @@@ out_flush
                }
        }
  
 -      intel_fb_obj_flush(obj, false, ORIGIN_GTT);
 +      intel_fb_obj_flush(obj, false, ORIGIN_CPU);
  out_unpin:
        if (node.allocated) {
                wmb();
                i915_gem_object_unpin_pages(obj);
                remove_mappable_node(&node);
        } else {
 -              i915_gem_object_ggtt_unpin(obj);
 +              i915_vma_unpin(vma);
        }
  out:
        return ret;
@@@ -1316,17 -1126,41 +1316,17 @@@ i915_gem_shmem_pwrite(struct drm_devic
        int shmem_page_offset, page_length, ret = 0;
        int obj_do_bit17_swizzling, page_do_bit17_swizzling;
        int hit_slowpath = 0;
 -      int needs_clflush_after = 0;
 -      int needs_clflush_before = 0;
 +      unsigned int needs_clflush;
        struct sg_page_iter sg_iter;
  
 -      user_data = u64_to_user_ptr(args->data_ptr);
 -      remain = args->size;
 -
 -      obj_do_bit17_swizzling = i915_gem_object_needs_bit17_swizzle(obj);
 -
 -      if (obj->base.write_domain != I915_GEM_DOMAIN_CPU) {
 -              /* If we're not in the cpu write domain, set ourself into the gtt
 -               * write domain and manually flush cachelines (if required). This
 -               * optimizes for the case when the gpu will use the data
 -               * right away and we therefore have to clflush anyway. */
 -              needs_clflush_after = cpu_write_needs_clflush(obj);
 -              ret = i915_gem_object_wait_rendering(obj, false);
 -              if (ret)
 -                      return ret;
 -      }
 -      /* Same trick applies to invalidate partially written cachelines read
 -       * before writing. */
 -      if ((obj->base.read_domains & I915_GEM_DOMAIN_CPU) == 0)
 -              needs_clflush_before =
 -                      !cpu_cache_is_coherent(dev, obj->cache_level);
 -
 -      ret = i915_gem_object_get_pages(obj);
 +      ret = i915_gem_obj_prepare_shmem_write(obj, &needs_clflush);
        if (ret)
                return ret;
  
 -      intel_fb_obj_invalidate(obj, ORIGIN_CPU);
 -
 -      i915_gem_object_pin_pages(obj);
 -
 +      obj_do_bit17_swizzling = i915_gem_object_needs_bit17_swizzle(obj);
 +      user_data = u64_to_user_ptr(args->data_ptr);
        offset = args->offset;
 -      obj->dirty = 1;
 +      remain = args->size;
  
        for_each_sg_page(obj->pages->sgl, &sg_iter, obj->pages->nents,
                         offset >> PAGE_SHIFT) {
                /* If we don't overwrite a cacheline completely we need to be
                 * careful to have up-to-date data by first clflushing. Don't
                 * overcomplicate things and flush the entire patch. */
 -              partial_cacheline_write = needs_clflush_before &&
 +              partial_cacheline_write = needs_clflush & CLFLUSH_BEFORE &&
                        ((shmem_page_offset | page_length)
                                & (boot_cpu_data.x86_clflush_size - 1));
  
                ret = shmem_pwrite_fast(page, shmem_page_offset, page_length,
                                        user_data, page_do_bit17_swizzling,
                                        partial_cacheline_write,
 -                                      needs_clflush_after);
 +                                      needs_clflush & CLFLUSH_AFTER);
                if (ret == 0)
                        goto next_page;
  
                ret = shmem_pwrite_slow(page, shmem_page_offset, page_length,
                                        user_data, page_do_bit17_swizzling,
                                        partial_cacheline_write,
 -                                      needs_clflush_after);
 +                                      needs_clflush & CLFLUSH_AFTER);
  
                mutex_lock(&dev->struct_mutex);
  
@@@ -1383,7 -1217,7 +1383,7 @@@ next_page
        }
  
  out:
 -      i915_gem_object_unpin_pages(obj);
 +      i915_gem_obj_finish_shmem_access(obj);
  
        if (hit_slowpath) {
                /*
                 * cachelines in-line while writing and the object moved
                 * out of the cpu write domain while we've dropped the lock.
                 */
 -              if (!needs_clflush_after &&
 +              if (!(needs_clflush & CLFLUSH_AFTER) &&
                    obj->base.write_domain != I915_GEM_DOMAIN_CPU) {
                        if (i915_gem_clflush_object(obj, obj->pin_display))
 -                              needs_clflush_after = true;
 +                              needs_clflush |= CLFLUSH_AFTER;
                }
        }
  
 -      if (needs_clflush_after)
 +      if (needs_clflush & CLFLUSH_AFTER)
                i915_gem_chipset_flush(to_i915(dev));
 -      else
 -              obj->cache_dirty = true;
  
        intel_fb_obj_flush(obj, false, ORIGIN_CPU);
        return ret;
@@@ -1437,29 -1273,27 +1437,29 @@@ i915_gem_pwrite_ioctl(struct drm_devic
                        return -EFAULT;
        }
  
 -      intel_runtime_pm_get(dev_priv);
 -
 -      ret = i915_mutex_lock_interruptible(dev);
 -      if (ret)
 -              goto put_rpm;
 -
 -      obj = to_intel_bo(drm_gem_object_lookup(file, args->handle));
 -      if (&obj->base == NULL) {
 -              ret = -ENOENT;
 -              goto unlock;
 -      }
 +      obj = i915_gem_object_lookup(file, args->handle);
 +      if (!obj)
 +              return -ENOENT;
  
        /* Bounds check destination. */
        if (args->offset > obj->base.size ||
            args->size > obj->base.size - args->offset) {
                ret = -EINVAL;
 -              goto out;
 +              goto err;
        }
  
        trace_i915_gem_object_pwrite(obj, args->offset, args->size);
  
 +      ret = __unsafe_wait_rendering(obj, to_rps_client(file), false);
 +      if (ret)
 +              goto err;
 +
 +      intel_runtime_pm_get(dev_priv);
 +
 +      ret = i915_mutex_lock_interruptible(dev);
 +      if (ret)
 +              goto err_rpm;
 +
        ret = -EFAULT;
        /* We can only do the GTT pwrite on untiled buffers, as otherwise
         * it would end up going through the fenced access, and we'll get
        if (ret == -EFAULT || ret == -ENOSPC) {
                if (obj->phys_handle)
                        ret = i915_gem_phys_pwrite(obj, args, file);
 -              else if (i915_gem_object_has_struct_page(obj))
 -                      ret = i915_gem_shmem_pwrite(dev, obj, args, file);
                else
 -                      ret = -ENODEV;
 +                      ret = i915_gem_shmem_pwrite(dev, obj, args, file);
        }
  
 -out:
 -      drm_gem_object_unreference(&obj->base);
 -unlock:
 +      i915_gem_object_put(obj);
        mutex_unlock(&dev->struct_mutex);
 -put_rpm:
        intel_runtime_pm_put(dev_priv);
  
        return ret;
 -}
 -
 -static int
 -i915_gem_check_wedge(unsigned reset_counter, bool interruptible)
 -{
 -      if (__i915_terminally_wedged(reset_counter))
 -              return -EIO;
 -
 -      if (__i915_reset_in_progress(reset_counter)) {
 -              /* Non-interruptible callers can't handle -EAGAIN, hence return
 -               * -EIO unconditionally for these. */
 -              if (!interruptible)
 -                      return -EIO;
 -
 -              return -EAGAIN;
 -      }
  
 -      return 0;
 +err_rpm:
 +      intel_runtime_pm_put(dev_priv);
 +err:
 +      i915_gem_object_put_unlocked(obj);
 +      return ret;
  }
  
 -static unsigned long local_clock_us(unsigned *cpu)
 +static inline enum fb_op_origin
 +write_origin(struct drm_i915_gem_object *obj, unsigned domain)
  {
 -      unsigned long t;
 -
 -      /* Cheaply and approximately convert from nanoseconds to microseconds.
 -       * The result and subsequent calculations are also defined in the same
 -       * approximate microseconds units. The principal source of timing
 -       * error here is from the simple truncation.
 -       *
 -       * Note that local_clock() is only defined wrt to the current CPU;
 -       * the comparisons are no longer valid if we switch CPUs. Instead of
 -       * blocking preemption for the entire busywait, we can detect the CPU
 -       * switch and use that as indicator of system load and a reason to
 -       * stop busywaiting, see busywait_stop().
 -       */
 -      *cpu = get_cpu();
 -      t = local_clock() >> 10;
 -      put_cpu();
 -
 -      return t;
 -}
 -
 -static bool busywait_stop(unsigned long timeout, unsigned cpu)
 -{
 -      unsigned this_cpu;
 -
 -      if (time_after(local_clock_us(&this_cpu), timeout))
 -              return true;
 -
 -      return this_cpu != cpu;
 -}
 -
 -bool __i915_spin_request(const struct drm_i915_gem_request *req,
 -                       int state, unsigned long timeout_us)
 -{
 -      unsigned cpu;
 -
 -      /* When waiting for high frequency requests, e.g. during synchronous
 -       * rendering split between the CPU and GPU, the finite amount of time
 -       * required to set up the irq and wait upon it limits the response
 -       * rate. By busywaiting on the request completion for a short while we
 -       * can service the high frequency waits as quick as possible. However,
 -       * if it is a slow request, we want to sleep as quickly as possible.
 -       * The tradeoff between waiting and sleeping is roughly the time it
 -       * takes to sleep on a request, on the order of a microsecond.
 -       */
 -
 -      timeout_us += local_clock_us(&cpu);
 -      do {
 -              if (i915_gem_request_completed(req))
 -                      return true;
 -
 -              if (signal_pending_state(state, current))
 -                      break;
 -
 -              if (busywait_stop(timeout_us, cpu))
 -                      break;
 -
 -              cpu_relax_lowlatency();
 -      } while (!need_resched());
 -
 -      return false;
 -}
 -
 -/**
 - * __i915_wait_request - wait until execution of request has finished
 - * @req: duh!
 - * @interruptible: do an interruptible wait (normally yes)
 - * @timeout: in - how long to wait (NULL forever); out - how much time remaining
 - * @rps: RPS client
 - *
 - * Note: It is of utmost importance that the passed in seqno and reset_counter
 - * values have been read by the caller in an smp safe manner. Where read-side
 - * locks are involved, it is sufficient to read the reset_counter before
 - * unlocking the lock that protects the seqno. For lockless tricks, the
 - * reset_counter _must_ be read before, and an appropriate smp_rmb must be
 - * inserted.
 - *
 - * Returns 0 if the request was found within the alloted time. Else returns the
 - * errno with remaining time filled in timeout argument.
 - */
 -int __i915_wait_request(struct drm_i915_gem_request *req,
 -                      bool interruptible,
 -                      s64 *timeout,
 -                      struct intel_rps_client *rps)
 -{
 -      int state = interruptible ? TASK_INTERRUPTIBLE : TASK_UNINTERRUPTIBLE;
 -      DEFINE_WAIT(reset);
 -      struct intel_wait wait;
 -      unsigned long timeout_remain;
 -      s64 before = 0; /* Only to silence a compiler warning. */
 -      int ret = 0;
 -
 -      might_sleep();
 -
 -      if (list_empty(&req->list))
 -              return 0;
 -
 -      if (i915_gem_request_completed(req))
 -              return 0;
 -
 -      timeout_remain = MAX_SCHEDULE_TIMEOUT;
 -      if (timeout) {
 -              if (WARN_ON(*timeout < 0))
 -                      return -EINVAL;
 -
 -              if (*timeout == 0)
 -                      return -ETIME;
 -
 -              timeout_remain = nsecs_to_jiffies_timeout(*timeout);
 -
 -              /*
 -               * Record current time in case interrupted by signal, or wedged.
 -               */
 -              before = ktime_get_raw_ns();
 -      }
 -
 -      trace_i915_gem_request_wait_begin(req);
 -
 -      /* This client is about to stall waiting for the GPU. In many cases
 -       * this is undesirable and limits the throughput of the system, as
 -       * many clients cannot continue processing user input/output whilst
 -       * blocked. RPS autotuning may take tens of milliseconds to respond
 -       * to the GPU load and thus incurs additional latency for the client.
 -       * We can circumvent that by promoting the GPU frequency to maximum
 -       * before we wait. This makes the GPU throttle up much more quickly
 -       * (good for benchmarks and user experience, e.g. window animations),
 -       * but at a cost of spending more power processing the workload
 -       * (bad for battery). Not all clients even want their results
 -       * immediately and for them we should just let the GPU select its own
 -       * frequency to maximise efficiency. To prevent a single client from
 -       * forcing the clocks too high for the whole system, we only allow
 -       * each client to waitboost once in a busy period.
 -       */
 -      if (INTEL_INFO(req->i915)->gen >= 6)
 -              gen6_rps_boost(req->i915, rps, req->emitted_jiffies);
 -
 -      /* Optimistic spin for the next ~jiffie before touching IRQs */
 -      if (i915_spin_request(req, state, 5))
 -              goto complete;
 -
 -      set_current_state(state);
 -      add_wait_queue(&req->i915->gpu_error.wait_queue, &reset);
 -
 -      intel_wait_init(&wait, req->seqno);
 -      if (intel_engine_add_wait(req->engine, &wait))
 -              /* In order to check that we haven't missed the interrupt
 -               * as we enabled it, we need to kick ourselves to do a
 -               * coherent check on the seqno before we sleep.
 -               */
 -              goto wakeup;
 -
 -      for (;;) {
 -              if (signal_pending_state(state, current)) {
 -                      ret = -ERESTARTSYS;
 -                      break;
 -              }
 -
 -              timeout_remain = io_schedule_timeout(timeout_remain);
 -              if (timeout_remain == 0) {
 -                      ret = -ETIME;
 -                      break;
 -              }
 -
 -              if (intel_wait_complete(&wait))
 -                      break;
 -
 -              set_current_state(state);
 -
 -wakeup:
 -              /* Carefully check if the request is complete, giving time
 -               * for the seqno to be visible following the interrupt.
 -               * We also have to check in case we are kicked by the GPU
 -               * reset in order to drop the struct_mutex.
 -               */
 -              if (__i915_request_irq_complete(req))
 -                      break;
 -
 -              /* Only spin if we know the GPU is processing this request */
 -              if (i915_spin_request(req, state, 2))
 -                      break;
 -      }
 -      remove_wait_queue(&req->i915->gpu_error.wait_queue, &reset);
 -
 -      intel_engine_remove_wait(req->engine, &wait);
 -      __set_current_state(TASK_RUNNING);
 -complete:
 -      trace_i915_gem_request_wait_end(req);
 -
 -      if (timeout) {
 -              s64 tres = *timeout - (ktime_get_raw_ns() - before);
 -
 -              *timeout = tres < 0 ? 0 : tres;
 -
 -              /*
 -               * Apparently ktime isn't accurate enough and occasionally has a
 -               * bit of mismatch in the jiffies<->nsecs<->ktime loop. So patch
 -               * things up to make the test happy. We allow up to 1 jiffy.
 -               *
 -               * This is a regrssion from the timespec->ktime conversion.
 -               */
 -              if (ret == -ETIME && *timeout < jiffies_to_usecs(1)*1000)
 -                      *timeout = 0;
 -      }
 -
 -      if (rps && req->seqno == req->engine->last_submitted_seqno) {
 -              /* The GPU is now idle and this client has stalled.
 -               * Since no other client has submitted a request in the
 -               * meantime, assume that this client is the only one
 -               * supplying work to the GPU but is unable to keep that
 -               * work supplied because it is waiting. Since the GPU is
 -               * then never kept fully busy, RPS autoclocking will
 -               * keep the clocks relatively low, causing further delays.
 -               * Compensate by giving the synchronous client credit for
 -               * a waitboost next time.
 -               */
 -              spin_lock(&req->i915->rps.client_lock);
 -              list_del_init(&rps->link);
 -              spin_unlock(&req->i915->rps.client_lock);
 -      }
 -
 -      return ret;
 -}
 -
 -int i915_gem_request_add_to_client(struct drm_i915_gem_request *req,
 -                                 struct drm_file *file)
 -{
 -      struct drm_i915_file_private *file_priv;
 -
 -      WARN_ON(!req || !file || req->file_priv);
 -
 -      if (!req || !file)
 -              return -EINVAL;
 -
 -      if (req->file_priv)
 -              return -EINVAL;
 -
 -      file_priv = file->driver_priv;
 -
 -      spin_lock(&file_priv->mm.lock);
 -      req->file_priv = file_priv;
 -      list_add_tail(&req->client_list, &file_priv->mm.request_list);
 -      spin_unlock(&file_priv->mm.lock);
 -
 -      req->pid = get_pid(task_pid(current));
 -
 -      return 0;
 -}
 -
 -static inline void
 -i915_gem_request_remove_from_client(struct drm_i915_gem_request *request)
 -{
 -      struct drm_i915_file_private *file_priv = request->file_priv;
 -
 -      if (!file_priv)
 -              return;
 -
 -      spin_lock(&file_priv->mm.lock);
 -      list_del(&request->client_list);
 -      request->file_priv = NULL;
 -      spin_unlock(&file_priv->mm.lock);
 -
 -      put_pid(request->pid);
 -      request->pid = NULL;
 -}
 -
 -static void i915_gem_request_retire(struct drm_i915_gem_request *request)
 -{
 -      trace_i915_gem_request_retire(request);
 -
 -      /* We know the GPU must have read the request to have
 -       * sent us the seqno + interrupt, so use the position
 -       * of tail of the request to update the last known position
 -       * of the GPU head.
 -       *
 -       * Note this requires that we are always called in request
 -       * completion order.
 -       */
 -      request->ringbuf->last_retired_head = request->postfix;
 -
 -      list_del_init(&request->list);
 -      i915_gem_request_remove_from_client(request);
 -
 -      if (request->previous_context) {
 -              if (i915.enable_execlists)
 -                      intel_lr_context_unpin(request->previous_context,
 -                                             request->engine);
 -      }
 -
 -      i915_gem_context_unreference(request->ctx);
 -      i915_gem_request_unreference(request);
 -}
 -
 -static void
 -__i915_gem_request_retire__upto(struct drm_i915_gem_request *req)
 -{
 -      struct intel_engine_cs *engine = req->engine;
 -      struct drm_i915_gem_request *tmp;
 -
 -      lockdep_assert_held(&engine->i915->drm.struct_mutex);
 -
 -      if (list_empty(&req->list))
 -              return;
 -
 -      do {
 -              tmp = list_first_entry(&engine->request_list,
 -                                     typeof(*tmp), list);
 -
 -              i915_gem_request_retire(tmp);
 -      } while (tmp != req);
 -
 -      WARN_ON(i915_verify_lists(engine->dev));
 -}
 -
 -/**
 - * Waits for a request to be signaled, and cleans up the
 - * request and object lists appropriately for that event.
 - * @req: request to wait on
 - */
 -int
 -i915_wait_request(struct drm_i915_gem_request *req)
 -{
 -      struct drm_i915_private *dev_priv = req->i915;
 -      bool interruptible;
 -      int ret;
 -
 -      interruptible = dev_priv->mm.interruptible;
 -
 -      BUG_ON(!mutex_is_locked(&dev_priv->drm.struct_mutex));
 -
 -      ret = __i915_wait_request(req, interruptible, NULL, NULL);
 -      if (ret)
 -              return ret;
 -
 -      /* If the GPU hung, we want to keep the requests to find the guilty. */
 -      if (!i915_reset_in_progress(&dev_priv->gpu_error))
 -              __i915_gem_request_retire__upto(req);
 -
 -      return 0;
 -}
 -
 -/**
 - * Ensures that all rendering to the object has completed and the object is
 - * safe to unbind from the GTT or access from the CPU.
 - * @obj: i915 gem object
 - * @readonly: waiting for read access or write
 - */
 -int
 -i915_gem_object_wait_rendering(struct drm_i915_gem_object *obj,
 -                             bool readonly)
 -{
 -      int ret, i;
 -
 -      if (!obj->active)
 -              return 0;
 -
 -      if (readonly) {
 -              if (obj->last_write_req != NULL) {
 -                      ret = i915_wait_request(obj->last_write_req);
 -                      if (ret)
 -                              return ret;
 -
 -                      i = obj->last_write_req->engine->id;
 -                      if (obj->last_read_req[i] == obj->last_write_req)
 -                              i915_gem_object_retire__read(obj, i);
 -                      else
 -                              i915_gem_object_retire__write(obj);
 -              }
 -      } else {
 -              for (i = 0; i < I915_NUM_ENGINES; i++) {
 -                      if (obj->last_read_req[i] == NULL)
 -                              continue;
 -
 -                      ret = i915_wait_request(obj->last_read_req[i]);
 -                      if (ret)
 -                              return ret;
 -
 -                      i915_gem_object_retire__read(obj, i);
 -              }
 -              GEM_BUG_ON(obj->active);
 -      }
 -
 -      return 0;
 -}
 -
 -static void
 -i915_gem_object_retire_request(struct drm_i915_gem_object *obj,
 -                             struct drm_i915_gem_request *req)
 -{
 -      int ring = req->engine->id;
 -
 -      if (obj->last_read_req[ring] == req)
 -              i915_gem_object_retire__read(obj, ring);
 -      else if (obj->last_write_req == req)
 -              i915_gem_object_retire__write(obj);
 -
 -      if (!i915_reset_in_progress(&req->i915->gpu_error))
 -              __i915_gem_request_retire__upto(req);
 -}
 -
 -/* A nonblocking variant of the above wait. This is a highly dangerous routine
 - * as the object state may change during this call.
 - */
 -static __must_check int
 -i915_gem_object_wait_rendering__nonblocking(struct drm_i915_gem_object *obj,
 -                                          struct intel_rps_client *rps,
 -                                          bool readonly)
 -{
 -      struct drm_device *dev = obj->base.dev;
 -      struct drm_i915_private *dev_priv = to_i915(dev);
 -      struct drm_i915_gem_request *requests[I915_NUM_ENGINES];
 -      int ret, i, n = 0;
 -
 -      BUG_ON(!mutex_is_locked(&dev->struct_mutex));
 -      BUG_ON(!dev_priv->mm.interruptible);
 -
 -      if (!obj->active)
 -              return 0;
 -
 -      if (readonly) {
 -              struct drm_i915_gem_request *req;
 -
 -              req = obj->last_write_req;
 -              if (req == NULL)
 -                      return 0;
 -
 -              requests[n++] = i915_gem_request_reference(req);
 -      } else {
 -              for (i = 0; i < I915_NUM_ENGINES; i++) {
 -                      struct drm_i915_gem_request *req;
 -
 -                      req = obj->last_read_req[i];
 -                      if (req == NULL)
 -                              continue;
 -
 -                      requests[n++] = i915_gem_request_reference(req);
 -              }
 -      }
 -
 -      mutex_unlock(&dev->struct_mutex);
 -      ret = 0;
 -      for (i = 0; ret == 0 && i < n; i++)
 -              ret = __i915_wait_request(requests[i], true, NULL, rps);
 -      mutex_lock(&dev->struct_mutex);
 -
 -      for (i = 0; i < n; i++) {
 -              if (ret == 0)
 -                      i915_gem_object_retire_request(obj, requests[i]);
 -              i915_gem_request_unreference(requests[i]);
 -      }
 -
 -      return ret;
 -}
 -
 -static struct intel_rps_client *to_rps_client(struct drm_file *file)
 -{
 -      struct drm_i915_file_private *fpriv = file->driver_priv;
 -      return &fpriv->rps;
 -}
 -
 -static enum fb_op_origin
 -write_origin(struct drm_i915_gem_object *obj, unsigned domain)
 -{
 -      return domain == I915_GEM_DOMAIN_GTT && !obj->has_wc_mmap ?
 -             ORIGIN_GTT : ORIGIN_CPU;
 +      return (domain == I915_GEM_DOMAIN_GTT ?
 +              obj->frontbuffer_ggtt_origin : ORIGIN_CPU);
  }
  
  /**
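
The reworked pwrite ioctl above looks up the object and waits for rendering without holding struct_mutex, only then takes the runtime-pm wakeref and the mutex, and unwinds through the err_rpm/err labels in reverse order of acquisition. A small standalone sketch of that label-per-resource unwind pattern, using hypothetical stand-ins for the object reference, wakeref and mutex:

#include <stdio.h>

static int get_object(void)	{ return 0; }
static void put_object(void)	{ puts("put object"); }
static int get_rpm(void)	{ return 0; }
static void put_rpm(void)	{ puts("put rpm wakeref"); }
static int lock_mutex(void)	{ return 0; }
static void unlock_mutex(void)	{ puts("unlock mutex"); }

static int pwrite_like(void)
{
	int ret;

	ret = get_object();		/* lockless lookup */
	if (ret)
		return ret;

	ret = get_rpm();		/* wakeref taken after the wait, not before */
	if (ret)
		goto err_obj;

	ret = lock_mutex();
	if (ret)
		goto err_rpm;

	/* ... perform the write ... */

	unlock_mutex();
	put_rpm();
	put_object();
	return 0;

err_rpm:
	put_rpm();
err_obj:
	put_object();
	return ret;
}

int main(void)
{
	return pwrite_like();
}

Each label releases exactly what was acquired before the failing step, so no path can leak the wakeref or the reference.
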
@@@ -1520,7 -1831,10 +1520,7 @@@ i915_gem_set_domain_ioctl(struct drm_de
        int ret;
  
        /* Only handle setting domains to types used by the CPU. */
 -      if (write_domain & I915_GEM_GPU_DOMAINS)
 -              return -EINVAL;
 -
 -      if (read_domains & I915_GEM_GPU_DOMAINS)
 +      if ((write_domain | read_domains) & I915_GEM_GPU_DOMAINS)
                return -EINVAL;
  
        /* Having something in the write domain implies it's in the read
        if (write_domain != 0 && read_domains != write_domain)
                return -EINVAL;
  
 -      ret = i915_mutex_lock_interruptible(dev);
 -      if (ret)
 -              return ret;
 -
 -      obj = to_intel_bo(drm_gem_object_lookup(file, args->handle));
 -      if (&obj->base == NULL) {
 -              ret = -ENOENT;
 -              goto unlock;
 -      }
 +      obj = i915_gem_object_lookup(file, args->handle);
 +      if (!obj)
 +              return -ENOENT;
  
        /* Try to flush the object off the GPU without holding the lock.
         * We will repeat the flush holding the lock in the normal manner
         * to catch cases where we are gazumped.
         */
 -      ret = i915_gem_object_wait_rendering__nonblocking(obj,
 -                                                        to_rps_client(file),
 -                                                        !write_domain);
 +      ret = __unsafe_wait_rendering(obj, to_rps_client(file), !write_domain);
 +      if (ret)
 +              goto err;
 +
 +      ret = i915_mutex_lock_interruptible(dev);
        if (ret)
 -              goto unref;
 +              goto err;
  
        if (read_domains & I915_GEM_DOMAIN_GTT)
                ret = i915_gem_object_set_to_gtt_domain(obj, write_domain != 0);
        if (write_domain != 0)
                intel_fb_obj_invalidate(obj, write_origin(obj, write_domain));
  
 -unref:
 -      drm_gem_object_unreference(&obj->base);
 -unlock:
 +      i915_gem_object_put(obj);
        mutex_unlock(&dev->struct_mutex);
        return ret;
 +
 +err:
 +      i915_gem_object_put_unlocked(obj);
 +      return ret;
  }
  
  /**
@@@ -1574,23 -1890,26 +1574,23 @@@ i915_gem_sw_finish_ioctl(struct drm_dev
  {
        struct drm_i915_gem_sw_finish *args = data;
        struct drm_i915_gem_object *obj;
 -      int ret = 0;
 +      int err = 0;
  
 -      ret = i915_mutex_lock_interruptible(dev);
 -      if (ret)
 -              return ret;
 -
 -      obj = to_intel_bo(drm_gem_object_lookup(file, args->handle));
 -      if (&obj->base == NULL) {
 -              ret = -ENOENT;
 -              goto unlock;
 -      }
 +      obj = i915_gem_object_lookup(file, args->handle);
 +      if (!obj)
 +              return -ENOENT;
  
        /* Pinned buffers may be scanout, so flush the cache */
 -      if (obj->pin_display)
 -              i915_gem_object_flush_cpu_write_domain(obj);
 +      if (READ_ONCE(obj->pin_display)) {
 +              err = i915_mutex_lock_interruptible(dev);
 +              if (!err) {
 +                      i915_gem_object_flush_cpu_write_domain(obj);
 +                      mutex_unlock(&dev->struct_mutex);
 +              }
 +      }
  
 -      drm_gem_object_unreference(&obj->base);
 -unlock:
 -      mutex_unlock(&dev->struct_mutex);
 -      return ret;
 +      i915_gem_object_put_unlocked(obj);
 +      return err;
  }
  
  /**
@@@ -1618,7 -1937,7 +1618,7 @@@ i915_gem_mmap_ioctl(struct drm_device *
                    struct drm_file *file)
  {
        struct drm_i915_gem_mmap *args = data;
 -      struct drm_gem_object *obj;
 +      struct drm_i915_gem_object *obj;
        unsigned long addr;
  
        if (args->flags & ~(I915_MMAP_WC))
        if (args->flags & I915_MMAP_WC && !boot_cpu_has(X86_FEATURE_PAT))
                return -ENODEV;
  
 -      obj = drm_gem_object_lookup(file, args->handle);
 -      if (obj == NULL)
 +      obj = i915_gem_object_lookup(file, args->handle);
 +      if (!obj)
                return -ENOENT;
  
        /* prime objects have no backing filp to GEM mmap
         * pages from.
         */
 -      if (!obj->filp) {
 -              drm_gem_object_unreference_unlocked(obj);
 +      if (!obj->base.filp) {
 +              i915_gem_object_put_unlocked(obj);
                return -EINVAL;
        }
  
 -      addr = vm_mmap(obj->filp, 0, args->size,
 +      addr = vm_mmap(obj->base.filp, 0, args->size,
                       PROT_READ | PROT_WRITE, MAP_SHARED,
                       args->offset);
        if (args->flags & I915_MMAP_WC) {
                struct vm_area_struct *vma;
  
                if (down_write_killable(&mm->mmap_sem)) {
 -                      drm_gem_object_unreference_unlocked(obj);
 +                      i915_gem_object_put_unlocked(obj);
                        return -EINTR;
                }
                vma = find_vma(mm, addr);
                up_write(&mm->mmap_sem);
  
                /* This may race, but that's ok, it only gets set */
 -              WRITE_ONCE(to_intel_bo(obj)->has_wc_mmap, true);
 +              WRITE_ONCE(obj->frontbuffer_ggtt_origin, ORIGIN_CPU);
        }
 -      drm_gem_object_unreference_unlocked(obj);
 +      i915_gem_object_put_unlocked(obj);
        if (IS_ERR((void *)addr))
                return addr;
  
        return 0;
  }
  
 +static unsigned int tile_row_pages(struct drm_i915_gem_object *obj)
 +{
 +      u64 size;
 +
 +      size = i915_gem_object_get_stride(obj);
 +      size *= i915_gem_object_get_tiling(obj) == I915_TILING_Y ? 32 : 8;
 +
 +      return size >> PAGE_SHIFT;
 +}
 +
 +/**
 + * i915_gem_mmap_gtt_version - report the current feature set for GTT mmaps
 + *
 + * A history of the GTT mmap interface:
 + *
 + * 0 - Everything had to fit into the GTT. Both parties of a memcpy had to
 + *     be aligned and suitable for fencing, and still fit into the available
 + *     mappable space left by the pinned display objects. A classic problem
 + *     we called the page-fault-of-doom where we would ping-pong between
 + *     two objects that could not fit inside the GTT and so the memcpy
 + *     would page one object in at the expense of the other between every
 + *     single byte.
 + *
 + * 1 - Objects can be any size, and have any compatible fencing (X, Y, or none
 + *     as set via i915_gem_set_tiling() [DRM_I915_GEM_SET_TILING]). If the
 + *     object is too large for the available space (or simply too large
 + *     for the mappable aperture!), a view is created instead and faulted
 + *     into userspace. (This view is aligned and sized appropriately for
 + *     fenced access.)
 + *
 + * Restrictions:
 + *
 + *  * snoopable objects cannot be accessed via the GTT. It can cause machine
 + *    hangs on some architectures, corruption on others. An attempt to service
 + *    a GTT page fault from a snoopable object will generate a SIGBUS.
 + *
 + *  * the object must be able to fit into RAM (physical memory, though not
 + *    limited to the mappable aperture).
 + *
 + *
 + * Caveats:
 + *
 + *  * a new GTT page fault will synchronize rendering from the GPU and flush
 + *    all data to system memory. Subsequent access will not be synchronized.
 + *
 + *  * all mappings are revoked on runtime device suspend.
 + *
 + *  * there are only 8, 16 or 32 fence registers to share between all users
 + *    (older machines require a fence register for display and blitter access
 + *    as well). Contention of the fence registers will cause the previous users
 + *    to be unmapped and any new access will generate new page faults.
 + *
 + *  * running out of memory while servicing a fault may generate a SIGBUS,
 + *    rather than the expected SIGSEGV.
 + */
 +int i915_gem_mmap_gtt_version(void)
 +{
 +      return 1;
 +}
 +
  /**
   * i915_gem_fault - fault a page into the GTT
 - * @vma: VMA in question
 + * @area: CPU VMA in question
   * @vmf: fault info
   *
   * The fault handler is set up by drm_gem_mmap() when an object is GTT mapped
   * from the GTT and/or fence registers to make room.  So performance may
   * suffer if the GTT working set is large or there are few fence registers
   * left.
 + *
 + * The current feature set supported by i915_gem_fault() and thus GTT mmaps
 + * is exposed via I915_PARAM_MMAP_GTT_VERSION (see i915_gem_mmap_gtt_version).
   */
 -int i915_gem_fault(struct vm_area_struct *vma, struct vm_fault *vmf)
 +int i915_gem_fault(struct vm_area_struct *area, struct vm_fault *vmf)
  {
 -      struct drm_i915_gem_object *obj = to_intel_bo(vma->vm_private_data);
 +#define MIN_CHUNK_PAGES ((1 << 20) >> PAGE_SHIFT) /* 1 MiB */
 +      struct drm_i915_gem_object *obj = to_intel_bo(area->vm_private_data);
        struct drm_device *dev = obj->base.dev;
        struct drm_i915_private *dev_priv = to_i915(dev);
        struct i915_ggtt *ggtt = &dev_priv->ggtt;
 -      struct i915_ggtt_view view = i915_ggtt_view_normal;
 -      pgoff_t page_offset;
 -      unsigned long pfn;
 -      int ret = 0;
        bool write = !!(vmf->flags & FAULT_FLAG_WRITE);
 -
 -      intel_runtime_pm_get(dev_priv);
 +      struct i915_vma *vma;
 +      pgoff_t page_offset;
 +      unsigned int flags;
 +      int ret;
  
        /* We don't use vmf->pgoff since that has the fake offset */
 -      page_offset = ((unsigned long)vmf->virtual_address - vma->vm_start) >>
 +      page_offset = ((unsigned long)vmf->virtual_address - area->vm_start) >>
                PAGE_SHIFT;
  
 -      ret = i915_mutex_lock_interruptible(dev);
 -      if (ret)
 -              goto out;
 -
        trace_i915_gem_object_fault(obj, page_offset, true, write);
  
        /* Try to flush the object off the GPU first without holding the lock.
 -       * Upon reacquiring the lock, we will perform our sanity checks and then
 +       * Upon acquiring the lock, we will perform our sanity checks and then
         * repeat the flush holding the lock in the normal manner to catch cases
         * where we are gazumped.
         */
 -      ret = i915_gem_object_wait_rendering__nonblocking(obj, NULL, !write);
 +      ret = __unsafe_wait_rendering(obj, NULL, !write);
        if (ret)
 -              goto unlock;
 +              goto err;
 +
 +      intel_runtime_pm_get(dev_priv);
 +
 +      ret = i915_mutex_lock_interruptible(dev);
 +      if (ret)
 +              goto err_rpm;
  
        /* Access to snoopable pages through the GTT is incoherent. */
        if (obj->cache_level != I915_CACHE_NONE && !HAS_LLC(dev)) {
                ret = -EFAULT;
 -              goto unlock;
 +              goto err_unlock;
        }
  
 -      /* Use a partial view if the object is bigger than the aperture. */
 -      if (obj->base.size >= ggtt->mappable_end &&
 -          obj->tiling_mode == I915_TILING_NONE) {
 -              static const unsigned int chunk_size = 256; // 1 MiB
 +      /* If the object is smaller than a couple of partial vma, it is
 +       * not worth only creating a single partial vma - we may as well
 +       * clear enough space for the full object.
 +       */
 +      flags = PIN_MAPPABLE;
 +      if (obj->base.size > 2 * MIN_CHUNK_PAGES << PAGE_SHIFT)
 +              flags |= PIN_NONBLOCK | PIN_NONFAULT;
 +
 +      /* Now pin it into the GTT as needed */
 +      vma = i915_gem_object_ggtt_pin(obj, NULL, 0, 0, flags);
 +      if (IS_ERR(vma)) {
 +              struct i915_ggtt_view view;
 +              unsigned int chunk_size;
 +
 +              /* Use a partial view if it is bigger than available space */
 +              chunk_size = MIN_CHUNK_PAGES;
 +              if (i915_gem_object_is_tiled(obj))
 +                      chunk_size = max(chunk_size, tile_row_pages(obj));
  
                memset(&view, 0, sizeof(view));
                view.type = I915_GGTT_VIEW_PARTIAL;
                view.params.partial.offset = rounddown(page_offset, chunk_size);
                view.params.partial.size =
 -                      min_t(unsigned int,
 -                            chunk_size,
 -                            (vma->vm_end - vma->vm_start)/PAGE_SIZE -
 +                      min_t(unsigned int, chunk_size,
 +                            (area->vm_end - area->vm_start) / PAGE_SIZE -
                              view.params.partial.offset);
 -      }
  
 -      /* Now pin it into the GTT if needed */
 -      ret = i915_gem_object_ggtt_pin(obj, &view, 0, PIN_MAPPABLE);
 -      if (ret)
 -              goto unlock;
 +              /* If the partial covers the entire object, just create a
 +               * normal VMA.
 +               */
 +              if (chunk_size >= obj->base.size >> PAGE_SHIFT)
 +                      view.type = I915_GGTT_VIEW_NORMAL;
 +
 +              /* Userspace is now writing through an untracked VMA, abandon
 +               * all hope that the hardware is able to track future writes.
 +               */
 +              obj->frontbuffer_ggtt_origin = ORIGIN_CPU;
 +
 +              vma = i915_gem_object_ggtt_pin(obj, &view, 0, 0, PIN_MAPPABLE);
 +      }
 +      if (IS_ERR(vma)) {
 +              ret = PTR_ERR(vma);
 +              goto err_unlock;
 +      }
  
        ret = i915_gem_object_set_to_gtt_domain(obj, write);
        if (ret)
 -              goto unpin;
 +              goto err_unpin;
  
 -      ret = i915_gem_object_get_fence(obj);
 +      ret = i915_vma_get_fence(vma);
        if (ret)
 -              goto unpin;
 +              goto err_unpin;
  
        /* Finally, remap it using the new GTT offset */
 -      pfn = ggtt->mappable_base +
 -              i915_gem_obj_ggtt_offset_view(obj, &view);
 -      pfn >>= PAGE_SHIFT;
 -
 -      if (unlikely(view.type == I915_GGTT_VIEW_PARTIAL)) {
 -              /* Overriding existing pages in partial view does not cause
 -               * us any trouble as TLBs are still valid because the fault
 -               * is due to userspace losing part of the mapping or never
 -               * having accessed it before (at this partials' range).
 -               */
 -              unsigned long base = vma->vm_start +
 -                                   (view.params.partial.offset << PAGE_SHIFT);
 -              unsigned int i;
 -
 -              for (i = 0; i < view.params.partial.size; i++) {
 -                      ret = vm_insert_pfn(vma, base + i * PAGE_SIZE, pfn + i);
 -                      if (ret)
 -                              break;
 -              }
 -
 -              obj->fault_mappable = true;
 -      } else {
 -              if (!obj->fault_mappable) {
 -                      unsigned long size = min_t(unsigned long,
 -                                                 vma->vm_end - vma->vm_start,
 -                                                 obj->base.size);
 -                      int i;
 -
 -                      for (i = 0; i < size >> PAGE_SHIFT; i++) {
 -                              ret = vm_insert_pfn(vma,
 -                                                  (unsigned long)vma->vm_start + i * PAGE_SIZE,
 -                                                  pfn + i);
 -                              if (ret)
 -                                      break;
 -                      }
 +      ret = remap_io_mapping(area,
 +                             area->vm_start + (vma->ggtt_view.params.partial.offset << PAGE_SHIFT),
 +                             (ggtt->mappable_base + vma->node.start) >> PAGE_SHIFT,
 +                             min_t(u64, vma->size, area->vm_end - area->vm_start),
 +                             &ggtt->mappable);
 +      if (ret)
 +              goto err_unpin;
  
 -                      obj->fault_mappable = true;
 -              } else
 -                      ret = vm_insert_pfn(vma,
 -                                          (unsigned long)vmf->virtual_address,
 -                                          pfn + page_offset);
 -      }
 -unpin:
 -      i915_gem_object_ggtt_unpin_view(obj, &view);
 -unlock:
 +      obj->fault_mappable = true;
 +err_unpin:
 +      __i915_vma_unpin(vma);
 +err_unlock:
        mutex_unlock(&dev->struct_mutex);
 -out:
 +err_rpm:
 +      intel_runtime_pm_put(dev_priv);
 +err:
        switch (ret) {
        case -EIO:
                /*
                ret = VM_FAULT_SIGBUS;
                break;
        }
 -
 -      intel_runtime_pm_put(dev_priv);
        return ret;
  }
  
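
The comment block above documents the versioned GTT mmap interface that the new i915_gem_mmap_gtt_version() reports. A hedged userspace sketch of how that version could be queried through the GETPARAM ioctl mentioned in the diff (assumes the usual DRM uapi header layout and /dev/dri/card0; not part of the patch itself):

#include <fcntl.h>
#include <stdio.h>
#include <sys/ioctl.h>
#include <unistd.h>
#include <drm/i915_drm.h>

int main(void)
{
	int fd = open("/dev/dri/card0", O_RDWR);
	int version = 0;
	struct drm_i915_getparam gp = {
		.param = I915_PARAM_MMAP_GTT_VERSION,
		.value = &version,
	};

	if (fd < 0)
		return 1;

	if (ioctl(fd, DRM_IOCTL_I915_GETPARAM, &gp) == 0)
		printf("GTT mmap version: %d\n", version);	/* 1 after this series */
	else
		printf("kernel does not report I915_PARAM_MMAP_GTT_VERSION\n");

	close(fd);
	return 0;
}
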
@@@ -1952,58 -2215,46 +1952,58 @@@ i915_gem_release_all_mmaps(struct drm_i
                i915_gem_release_mmap(obj);
  }
  
 -uint32_t
 -i915_gem_get_gtt_size(struct drm_device *dev, uint32_t size, int tiling_mode)
 +/**
 + * i915_gem_get_ggtt_size - return required global GTT size for an object
 + * @dev_priv: i915 device
 + * @size: object size
 + * @tiling_mode: tiling mode
 + *
 + * Return the required global GTT size for an object, taking into account
 + * potential fence register mapping.
 + */
 +u64 i915_gem_get_ggtt_size(struct drm_i915_private *dev_priv,
 +                         u64 size, int tiling_mode)
  {
 -      uint32_t gtt_size;
 +      u64 ggtt_size;
  
 -      if (INTEL_INFO(dev)->gen >= 4 ||
 +      GEM_BUG_ON(size == 0);
 +
 +      if (INTEL_GEN(dev_priv) >= 4 ||
            tiling_mode == I915_TILING_NONE)
                return size;
  
        /* Previous chips need a power-of-two fence region when tiling */
 -      if (IS_GEN3(dev))
 -              gtt_size = 1024*1024;
 +      if (IS_GEN3(dev_priv))
 +              ggtt_size = 1024*1024;
        else
 -              gtt_size = 512*1024;
 +              ggtt_size = 512*1024;
  
 -      while (gtt_size < size)
 -              gtt_size <<= 1;
 +      while (ggtt_size < size)
 +              ggtt_size <<= 1;
  
 -      return gtt_size;
 +      return ggtt_size;
  }
  
  /**
 - * i915_gem_get_gtt_alignment - return required GTT alignment for an object
 - * @dev: drm device
 + * i915_gem_get_ggtt_alignment - return required global GTT alignment
 + * @dev_priv: i915 device
   * @size: object size
   * @tiling_mode: tiling mode
 - * @fenced: is fenced alignemned required or not
 + * @fenced: is fenced alignment required or not
   *
 - * Return the required GTT alignment for an object, taking into account
 + * Return the required global GTT alignment for an object, taking into account
   * potential fence register mapping.
   */
 -uint32_t
 -i915_gem_get_gtt_alignment(struct drm_device *dev, uint32_t size,
 -                         int tiling_mode, bool fenced)
 +u64 i915_gem_get_ggtt_alignment(struct drm_i915_private *dev_priv, u64 size,
 +                              int tiling_mode, bool fenced)
  {
 +      GEM_BUG_ON(size == 0);
 +
        /*
         * Minimum alignment is 4k (GTT page size), but might be greater
         * if a fence register is needed for the object.
         */
 -      if (INTEL_INFO(dev)->gen >= 4 || (!fenced && IS_G33(dev)) ||
 +      if (INTEL_GEN(dev_priv) >= 4 || (!fenced && IS_G33(dev_priv)) ||
            tiling_mode == I915_TILING_NONE)
                return 4096;
  
         * Previous chips need to be aligned to the size of the smallest
         * fence register that can contain the object.
         */
 -      return i915_gem_get_gtt_size(dev, size, tiling_mode);
 +      return i915_gem_get_ggtt_size(dev_priv, size, tiling_mode);
  }
  
  static int i915_gem_object_create_mmap_offset(struct drm_i915_gem_object *obj)
  {
        struct drm_i915_private *dev_priv = to_i915(obj->base.dev);
 -      int ret;
 -
 -      dev_priv->mm.shrinker_no_lock_stealing = true;
 +      int err;
  
 -      ret = drm_gem_create_mmap_offset(&obj->base);
 -      if (ret != -ENOSPC)
 -              goto out;
 +      err = drm_gem_create_mmap_offset(&obj->base);
 +      if (!err)
 +              return 0;
  
 -      /* Badly fragmented mmap space? The only way we can recover
 -       * space is by destroying unwanted objects. We can't randomly release
 -       * mmap_offsets as userspace expects them to be persistent for the
 -       * lifetime of the objects. The closest we can is to release the
 -       * offsets on purgeable objects by truncating it and marking it purged,
 -       * which prevents userspace from ever using that object again.
 +      /* We can idle the GPU locklessly to flush stale objects, but in order
 +       * to claim that space for ourselves, we need to take the big
 +       * struct_mutex to free the requests+objects and allocate our slot.
         */
 -      i915_gem_shrink(dev_priv,
 -                      obj->base.size >> PAGE_SHIFT,
 -                      I915_SHRINK_BOUND |
 -                      I915_SHRINK_UNBOUND |
 -                      I915_SHRINK_PURGEABLE);
 -      ret = drm_gem_create_mmap_offset(&obj->base);
 -      if (ret != -ENOSPC)
 -              goto out;
 +      err = i915_gem_wait_for_idle(dev_priv, I915_WAIT_INTERRUPTIBLE);
 +      if (err)
 +              return err;
  
 -      i915_gem_shrink_all(dev_priv);
 -      ret = drm_gem_create_mmap_offset(&obj->base);
 -out:
 -      dev_priv->mm.shrinker_no_lock_stealing = false;
 +      err = i915_mutex_lock_interruptible(&dev_priv->drm);
 +      if (!err) {
 +              i915_gem_retire_requests(dev_priv);
 +              err = drm_gem_create_mmap_offset(&obj->base);
 +              mutex_unlock(&dev_priv->drm.struct_mutex);
 +      }
  
 -      return ret;
 +      return err;
  }
  
  static void i915_gem_object_free_mmap_offset(struct drm_i915_gem_object *obj)
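
i915_gem_get_ggtt_size() above keeps the pre-gen4 rule that a fenced object needs a power-of-two fence region: start at 1 MiB on gen3 (512 KiB on earlier parts) and double until the object fits. A standalone sketch of that sizing rule with plain integers:

#include <stdio.h>

static unsigned long long fence_region_size(unsigned long long size, int is_gen3)
{
	unsigned long long ggtt_size = is_gen3 ? 1024 * 1024 : 512 * 1024;

	/* Double the region until it covers the object. */
	while (ggtt_size < size)
		ggtt_size <<= 1;

	return ggtt_size;
}

int main(void)
{
	/* A 1.5 MiB tiled object on gen3 needs a 2 MiB fence region. */
	printf("%llu\n", fence_region_size(3 * 512 * 1024, 1));
	return 0;
}
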
@@@ -2055,15 -2314,32 +2055,15 @@@ i915_gem_mmap_gtt(struct drm_file *file
        struct drm_i915_gem_object *obj;
        int ret;
  
 -      ret = i915_mutex_lock_interruptible(dev);
 -      if (ret)
 -              return ret;
 -
 -      obj = to_intel_bo(drm_gem_object_lookup(file, handle));
 -      if (&obj->base == NULL) {
 -              ret = -ENOENT;
 -              goto unlock;
 -      }
 -
 -      if (obj->madv != I915_MADV_WILLNEED) {
 -              DRM_DEBUG("Attempting to mmap a purgeable buffer\n");
 -              ret = -EFAULT;
 -              goto out;
 -      }
 +      obj = i915_gem_object_lookup(file, handle);
 +      if (!obj)
 +              return -ENOENT;
  
        ret = i915_gem_object_create_mmap_offset(obj);
 -      if (ret)
 -              goto out;
 -
 -      *offset = drm_vma_node_offset_addr(&obj->base.vma_node);
 +      if (ret == 0)
 +              *offset = drm_vma_node_offset_addr(&obj->base.vma_node);
  
 -out:
 -      drm_gem_object_unreference(&obj->base);
 -unlock:
 -      mutex_unlock(&dev->struct_mutex);
 +      i915_gem_object_put_unlocked(obj);
        return ret;
  }
  
@@@ -2181,7 -2457,7 +2181,7 @@@ i915_gem_object_put_pages(struct drm_i9
        if (obj->pages_pin_count)
                return -EBUSY;
  
 -      BUG_ON(i915_gem_obj_bound_any(obj));
 +      GEM_BUG_ON(obj->bind_count);
  
        /* ->put_pages might need to allocate memory for the bit17 swizzle
         * array, hence protect them from being reaped by removing them from gtt
        list_del(&obj->global_list);
  
        if (obj->mapping) {
 -              if (is_vmalloc_addr(obj->mapping))
 -                      vunmap(obj->mapping);
 +              void *ptr;
 +
 +              ptr = ptr_mask_bits(obj->mapping);
 +              if (is_vmalloc_addr(ptr))
 +                      vunmap(ptr);
                else
 -                      kunmap(kmap_to_page(obj->mapping));
 +                      kunmap(kmap_to_page(ptr));
 +
                obj->mapping = NULL;
        }
  
@@@ -2305,7 -2577,7 +2305,7 @@@ i915_gem_object_get_pages_gtt(struct dr
        if (i915_gem_object_needs_bit17_swizzle(obj))
                i915_gem_object_do_bit_17_swizzle(obj);
  
 -      if (obj->tiling_mode != I915_TILING_NONE &&
 +      if (i915_gem_object_is_tiled(obj) &&
            dev_priv->quirks & QUIRK_PIN_SWIZZLED_PAGES)
                i915_gem_object_pin_pages(obj);
  
@@@ -2369,8 -2641,7 +2369,8 @@@ i915_gem_object_get_pages(struct drm_i9
  }
  
  /* The 'mapping' part of i915_gem_object_pin_map() below */
 -static void *i915_gem_object_map(const struct drm_i915_gem_object *obj)
 +static void *i915_gem_object_map(const struct drm_i915_gem_object *obj,
 +                               enum i915_map_type type)
  {
        unsigned long n_pages = obj->base.size >> PAGE_SHIFT;
        struct sg_table *sgt = obj->pages;
        struct page *stack_pages[32];
        struct page **pages = stack_pages;
        unsigned long i = 0;
 +      pgprot_t pgprot;
        void *addr;
  
        /* A single page can always be kmapped */
 -      if (n_pages == 1)
 +      if (n_pages == 1 && type == I915_MAP_WB)
                return kmap(sg_page(sgt->sgl));
  
        if (n_pages > ARRAY_SIZE(stack_pages)) {
        /* Check that we have the expected number of pages */
        GEM_BUG_ON(i != n_pages);
  
 -      addr = vmap(pages, n_pages, 0, PAGE_KERNEL);
 +      switch (type) {
 +      case I915_MAP_WB:
 +              pgprot = PAGE_KERNEL;
 +              break;
 +      case I915_MAP_WC:
 +              pgprot = pgprot_writecombine(PAGE_KERNEL_IO);
 +              break;
 +      }
 +      addr = vmap(pages, n_pages, 0, pgprot);
  
        if (pages != stack_pages)
                drm_free_large(pages);
  }
  
  /* get, pin, and map the pages of the object into kernel space */
 -void *i915_gem_object_pin_map(struct drm_i915_gem_object *obj)
 +void *i915_gem_object_pin_map(struct drm_i915_gem_object *obj,
 +                            enum i915_map_type type)
  {
 +      enum i915_map_type has_type;
 +      bool pinned;
 +      void *ptr;
        int ret;
  
        lockdep_assert_held(&obj->base.dev->struct_mutex);
 +      GEM_BUG_ON(!i915_gem_object_has_struct_page(obj));
  
        ret = i915_gem_object_get_pages(obj);
        if (ret)
                return ERR_PTR(ret);
  
        i915_gem_object_pin_pages(obj);
 +      pinned = obj->pages_pin_count > 1;
  
 -      if (!obj->mapping) {
 -              obj->mapping = i915_gem_object_map(obj);
 -              if (!obj->mapping) {
 -                      i915_gem_object_unpin_pages(obj);
 -                      return ERR_PTR(-ENOMEM);
 +      ptr = ptr_unpack_bits(obj->mapping, has_type);
 +      if (ptr && has_type != type) {
 +              if (pinned) {
 +                      ret = -EBUSY;
 +                      goto err;
                }
 -      }
  
 -      return obj->mapping;
 -}
 +              if (is_vmalloc_addr(ptr))
 +                      vunmap(ptr);
 +              else
 +                      kunmap(kmap_to_page(ptr));
  
 -void i915_vma_move_to_active(struct i915_vma *vma,
 -                           struct drm_i915_gem_request *req)
 -{
 -      struct drm_i915_gem_object *obj = vma->obj;
 -      struct intel_engine_cs *engine;
 +              ptr = obj->mapping = NULL;
 +      }
  
 -      engine = i915_gem_request_get_engine(req);
 +      if (!ptr) {
 +              ptr = i915_gem_object_map(obj, type);
 +              if (!ptr) {
 +                      ret = -ENOMEM;
 +                      goto err;
 +              }
  
 -      /* Add a reference if we're newly entering the active list. */
 -      if (obj->active == 0)
 -              drm_gem_object_reference(&obj->base);
 -      obj->active |= intel_engine_flag(engine);
 +              obj->mapping = ptr_pack_bits(ptr, type);
 +      }
  
 -      list_move_tail(&obj->engine_list[engine->id], &engine->active_list);
 -      i915_gem_request_assign(&obj->last_read_req[engine->id], req);
 +      return ptr;
  
 -      list_move_tail(&vma->vm_link, &vma->vm->active_list);
 +err:
 +      i915_gem_object_unpin_pages(obj);
 +      return ERR_PTR(ret);
  }
  
  static void
 -i915_gem_object_retire__write(struct drm_i915_gem_object *obj)
 +i915_gem_object_retire__write(struct i915_gem_active *active,
 +                            struct drm_i915_gem_request *request)
  {
 -      GEM_BUG_ON(obj->last_write_req == NULL);
 -      GEM_BUG_ON(!(obj->active & intel_engine_flag(obj->last_write_req->engine)));
 +      struct drm_i915_gem_object *obj =
 +              container_of(active, struct drm_i915_gem_object, last_write);
  
 -      i915_gem_request_assign(&obj->last_write_req, NULL);
        intel_fb_obj_flush(obj, true, ORIGIN_CS);
  }
  
  static void
 -i915_gem_object_retire__read(struct drm_i915_gem_object *obj, int ring)
 +i915_gem_object_retire__read(struct i915_gem_active *active,
 +                           struct drm_i915_gem_request *request)
  {
 -      struct i915_vma *vma;
 -
 -      GEM_BUG_ON(obj->last_read_req[ring] == NULL);
 -      GEM_BUG_ON(!(obj->active & (1 << ring)));
 -
 -      list_del_init(&obj->engine_list[ring]);
 -      i915_gem_request_assign(&obj->last_read_req[ring], NULL);
 -
 -      if (obj->last_write_req && obj->last_write_req->engine->id == ring)
 -              i915_gem_object_retire__write(obj);
 -
 -      obj->active &= ~(1 << ring);
 -      if (obj->active)
 -              return;
 -
 -      /* Bump our place on the bound list to keep it roughly in LRU order
 -       * so that we don't steal from recently used but inactive objects
 -       * (unless we are forced to ofc!)
 -       */
 -      list_move_tail(&obj->global_list,
 -                     &to_i915(obj->base.dev)->mm.bound_list);
 -
 -      list_for_each_entry(vma, &obj->vma_list, obj_link) {
 -              if (!list_empty(&vma->vm_link))
 -                      list_move_tail(&vma->vm_link, &vma->vm->inactive_list);
 -      }
 -
 -      i915_gem_request_assign(&obj->last_fenced_req, NULL);
 -      drm_gem_object_unreference(&obj->base);
 -}
 -
 -static int
 -i915_gem_init_seqno(struct drm_i915_private *dev_priv, u32 seqno)
 -{
 -      struct intel_engine_cs *engine;
 -      int ret;
 -
 -      /* Carefully retire all requests without writing to the rings */
 -      for_each_engine(engine, dev_priv) {
 -              ret = intel_engine_idle(engine);
 -              if (ret)
 -                      return ret;
 -      }
 -      i915_gem_retire_requests(dev_priv);
 -
 -      /* If the seqno wraps around, we need to clear the breadcrumb rbtree */
 -      if (!i915_seqno_passed(seqno, dev_priv->next_seqno)) {
 -              while (intel_kick_waiters(dev_priv) ||
 -                     intel_kick_signalers(dev_priv))
 -                      yield();
 -      }
 -
 -      /* Finally reset hw state */
 -      for_each_engine(engine, dev_priv)
 -              intel_ring_init_seqno(engine, seqno);
 -
 -      return 0;
 -}
 -
 -int i915_gem_set_seqno(struct drm_device *dev, u32 seqno)
 -{
 -      struct drm_i915_private *dev_priv = to_i915(dev);
 -      int ret;
 -
 -      if (seqno == 0)
 -              return -EINVAL;
 -
 -      /* HWS page needs to be set less than what we
 -       * will inject to ring
 -       */
 -      ret = i915_gem_init_seqno(dev_priv, seqno - 1);
 -      if (ret)
 -              return ret;
 -
 -      /* Carefully set the last_seqno value so that wrap
 -       * detection still works
 -       */
 -      dev_priv->next_seqno = seqno;
 -      dev_priv->last_seqno = seqno - 1;
 -      if (dev_priv->last_seqno == 0)
 -              dev_priv->last_seqno--;
 -
 -      return 0;
 -}
 -
 -int
 -i915_gem_get_seqno(struct drm_i915_private *dev_priv, u32 *seqno)
 -{
 -      /* reserve 0 for non-seqno */
 -      if (dev_priv->next_seqno == 0) {
 -              int ret = i915_gem_init_seqno(dev_priv, 0);
 -              if (ret)
 -                      return ret;
 -
 -              dev_priv->next_seqno = 1;
 -      }
 -
 -      *seqno = dev_priv->last_seqno = dev_priv->next_seqno++;
 -      return 0;
 -}
 -
 -static void i915_gem_mark_busy(const struct intel_engine_cs *engine)
 -{
 -      struct drm_i915_private *dev_priv = engine->i915;
 -
 -      dev_priv->gt.active_engines |= intel_engine_flag(engine);
 -      if (dev_priv->gt.awake)
 -              return;
 -
 -      intel_runtime_pm_get_noresume(dev_priv);
 -      dev_priv->gt.awake = true;
 -
 -      i915_update_gfx_val(dev_priv);
 -      if (INTEL_GEN(dev_priv) >= 6)
 -              gen6_rps_busy(dev_priv);
 -
 -      queue_delayed_work(dev_priv->wq,
 -                         &dev_priv->gt.retire_work,
 -                         round_jiffies_up_relative(HZ));
 -}
 -
 -/*
 - * NB: This function is not allowed to fail. Doing so would mean the the
 - * request is not being tracked for completion but the work itself is
 - * going to happen on the hardware. This would be a Bad Thing(tm).
 - */
 -void __i915_add_request(struct drm_i915_gem_request *request,
 -                      struct drm_i915_gem_object *obj,
 -                      bool flush_caches)
 -{
 -      struct intel_engine_cs *engine;
 -      struct intel_ringbuffer *ringbuf;
 -      u32 request_start;
 -      u32 reserved_tail;
 -      int ret;
 -
 -      if (WARN_ON(request == NULL))
 -              return;
 -
 -      engine = request->engine;
 -      ringbuf = request->ringbuf;
 -
 -      /*
 -       * To ensure that this call will not fail, space for its emissions
 -       * should already have been reserved in the ring buffer. Let the ring
 -       * know that it is time to use that space up.
 -       */
 -      request_start = intel_ring_get_tail(ringbuf);
 -      reserved_tail = request->reserved_space;
 -      request->reserved_space = 0;
 -
 -      /*
 -       * Emit any outstanding flushes - execbuf can fail to emit the flush
 -       * after having emitted the batchbuffer command. Hence we need to fix
 -       * things up similar to emitting the lazy request. The difference here
 -       * is that the flush _must_ happen before the next request, no matter
 -       * what.
 -       */
 -      if (flush_caches) {
 -              if (i915.enable_execlists)
 -                      ret = logical_ring_flush_all_caches(request);
 -              else
 -                      ret = intel_ring_flush_all_caches(request);
 -              /* Not allowed to fail! */
 -              WARN(ret, "*_ring_flush_all_caches failed: %d!\n", ret);
 -      }
 -
 -      trace_i915_gem_request_add(request);
 +      int idx = request->engine->id;
 +      struct drm_i915_gem_object *obj =
 +              container_of(active, struct drm_i915_gem_object, last_read[idx]);
  
 -      request->head = request_start;
 +      GEM_BUG_ON(!i915_gem_object_has_active_engine(obj, idx));
  
 -      /* Whilst this request exists, batch_obj will be on the
 -       * active_list, and so will hold the active reference. Only when this
 -       * request is retired will the the batch_obj be moved onto the
 -       * inactive_list and lose its active reference. Hence we do not need
 -       * to explicitly hold another reference here.
 -       */
 -      request->batch_obj = obj;
 +      i915_gem_object_clear_active(obj, idx);
 +      if (i915_gem_object_is_active(obj))
 +              return;
  
 -      /* Seal the request and mark it as pending execution. Note that
 -       * we may inspect this state, without holding any locks, during
 -       * hangcheck. Hence we apply the barrier to ensure that we do not
 -       * see a more recent value in the hws than we are tracking.
 -       */
 -      request->emitted_jiffies = jiffies;
 -      request->previous_seqno = engine->last_submitted_seqno;
 -      smp_store_mb(engine->last_submitted_seqno, request->seqno);
 -      list_add_tail(&request->list, &engine->request_list);
 -
 -      /* Record the position of the start of the request so that
 -       * should we detect the updated seqno part-way through the
 -       * GPU processing the request, we never over-estimate the
 -       * position of the head.
 +      /* Bump our place on the bound list to keep it roughly in LRU order
 +       * so that we don't steal from recently used but inactive objects
 +       * (unless we are forced to ofc!)
         */
 -      request->postfix = intel_ring_get_tail(ringbuf);
 -
 -      if (i915.enable_execlists)
 -              ret = engine->emit_request(request);
 -      else {
 -              ret = engine->add_request(request);
 +      if (obj->bind_count)
 +              list_move_tail(&obj->global_list,
 +                             &request->i915->mm.bound_list);
  
 -              request->tail = intel_ring_get_tail(ringbuf);
 -      }
 -      /* Not allowed to fail! */
 -      WARN(ret, "emit|add_request failed: %d!\n", ret);
 -      /* Sanity check that the reserved size was large enough. */
 -      ret = intel_ring_get_tail(ringbuf) - request_start;
 -      if (ret < 0)
 -              ret += ringbuf->size;
 -      WARN_ONCE(ret > reserved_tail,
 -                "Not enough space reserved (%d bytes) "
 -                "for adding the request (%d bytes)\n",
 -                reserved_tail, ret);
 -
 -      i915_gem_mark_busy(engine);
 +      i915_gem_object_put(obj);
  }
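
/*
 * Editor's sketch, not part of the diff above: the removed WARN_ONCE in
 * __i915_add_request() measures how many bytes the request actually emitted
 * by subtracting the recorded start from the current ring tail, adding the
 * ring size back when the tail has wrapped.  A self-contained model of that
 * arithmetic follows; ring_bytes_used() and its parameters are illustrative
 * names, not the driver's API.
 */
#include <stdio.h>

static int ring_bytes_used(int ring_size, int request_start, int tail)
{
	int used = tail - request_start;

	if (used < 0)		/* the tail wrapped past the end of the ring */
		used += ring_size;
	return used;
}

int main(void)
{
	printf("%d\n", ring_bytes_used(4096, 100, 160));	/* no wrap: 60  */
	printf("%d\n", ring_bytes_used(4096, 4000, 64));	/* wrapped: 160 */
	return 0;
}
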
  
  static bool i915_context_is_banned(const struct i915_gem_context *ctx)
@@@ -2532,6 -2981,101 +2532,6 @@@ static void i915_set_reset_status(struc
        }
  }
  
 -void i915_gem_request_free(struct kref *req_ref)
 -{
 -      struct drm_i915_gem_request *req = container_of(req_ref,
 -                                               typeof(*req), ref);
 -      kmem_cache_free(req->i915->requests, req);
 -}
 -
 -static inline int
 -__i915_gem_request_alloc(struct intel_engine_cs *engine,
 -                       struct i915_gem_context *ctx,
 -                       struct drm_i915_gem_request **req_out)
 -{
 -      struct drm_i915_private *dev_priv = engine->i915;
 -      unsigned reset_counter = i915_reset_counter(&dev_priv->gpu_error);
 -      struct drm_i915_gem_request *req;
 -      int ret;
 -
 -      if (!req_out)
 -              return -EINVAL;
 -
 -      *req_out = NULL;
 -
 -      /* ABI: Before userspace accesses the GPU (e.g. execbuffer), report
 -       * EIO if the GPU is already wedged, or EAGAIN to drop the struct_mutex
 -       * and restart.
 -       */
 -      ret = i915_gem_check_wedge(reset_counter, dev_priv->mm.interruptible);
 -      if (ret)
 -              return ret;
 -
 -      req = kmem_cache_zalloc(dev_priv->requests, GFP_KERNEL);
 -      if (req == NULL)
 -              return -ENOMEM;
 -
 -      ret = i915_gem_get_seqno(engine->i915, &req->seqno);
 -      if (ret)
 -              goto err;
 -
 -      kref_init(&req->ref);
 -      req->i915 = dev_priv;
 -      req->engine = engine;
 -      req->ctx  = ctx;
 -      i915_gem_context_reference(req->ctx);
 -
 -      /*
 -       * Reserve space in the ring buffer for all the commands required to
 -       * eventually emit this request. This is to guarantee that the
 -       * i915_add_request() call can't fail. Note that the reserve may need
 -       * to be redone if the request is not actually submitted straight
 -       * away, e.g. because a GPU scheduler has deferred it.
 -       */
 -      req->reserved_space = MIN_SPACE_FOR_ADD_REQUEST;
 -
 -      if (i915.enable_execlists)
 -              ret = intel_logical_ring_alloc_request_extras(req);
 -      else
 -              ret = intel_ring_alloc_request_extras(req);
 -      if (ret)
 -              goto err_ctx;
 -
 -      *req_out = req;
 -      return 0;
 -
 -err_ctx:
 -      i915_gem_context_unreference(ctx);
 -err:
 -      kmem_cache_free(dev_priv->requests, req);
 -      return ret;
 -}
 -
 -/**
 - * i915_gem_request_alloc - allocate a request structure
 - *
 - * @engine: engine that we wish to issue the request on.
 - * @ctx: context that the request will be associated with.
 - *       This can be NULL if the request is not directly related to
 - *       any specific user context, in which case this function will
 - *       choose an appropriate context to use.
 - *
 - * Returns a pointer to the allocated request if successful,
 - * or an error code if not.
 - */
 -struct drm_i915_gem_request *
 -i915_gem_request_alloc(struct intel_engine_cs *engine,
 -                     struct i915_gem_context *ctx)
 -{
 -      struct drm_i915_gem_request *req;
 -      int err;
 -
 -      if (ctx == NULL)
 -              ctx = engine->i915->kernel_context;
 -      err = __i915_gem_request_alloc(engine, ctx, &req);
 -      return err ? ERR_PTR(err) : req;
 -}
 -
  struct drm_i915_gem_request *
  i915_gem_find_active_request(struct intel_engine_cs *engine)
  {
         * extra delay for a recent interrupt is pointless. Hence, we do
         * not need an engine->irq_seqno_barrier() before the seqno reads.
         */
 -      list_for_each_entry(request, &engine->request_list, list) {
 +      list_for_each_entry(request, &engine->request_list, link) {
                if (i915_gem_request_completed(request))
                        continue;
  
 +              if (!i915_sw_fence_done(&request->submit))
 +                      break;
 +
                return request;
        }
  
        return NULL;
  }
  
 -static void i915_gem_reset_engine_status(struct intel_engine_cs *engine)
 +static void reset_request(struct drm_i915_gem_request *request)
 +{
 +      void *vaddr = request->ring->vaddr;
 +      u32 head;
 +
 +      /* As this request likely depends on state from the lost
 +       * context, clear out all the user operations leaving the
 +       * breadcrumb at the end (so we get the fence notifications).
 +       */
 +      head = request->head;
 +      if (request->postfix < head) {
 +              memset(vaddr + head, 0, request->ring->size - head);
 +              head = 0;
 +      }
 +      memset(vaddr + head, 0, request->postfix - head);
 +}
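
/*
 * Editor's sketch, not part of the diff above: reset_request() zeroes the
 * user payload of a hung request, i.e. the circular range [head, postfix)
 * of the ring, leaving only the breadcrumb after it.  When the range wraps
 * past the end of the ring the clear is split into two memsets.  The names
 * below (clear_circular_range and friends) are illustrative only.
 */
#include <assert.h>
#include <string.h>

static void clear_circular_range(unsigned char *ring, unsigned int size,
				 unsigned int head, unsigned int postfix)
{
	if (postfix < head) {			/* range wraps around */
		memset(ring + head, 0, size - head);
		head = 0;
	}
	memset(ring + head, 0, postfix - head);
}

int main(void)
{
	unsigned char ring[16];

	memset(ring, 0xff, sizeof(ring));
	clear_circular_range(ring, sizeof(ring), 12, 4);
	assert(ring[12] == 0 && ring[15] == 0);	/* cleared up to the end */
	assert(ring[0] == 0 && ring[3] == 0);	/* and again from zero   */
	assert(ring[4] == 0xff);		/* rest left untouched   */
	return 0;
}
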
 +
 +static void i915_gem_reset_engine(struct intel_engine_cs *engine)
  {
        struct drm_i915_gem_request *request;
 +      struct i915_gem_context *incomplete_ctx;
        bool ring_hung;
  
 +      /* Ensure irq handler finishes, and not run again. */
 +      tasklet_kill(&engine->irq_tasklet);
 +      if (engine->irq_seqno_barrier)
 +              engine->irq_seqno_barrier(engine);
 +
        request = i915_gem_find_active_request(engine);
 -      if (request == NULL)
 +      if (!request)
                return;
  
        ring_hung = engine->hangcheck.score >= HANGCHECK_SCORE_RING_HUNG;
 -
        i915_set_reset_status(request->ctx, ring_hung);
 -      list_for_each_entry_continue(request, &engine->request_list, list)
 -              i915_set_reset_status(request->ctx, false);
 -}
 -
 -static void i915_gem_reset_engine_cleanup(struct intel_engine_cs *engine)
 -{
 -      struct intel_ringbuffer *buffer;
 -
 -      while (!list_empty(&engine->active_list)) {
 -              struct drm_i915_gem_object *obj;
 -
 -              obj = list_first_entry(&engine->active_list,
 -                                     struct drm_i915_gem_object,
 -                                     engine_list[engine->id]);
 -
 -              i915_gem_object_retire__read(obj, engine->id);
 -      }
 -
 -      /*
 -       * Clear the execlists queue up before freeing the requests, as those
 -       * are the ones that keep the context and ringbuffer backing objects
 -       * pinned in place.
 -       */
 -
 -      if (i915.enable_execlists) {
 -              /* Ensure irq handler finishes or is cancelled. */
 -              tasklet_kill(&engine->irq_tasklet);
 -
 -              intel_execlists_cancel_requests(engine);
 -      }
 -
 -      /*
 -       * We must free the requests after all the corresponding objects have
 -       * been moved off the active lists, which is the same order the normal
 -       * retire_requests function uses. This is important if objects hold
 -       * implicit references on things like e.g. ppgtt address spaces through
 -       * the request.
 -       */
 -      while (!list_empty(&engine->request_list)) {
 -              struct drm_i915_gem_request *request;
 +      if (!ring_hung)
 +              return;
  
 -              request = list_first_entry(&engine->request_list,
 -                                         struct drm_i915_gem_request,
 -                                         list);
 +      DRM_DEBUG_DRIVER("resetting %s to restart from tail of request 0x%x\n",
 +                       engine->name, request->fence.seqno);
  
 -              i915_gem_request_retire(request);
 -      }
 +      /* Setup the CS to resume from the breadcrumb of the hung request */
 +      engine->reset_hw(engine, request);
  
 -      /* Having flushed all requests from all queues, we know that all
 -       * ringbuffers must now be empty. However, since we do not reclaim
 -       * all space when retiring the request (to prevent HEADs colliding
 -       * with rapid ringbuffer wraparound) the amount of available space
 -       * upon reset is less than when we start. Do one more pass over
 -       * all the ringbuffers to reset last_retired_head.
 +      /* Users of the default context do not rely on logical state
 +       * preserved between batches. They have to emit full state on
 +       * every batch and so it is safe to execute queued requests following
 +       * the hang.
 +       *
 +       * Other contexts preserve state, now corrupt. We want to skip all
 +       * queued requests that reference the corrupt context.
         */
 -      list_for_each_entry(buffer, &engine->buffers, link) {
 -              buffer->last_retired_head = buffer->tail;
 -              intel_ring_update_space(buffer);
 -      }
 +      incomplete_ctx = request->ctx;
 +      if (i915_gem_context_is_default(incomplete_ctx))
 +              return;
  
 -      intel_ring_init_seqno(engine, engine->last_submitted_seqno);
 +      list_for_each_entry_continue(request, &engine->request_list, link)
 +              if (request->ctx == incomplete_ctx)
 +                      reset_request(request);
+       engine->i915->gt.active_engines &= ~intel_engine_flag(engine);
  }
  
 -void i915_gem_reset(struct drm_device *dev)
 +void i915_gem_reset(struct drm_i915_private *dev_priv)
  {
 -      struct drm_i915_private *dev_priv = to_i915(dev);
        struct intel_engine_cs *engine;
  
 -      /*
 -       * Before we free the objects from the requests, we need to inspect
 -       * them to find the guilty party. As the requests only borrow
 -       * their reference to the objects, the inspection must be done first.
 -       */
 -      for_each_engine(engine, dev_priv)
 -              i915_gem_reset_engine_status(engine);
 +      i915_gem_retire_requests(dev_priv);
  
        for_each_engine(engine, dev_priv)
 -              i915_gem_reset_engine_cleanup(engine);
 +              i915_gem_reset_engine(engine);
+       mod_delayed_work(dev_priv->wq, &dev_priv->gt.idle_work, 0);
  
 -      i915_gem_context_reset(dev);
 -
 -      i915_gem_restore_fences(dev);
 +      i915_gem_restore_fences(&dev_priv->drm);
 +}
  
 -      WARN_ON(i915_verify_lists(dev));
 +static void nop_submit_request(struct drm_i915_gem_request *request)
 +{
  }
  
 -/**
 - * This function clears the request list as sequence numbers are passed.
 - * @engine: engine to retire requests on
 - */
 -void
 -i915_gem_retire_requests_ring(struct intel_engine_cs *engine)
 +static void i915_gem_cleanup_engine(struct intel_engine_cs *engine)
  {
 -      WARN_ON(i915_verify_lists(engine->dev));
 +      engine->submit_request = nop_submit_request;
  
 -      /* Retire requests first as we use it above for the early return.
 -       * the request lists without clearing the active list, leading to
 -       * the requests lists without clearing the active list, leading to
 -       * confusion.
 +      /* Mark all pending requests as complete so that any concurrent
 +       * (lockless) lookup doesn't try and wait upon the request as we
 +       * reset it.
         */
 -      while (!list_empty(&engine->request_list)) {
 -              struct drm_i915_gem_request *request;
 -
 -              request = list_first_entry(&engine->request_list,
 -                                         struct drm_i915_gem_request,
 -                                         list);
 -
 -              if (!i915_gem_request_completed(request))
 -                      break;
 -
 -              i915_gem_request_retire(request);
 -      }
 +      intel_engine_init_seqno(engine, engine->last_submitted_seqno);
  
 -      /* Move any buffers on the active list that are no longer referenced
 -       * by the ringbuffer to the flushing/inactive lists as appropriate,
 -       * before we free the context associated with the requests.
 +      /*
 +       * Clear the execlists queue up before freeing the requests, as those
 +       * are the ones that keep the context and ringbuffer backing objects
 +       * pinned in place.
         */
 -      while (!list_empty(&engine->active_list)) {
 -              struct drm_i915_gem_object *obj;
 -
 -              obj = list_first_entry(&engine->active_list,
 -                                     struct drm_i915_gem_object,
 -                                     engine_list[engine->id]);
 -
 -              if (!list_empty(&obj->last_read_req[engine->id]->list))
 -                      break;
  
 -              i915_gem_object_retire__read(obj, engine->id);
 +      if (i915.enable_execlists) {
 +              spin_lock(&engine->execlist_lock);
 +              INIT_LIST_HEAD(&engine->execlist_queue);
 +              i915_gem_request_put(engine->execlist_port[0].request);
 +              i915_gem_request_put(engine->execlist_port[1].request);
 +              memset(engine->execlist_port, 0, sizeof(engine->execlist_port));
 +              spin_unlock(&engine->execlist_lock);
        }
  
 -      WARN_ON(i915_verify_lists(engine->dev));
 +      engine->i915->gt.active_engines &= ~intel_engine_flag(engine);
  }
  
 -void i915_gem_retire_requests(struct drm_i915_private *dev_priv)
 +void i915_gem_set_wedged(struct drm_i915_private *dev_priv)
  {
        struct intel_engine_cs *engine;
  
        lockdep_assert_held(&dev_priv->drm.struct_mutex);
 +      set_bit(I915_WEDGED, &dev_priv->gpu_error.flags);
  
 -      if (dev_priv->gt.active_engines == 0)
 -              return;
 -
 -      GEM_BUG_ON(!dev_priv->gt.awake);
 -
 -      for_each_engine(engine, dev_priv) {
 -              i915_gem_retire_requests_ring(engine);
 -              if (list_empty(&engine->request_list))
 -                      dev_priv->gt.active_engines &= ~intel_engine_flag(engine);
 -      }
 +      i915_gem_context_lost(dev_priv);
 +      for_each_engine(engine, dev_priv)
 +              i915_gem_cleanup_engine(engine);
 +      mod_delayed_work(dev_priv->wq, &dev_priv->gt.idle_work, 0);
  
 -      if (dev_priv->gt.active_engines == 0)
 -              queue_delayed_work(dev_priv->wq,
 -                                 &dev_priv->gt.idle_work,
 -                                 msecs_to_jiffies(100));
 +      i915_gem_retire_requests(dev_priv);
  }
  
  static void
@@@ -2694,12 -3287,10 +2697,12 @@@ i915_gem_retire_work_handler(struct wor
         * We do not need to do this test under locking as in the worst-case
         * we queue the retire worker once too often.
         */
 -      if (READ_ONCE(dev_priv->gt.awake))
 +      if (READ_ONCE(dev_priv->gt.awake)) {
 +              i915_queue_hangcheck(dev_priv);
                queue_delayed_work(dev_priv->wq,
                                   &dev_priv->gt.retire_work,
                                   round_jiffies_up_relative(HZ));
 +      }
  }
  
  static void
@@@ -2709,6 -3300,7 +2712,6 @@@ i915_gem_idle_work_handler(struct work_
                container_of(work, typeof(*dev_priv), gt.idle_work.work);
        struct drm_device *dev = &dev_priv->drm;
        struct intel_engine_cs *engine;
 -      unsigned int stuck_engines;
        bool rearm_hangcheck;
  
        if (!READ_ONCE(dev_priv->gt.awake))
                cancel_delayed_work_sync(&dev_priv->gpu_error.hangcheck_work);
  
        if (!mutex_trylock(&dev->struct_mutex)) {
 -              /* Currently busy, come back later */
 -              mod_delayed_work(dev_priv->wq,
 -                               &dev_priv->gt.idle_work,
 -                               msecs_to_jiffies(50));
 -              goto out_rearm;
 -      }
 -
 -      if (dev_priv->gt.active_engines)
 -              goto out_unlock;
 -
 -      for_each_engine(engine, dev_priv)
 -              i915_gem_batch_pool_fini(&engine->batch_pool);
 -
 -      GEM_BUG_ON(!dev_priv->gt.awake);
 -      dev_priv->gt.awake = false;
 -      rearm_hangcheck = false;
 -
 -      stuck_engines = intel_kick_waiters(dev_priv);
 -      if (unlikely(stuck_engines)) {
 -              DRM_DEBUG_DRIVER("kicked stuck waiters...missed irq\n");
 -              dev_priv->gpu_error.missed_irq_rings |= stuck_engines;
 -      }
 -
 -      if (INTEL_GEN(dev_priv) >= 6)
 -              gen6_rps_idle(dev_priv);
 -      intel_runtime_pm_put(dev_priv);
 -out_unlock:
 -      mutex_unlock(&dev->struct_mutex);
 -
 -out_rearm:
 -      if (rearm_hangcheck) {
 -              GEM_BUG_ON(!dev_priv->gt.awake);
 -              i915_queue_hangcheck(dev_priv);
 -      }
 -}
 -
 -/**
 - * Ensures that an object will eventually get non-busy by flushing any required
 - * write domains, emitting any outstanding lazy request and retiring any
 - * completed requests.
 - * @obj: object to flush
 - */
 -static int
 -i915_gem_object_flush_active(struct drm_i915_gem_object *obj)
 -{
 -      int i;
 -
 -      if (!obj->active)
 -              return 0;
 -
 -      for (i = 0; i < I915_NUM_ENGINES; i++) {
 -              struct drm_i915_gem_request *req;
 -
 -              req = obj->last_read_req[i];
 -              if (req == NULL)
 -                      continue;
 -
 -              if (i915_gem_request_completed(req))
 -                      i915_gem_object_retire__read(obj, i);
 -      }
 -
 -      return 0;
 -}
 -
 -/**
 - * i915_gem_wait_ioctl - implements DRM_IOCTL_I915_GEM_WAIT
 - * @dev: drm device pointer
 - * @data: ioctl data blob
 - * @file: drm file pointer
 - *
 - * Returns 0 if successful, else an error is returned with the remaining time in
 - * the timeout parameter.
 - *  -ETIME: object is still busy after timeout
 - *  -ERESTARTSYS: signal interrupted the wait
 - *  -ENOENT: object doesn't exist
 - * Also possible, but rare:
 - *  -EAGAIN: GPU wedged
 - *  -ENOMEM: damn
 - *  -ENODEV: Internal IRQ fail
 - *  -E?: The add request failed
 - *
 - * The wait ioctl with a timeout of 0 reimplements the busy ioctl. With any
 - * non-zero timeout parameter the wait ioctl will wait for the given number of
 - * nanoseconds on an object becoming unbusy. Since the wait itself does so
 - * without holding struct_mutex the object may become re-busied before this
 - * function completes. A similar but shorter race condition exists in the busy
 - * ioctl.
 - */
 -int
 -i915_gem_wait_ioctl(struct drm_device *dev, void *data, struct drm_file *file)
 -{
 -      struct drm_i915_gem_wait *args = data;
 -      struct drm_i915_gem_object *obj;
 -      struct drm_i915_gem_request *req[I915_NUM_ENGINES];
 -      int i, n = 0;
 -      int ret;
 -
 -      if (args->flags != 0)
 -              return -EINVAL;
 -
 -      ret = i915_mutex_lock_interruptible(dev);
 -      if (ret)
 -              return ret;
 -
 -      obj = to_intel_bo(drm_gem_object_lookup(file, args->bo_handle));
 -      if (&obj->base == NULL) {
 -              mutex_unlock(&dev->struct_mutex);
 -              return -ENOENT;
 -      }
 -
 -      /* Need to make sure the object gets inactive eventually. */
 -      ret = i915_gem_object_flush_active(obj);
 -      if (ret)
 -              goto out;
 -
 -      if (!obj->active)
 -              goto out;
 -
 -      /* Do this after OLR check to make sure we make forward progress polling
 -       * on this IOCTL with a timeout == 0 (like busy ioctl)
 -       */
 -      if (args->timeout_ns == 0) {
 -              ret = -ETIME;
 -              goto out;
 -      }
 -
 -      drm_gem_object_unreference(&obj->base);
 -
 -      for (i = 0; i < I915_NUM_ENGINES; i++) {
 -              if (obj->last_read_req[i] == NULL)
 -                      continue;
 -
 -              req[n++] = i915_gem_request_reference(obj->last_read_req[i]);
 -      }
 -
 -      mutex_unlock(&dev->struct_mutex);
 -
 -      for (i = 0; i < n; i++) {
 -              if (ret == 0)
 -                      ret = __i915_wait_request(req[i], true,
 -                                                args->timeout_ns > 0 ? &args->timeout_ns : NULL,
 -                                                to_rps_client(file));
 -              i915_gem_request_unreference(req[i]);
 -      }
 -      return ret;
 -
 -out:
 -      drm_gem_object_unreference(&obj->base);
 -      mutex_unlock(&dev->struct_mutex);
 -      return ret;
 -}
 -
 -static int
 -__i915_gem_object_sync(struct drm_i915_gem_object *obj,
 -                     struct intel_engine_cs *to,
 -                     struct drm_i915_gem_request *from_req,
 -                     struct drm_i915_gem_request **to_req)
 -{
 -      struct intel_engine_cs *from;
 -      int ret;
 -
 -      from = i915_gem_request_get_engine(from_req);
 -      if (to == from)
 -              return 0;
 -
 -      if (i915_gem_request_completed(from_req))
 -              return 0;
 -
 -      if (!i915_semaphore_is_enabled(to_i915(obj->base.dev))) {
 -              struct drm_i915_private *i915 = to_i915(obj->base.dev);
 -              ret = __i915_wait_request(from_req,
 -                                        i915->mm.interruptible,
 -                                        NULL,
 -                                        &i915->rps.semaphores);
 -              if (ret)
 -                      return ret;
 -
 -              i915_gem_object_retire_request(obj, from_req);
 -      } else {
 -              int idx = intel_ring_sync_index(from, to);
 -              u32 seqno = i915_gem_request_get_seqno(from_req);
 -
 -              WARN_ON(!to_req);
 -
 -              if (seqno <= from->semaphore.sync_seqno[idx])
 -                      return 0;
 +              /* Currently busy, come back later */
 +              mod_delayed_work(dev_priv->wq,
 +                               &dev_priv->gt.idle_work,
 +                               msecs_to_jiffies(50));
 +              goto out_rearm;
 +      }
  
 -              if (*to_req == NULL) {
 -                      struct drm_i915_gem_request *req;
 +      if (dev_priv->gt.active_engines)
 +              goto out_unlock;
  
 -                      req = i915_gem_request_alloc(to, NULL);
 -                      if (IS_ERR(req))
 -                              return PTR_ERR(req);
 +      for_each_engine(engine, dev_priv)
 +              i915_gem_batch_pool_fini(&engine->batch_pool);
  
 -                      *to_req = req;
 -              }
 +      GEM_BUG_ON(!dev_priv->gt.awake);
 +      dev_priv->gt.awake = false;
 +      rearm_hangcheck = false;
  
 -              trace_i915_gem_ring_sync_to(*to_req, from, from_req);
 -              ret = to->semaphore.sync_to(*to_req, from, seqno);
 -              if (ret)
 -                      return ret;
 +      if (INTEL_GEN(dev_priv) >= 6)
 +              gen6_rps_idle(dev_priv);
 +      intel_runtime_pm_put(dev_priv);
 +out_unlock:
 +      mutex_unlock(&dev->struct_mutex);
  
 -              /* We use last_read_req because sync_to()
 -               * might have just caused seqno wrap under
 -               * the radar.
 -               */
 -              from->semaphore.sync_seqno[idx] =
 -                      i915_gem_request_get_seqno(obj->last_read_req[from->id]);
 +out_rearm:
 +      if (rearm_hangcheck) {
 +              GEM_BUG_ON(!dev_priv->gt.awake);
 +              i915_queue_hangcheck(dev_priv);
        }
 +}
  
 -      return 0;
 +void i915_gem_close_object(struct drm_gem_object *gem, struct drm_file *file)
 +{
 +      struct drm_i915_gem_object *obj = to_intel_bo(gem);
 +      struct drm_i915_file_private *fpriv = file->driver_priv;
 +      struct i915_vma *vma, *vn;
 +
 +      mutex_lock(&obj->base.dev->struct_mutex);
 +      list_for_each_entry_safe(vma, vn, &obj->vma_list, obj_link)
 +              if (vma->vm->file == fpriv)
 +                      i915_vma_close(vma);
 +      mutex_unlock(&obj->base.dev->struct_mutex);
  }
  
  /**
 - * i915_gem_object_sync - sync an object to a ring.
 - *
 - * @obj: object which may be in use on another ring.
 - * @to: ring we wish to use the object on. May be NULL.
 - * @to_req: request we wish to use the object for. See below.
 - *          This will be allocated and returned if a request is
 - *          required but not passed in.
 - *
 - * This code is meant to abstract object synchronization with the GPU.
 - * Calling with NULL implies synchronizing the object with the CPU
 - * rather than a particular GPU ring. Conceptually we serialise writes
 - * between engines inside the GPU. We only allow one engine to write
 - * into a buffer at any time, but multiple readers. To ensure each has
 - * a coherent view of memory, we must:
 - *
 - * - If there is an outstanding write request to the object, the new
 - *   request must wait for it to complete (either CPU or in hw, requests
 - *   on the same ring will be naturally ordered).
 - *
 - * - If we are a write request (pending_write_domain is set), the new
 - *   request must wait for outstanding read requests to complete.
 + * i915_gem_wait_ioctl - implements DRM_IOCTL_I915_GEM_WAIT
 + * @dev: drm device pointer
 + * @data: ioctl data blob
 + * @file: drm file pointer
   *
 - * For CPU synchronisation (NULL to) no request is required. For syncing with
 - * rings to_req must be non-NULL. However, a request does not have to be
 - * pre-allocated. If *to_req is NULL and sync commands will be emitted then a
 - * request will be allocated automatically and returned through *to_req. Note
 - * that it is not guaranteed that commands will be emitted (because the system
 - * might already be idle). Hence there is no need to create a request that
 - * might never have any work submitted. Note further that if a request is
 - * returned in *to_req, it is the responsibility of the caller to submit
 - * that request (after potentially adding more work to it).
 + * Returns 0 if successful, else an error is returned with the remaining time in
 + * the timeout parameter.
 + *  -ETIME: object is still busy after timeout
 + *  -ERESTARTSYS: signal interrupted the wait
 + *  -ENOENT: object doesn't exist
 + * Also possible, but rare:
 + *  -EAGAIN: GPU wedged
 + *  -ENOMEM: damn
 + *  -ENODEV: Internal IRQ fail
 + *  -E?: The add request failed
   *
 - * Returns 0 if successful, else propagates up the lower layer error.
 + * The wait ioctl with a timeout of 0 reimplements the busy ioctl. With any
 + * non-zero timeout parameter the wait ioctl will wait for the given number of
 + * nanoseconds on an object becoming unbusy. Since the wait itself does so
 + * without holding struct_mutex the object may become re-busied before this
 + * function completes. A similar but shorter race condition exists in the busy
 + * ioctl.
   */
  int
 -i915_gem_object_sync(struct drm_i915_gem_object *obj,
 -                   struct intel_engine_cs *to,
 -                   struct drm_i915_gem_request **to_req)
 +i915_gem_wait_ioctl(struct drm_device *dev, void *data, struct drm_file *file)
  {
 -      const bool readonly = obj->base.pending_write_domain == 0;
 -      struct drm_i915_gem_request *req[I915_NUM_ENGINES];
 -      int ret, i, n;
 +      struct drm_i915_gem_wait *args = data;
 +      struct intel_rps_client *rps = to_rps_client(file);
 +      struct drm_i915_gem_object *obj;
 +      unsigned long active;
 +      int idx, ret = 0;
  
 -      if (!obj->active)
 -              return 0;
 +      if (args->flags != 0)
 +              return -EINVAL;
  
 -      if (to == NULL)
 -              return i915_gem_object_wait_rendering(obj, readonly);
 +      obj = i915_gem_object_lookup(file, args->bo_handle);
 +      if (!obj)
 +              return -ENOENT;
  
 -      n = 0;
 -      if (readonly) {
 -              if (obj->last_write_req)
 -                      req[n++] = obj->last_write_req;
 -      } else {
 -              for (i = 0; i < I915_NUM_ENGINES; i++)
 -                      if (obj->last_read_req[i])
 -                              req[n++] = obj->last_read_req[i];
 -      }
 -      for (i = 0; i < n; i++) {
 -              ret = __i915_gem_object_sync(obj, to, req[i], to_req);
 +      active = __I915_BO_ACTIVE(obj);
 +      for_each_active(active, idx) {
 +              s64 *timeout = args->timeout_ns >= 0 ? &args->timeout_ns : NULL;
 +              ret = i915_gem_active_wait_unlocked(&obj->last_read[idx],
 +                                                  I915_WAIT_INTERRUPTIBLE,
 +                                                  timeout, rps);
                if (ret)
 -                      return ret;
 +                      break;
        }
  
 -      return 0;
 -}
 -
 -static void i915_gem_object_finish_gtt(struct drm_i915_gem_object *obj)
 -{
 -      u32 old_write_domain, old_read_domains;
 -
 -      /* Force a pagefault for domain tracking on next user access */
 -      i915_gem_release_mmap(obj);
 -
 -      if ((obj->base.read_domains & I915_GEM_DOMAIN_GTT) == 0)
 -              return;
 -
 -      old_read_domains = obj->base.read_domains;
 -      old_write_domain = obj->base.write_domain;
 -
 -      obj->base.read_domains &= ~I915_GEM_DOMAIN_GTT;
 -      obj->base.write_domain &= ~I915_GEM_DOMAIN_GTT;
 -
 -      trace_i915_gem_object_change_domain(obj,
 -                                          old_read_domains,
 -                                          old_write_domain);
 +      i915_gem_object_put_unlocked(obj);
 +      return ret;
  }
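
/*
 * Editor's sketch, not part of the diff above: the rewritten wait ioctl walks
 * the per-engine "active" bitmask of the object and waits on last_read[idx]
 * for every set bit (for_each_active in the driver).  A standalone model of
 * that bit-walking loop; wait_on_engine() is a hypothetical stand-in for the
 * real wait.
 */
#include <stdio.h>

static void wait_on_engine(int idx)
{
	printf("waiting on engine %d\n", idx);
}

int main(void)
{
	unsigned long active = 0x0b;			/* engines 0, 1 and 3 busy */

	while (active) {
		int idx = __builtin_ctzl(active);	/* lowest set bit */

		wait_on_engine(idx);
		active &= active - 1;			/* clear that bit */
	}
	return 0;
}
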
  
  static void __i915_vma_iounmap(struct i915_vma *vma)
  {
 -      GEM_BUG_ON(vma->pin_count);
 +      GEM_BUG_ON(i915_vma_is_pinned(vma));
  
        if (vma->iomap == NULL)
                return;
        vma->iomap = NULL;
  }
  
 -static int __i915_vma_unbind(struct i915_vma *vma, bool wait)
 +int i915_vma_unbind(struct i915_vma *vma)
  {
        struct drm_i915_gem_object *obj = vma->obj;
 -      struct drm_i915_private *dev_priv = to_i915(obj->base.dev);
 +      unsigned long active;
        int ret;
  
 -      if (list_empty(&vma->obj_link))
 -              return 0;
 -
 -      if (!drm_mm_node_allocated(&vma->node)) {
 -              i915_gem_vma_destroy(vma);
 -              return 0;
 -      }
 -
 -      if (vma->pin_count)
 -              return -EBUSY;
 +      /* First wait upon any activity as retiring the request may
 +       * have side-effects such as unpinning or even unbinding this vma.
 +       */
 +      active = i915_vma_get_active(vma);
 +      if (active) {
 +              int idx;
 +
 +              /* When a closed VMA is retired, it is unbound - eek.
 +               * In order to prevent it from being recursively closed,
 +               * take a pin on the vma so that the second unbind is
 +               * aborted.
 +               */
 +              __i915_vma_pin(vma);
  
 -      BUG_ON(obj->pages == NULL);
 +              for_each_active(active, idx) {
 +                      ret = i915_gem_active_retire(&vma->last_read[idx],
 +                                                 &vma->vm->dev->struct_mutex);
 +                      if (ret)
 +                              break;
 +              }
  
 -      if (wait) {
 -              ret = i915_gem_object_wait_rendering(obj, false);
 +              __i915_vma_unpin(vma);
                if (ret)
                        return ret;
 +
 +              GEM_BUG_ON(i915_vma_is_active(vma));
        }
  
 -      if (vma->is_ggtt && vma->ggtt_view.type == I915_GGTT_VIEW_NORMAL) {
 -              i915_gem_object_finish_gtt(obj);
 +      if (i915_vma_is_pinned(vma))
 +              return -EBUSY;
 +
 +      if (!drm_mm_node_allocated(&vma->node))
 +              goto destroy;
  
 +      GEM_BUG_ON(obj->bind_count == 0);
 +      GEM_BUG_ON(!obj->pages);
 +
 +      if (i915_vma_is_map_and_fenceable(vma)) {
                /* release the fence reg _after_ flushing */
 -              ret = i915_gem_object_put_fence(obj);
 +              ret = i915_vma_put_fence(vma);
                if (ret)
                        return ret;
  
 +              /* Force a pagefault for domain tracking on next user access */
 +              i915_gem_release_mmap(obj);
 +
                __i915_vma_iounmap(vma);
 +              vma->flags &= ~I915_VMA_CAN_FENCE;
        }
  
 -      trace_i915_vma_unbind(vma);
 -
 -      vma->vm->unbind_vma(vma);
 -      vma->bound = 0;
 -
 -      list_del_init(&vma->vm_link);
 -      if (vma->is_ggtt) {
 -              if (vma->ggtt_view.type == I915_GGTT_VIEW_NORMAL) {
 -                      obj->map_and_fenceable = false;
 -              } else if (vma->ggtt_view.pages) {
 -                      sg_free_table(vma->ggtt_view.pages);
 -                      kfree(vma->ggtt_view.pages);
 -              }
 -              vma->ggtt_view.pages = NULL;
 +      if (likely(!vma->vm->closed)) {
 +              trace_i915_vma_unbind(vma);
 +              vma->vm->unbind_vma(vma);
        }
 +      vma->flags &= ~(I915_VMA_GLOBAL_BIND | I915_VMA_LOCAL_BIND);
  
        drm_mm_remove_node(&vma->node);
 -      i915_gem_vma_destroy(vma);
 +      list_move_tail(&vma->vm_link, &vma->vm->unbound_list);
 +
 +      if (vma->pages != obj->pages) {
 +              GEM_BUG_ON(!vma->pages);
 +              sg_free_table(vma->pages);
 +              kfree(vma->pages);
 +      }
 +      vma->pages = NULL;
  
        /* Since the unbound list is global, only move to that list if
         * no more VMAs exist. */
 -      if (list_empty(&obj->vma_list))
 -              list_move_tail(&obj->global_list, &dev_priv->mm.unbound_list);
 +      if (--obj->bind_count == 0)
 +              list_move_tail(&obj->global_list,
 +                             &to_i915(obj->base.dev)->mm.unbound_list);
  
        /* And finally now the object is completely decoupled from this vma,
         * we can drop its hold on the backing storage and allow it to be
         */
        i915_gem_object_unpin_pages(obj);
  
 -      return 0;
 -}
 -
 -int i915_vma_unbind(struct i915_vma *vma)
 -{
 -      return __i915_vma_unbind(vma, true);
 -}
 +destroy:
 +      if (unlikely(i915_vma_is_closed(vma)))
 +              i915_vma_destroy(vma);
  
 -int __i915_vma_unbind_no_wait(struct i915_vma *vma)
 -{
 -      return __i915_vma_unbind(vma, false);
 +      return 0;
  }
  
 -int i915_gem_wait_for_idle(struct drm_i915_private *dev_priv)
 +int i915_gem_wait_for_idle(struct drm_i915_private *dev_priv,
 +                         unsigned int flags)
  {
        struct intel_engine_cs *engine;
        int ret;
  
 -      lockdep_assert_held(&dev_priv->drm.struct_mutex);
 -
        for_each_engine(engine, dev_priv) {
                if (engine->last_context == NULL)
                        continue;
  
 -              ret = intel_engine_idle(engine);
 +              ret = intel_engine_idle(engine, flags);
                if (ret)
                        return ret;
        }
  
 -      WARN_ON(i915_verify_lists(dev));
        return 0;
  }
  
@@@ -2972,87 -3759,128 +2975,87 @@@ static bool i915_gem_valid_gtt_space(st
  }
  
  /**
 - * Finds free space in the GTT aperture and binds the object or a view of it
 - * there.
 - * @obj: object to bind
 - * @vm: address space to bind into
 - * @ggtt_view: global gtt view if applicable
 - * @alignment: requested alignment
 + * i915_vma_insert - finds a slot for the vma in its address space
 + * @vma: the vma
 + * @size: requested size in bytes (can be larger than the VMA)
 + * @alignment: required alignment
   * @flags: mask of PIN_* flags to use
 + *
 + * First we try to allocate some free space that meets the requirements for
 + * the VMA. Failing that, if the flags permit, it will evict an old VMA,
 + * preferably the oldest idle entry to make room for the new VMA.
 + *
 + * Returns:
 + * 0 on success, negative error code otherwise.
   */
 -static struct i915_vma *
 -i915_gem_object_bind_to_vm(struct drm_i915_gem_object *obj,
 -                         struct i915_address_space *vm,
 -                         const struct i915_ggtt_view *ggtt_view,
 -                         unsigned alignment,
 -                         uint64_t flags)
 +static int
 +i915_vma_insert(struct i915_vma *vma, u64 size, u64 alignment, u64 flags)
  {
 -      struct drm_device *dev = obj->base.dev;
 -      struct drm_i915_private *dev_priv = to_i915(dev);
 -      struct i915_ggtt *ggtt = &dev_priv->ggtt;
 -      u32 fence_alignment, unfenced_alignment;
 -      u32 search_flag, alloc_flag;
 +      struct drm_i915_private *dev_priv = to_i915(vma->vm->dev);
 +      struct drm_i915_gem_object *obj = vma->obj;
        u64 start, end;
 -      u64 size, fence_size;
 -      struct i915_vma *vma;
        int ret;
  
 -      if (i915_is_ggtt(vm)) {
 -              u32 view_size;
 -
 -              if (WARN_ON(!ggtt_view))
 -                      return ERR_PTR(-EINVAL);
 -
 -              view_size = i915_ggtt_view_size(obj, ggtt_view);
 -
 -              fence_size = i915_gem_get_gtt_size(dev,
 -                                                 view_size,
 -                                                 obj->tiling_mode);
 -              fence_alignment = i915_gem_get_gtt_alignment(dev,
 -                                                           view_size,
 -                                                           obj->tiling_mode,
 -                                                           true);
 -              unfenced_alignment = i915_gem_get_gtt_alignment(dev,
 -                                                              view_size,
 -                                                              obj->tiling_mode,
 -                                                              false);
 -              size = flags & PIN_MAPPABLE ? fence_size : view_size;
 -      } else {
 -              fence_size = i915_gem_get_gtt_size(dev,
 -                                                 obj->base.size,
 -                                                 obj->tiling_mode);
 -              fence_alignment = i915_gem_get_gtt_alignment(dev,
 -                                                           obj->base.size,
 -                                                           obj->tiling_mode,
 -                                                           true);
 -              unfenced_alignment =
 -                      i915_gem_get_gtt_alignment(dev,
 -                                                 obj->base.size,
 -                                                 obj->tiling_mode,
 -                                                 false);
 -              size = flags & PIN_MAPPABLE ? fence_size : obj->base.size;
 -      }
 +      GEM_BUG_ON(vma->flags & (I915_VMA_GLOBAL_BIND | I915_VMA_LOCAL_BIND));
 +      GEM_BUG_ON(drm_mm_node_allocated(&vma->node));
 +
 +      size = max(size, vma->size);
 +      if (flags & PIN_MAPPABLE)
 +              size = i915_gem_get_ggtt_size(dev_priv, size,
 +                                            i915_gem_object_get_tiling(obj));
 +
 +      alignment = max(max(alignment, vma->display_alignment),
 +                      i915_gem_get_ggtt_alignment(dev_priv, size,
 +                                                  i915_gem_object_get_tiling(obj),
 +                                                  flags & PIN_MAPPABLE));
  
        start = flags & PIN_OFFSET_BIAS ? flags & PIN_OFFSET_MASK : 0;
 -      end = vm->total;
 +
 +      end = vma->vm->total;
        if (flags & PIN_MAPPABLE)
 -              end = min_t(u64, end, ggtt->mappable_end);
 +              end = min_t(u64, end, dev_priv->ggtt.mappable_end);
        if (flags & PIN_ZONE_4G)
                end = min_t(u64, end, (1ULL << 32) - PAGE_SIZE);
  
 -      if (alignment == 0)
 -              alignment = flags & PIN_MAPPABLE ? fence_alignment :
 -                                              unfenced_alignment;
 -      if (flags & PIN_MAPPABLE && alignment & (fence_alignment - 1)) {
 -              DRM_DEBUG("Invalid object (view type=%u) alignment requested %u\n",
 -                        ggtt_view ? ggtt_view->type : 0,
 -                        alignment);
 -              return ERR_PTR(-EINVAL);
 -      }
 -
        /* If binding the object/GGTT view requires more space than the entire
         * aperture has, reject it early before evicting everything in a vain
         * attempt to find space.
         */
        if (size > end) {
 -              DRM_DEBUG("Attempting to bind an object (view type=%u) larger than the aperture: size=%llu > %s aperture=%llu\n",
 -                        ggtt_view ? ggtt_view->type : 0,
 -                        size,
 +              DRM_DEBUG("Attempting to bind an object larger than the aperture: request=%llu [object=%zd] > %s aperture=%llu\n",
 +                        size, obj->base.size,
                          flags & PIN_MAPPABLE ? "mappable" : "total",
                          end);
 -              return ERR_PTR(-E2BIG);
 +              return -E2BIG;
        }
  
        ret = i915_gem_object_get_pages(obj);
        if (ret)
 -              return ERR_PTR(ret);
 +              return ret;
  
        i915_gem_object_pin_pages(obj);
  
 -      vma = ggtt_view ? i915_gem_obj_lookup_or_create_ggtt_vma(obj, ggtt_view) :
 -                        i915_gem_obj_lookup_or_create_vma(obj, vm);
 -
 -      if (IS_ERR(vma))
 -              goto err_unpin;
 -
        if (flags & PIN_OFFSET_FIXED) {
 -              uint64_t offset = flags & PIN_OFFSET_MASK;
 -
 -              if (offset & (alignment - 1) || offset + size > end) {
 +              u64 offset = flags & PIN_OFFSET_MASK;
 +              if (offset & (alignment - 1) || offset > end - size) {
                        ret = -EINVAL;
 -                      goto err_free_vma;
 +                      goto err_unpin;
                }
 +
                vma->node.start = offset;
                vma->node.size = size;
                vma->node.color = obj->cache_level;
 -              ret = drm_mm_reserve_node(&vm->mm, &vma->node);
 +              ret = drm_mm_reserve_node(&vma->vm->mm, &vma->node);
                if (ret) {
                        ret = i915_gem_evict_for_vma(vma);
                        if (ret == 0)
 -                              ret = drm_mm_reserve_node(&vm->mm, &vma->node);
 +                              ret = drm_mm_reserve_node(&vma->vm->mm, &vma->node);
 +                      if (ret)
 +                              goto err_unpin;
                }
 -              if (ret)
 -                      goto err_free_vma;
        } else {
 +              u32 search_flag, alloc_flag;
 +
                if (flags & PIN_HIGH) {
                        search_flag = DRM_MM_SEARCH_BELOW;
                        alloc_flag = DRM_MM_CREATE_TOP;
                        alloc_flag = DRM_MM_CREATE_DEFAULT;
                }
  
 +              /* We only allocate in PAGE_SIZE/GTT_PAGE_SIZE (4096) chunks,
 +               * so we know that we always have a minimum alignment of 4096.
 +               * The drm_mm range manager is optimised to return results
 +               * with zero alignment, so where possible use the optimal
 +               * path.
 +               */
 +              if (alignment <= 4096)
 +                      alignment = 0;
 +
  search_free:
 -              ret = drm_mm_insert_node_in_range_generic(&vm->mm, &vma->node,
 +              ret = drm_mm_insert_node_in_range_generic(&vma->vm->mm,
 +                                                        &vma->node,
                                                          size, alignment,
                                                          obj->cache_level,
                                                          start, end,
                                                          search_flag,
                                                          alloc_flag);
                if (ret) {
 -                      ret = i915_gem_evict_something(dev, vm, size, alignment,
 +                      ret = i915_gem_evict_something(vma->vm, size, alignment,
                                                       obj->cache_level,
                                                       start, end,
                                                       flags);
                        if (ret == 0)
                                goto search_free;
  
 -                      goto err_free_vma;
 +                      goto err_unpin;
                }
        }
 -      if (WARN_ON(!i915_gem_valid_gtt_space(vma, obj->cache_level))) {
 -              ret = -EINVAL;
 -              goto err_remove_node;
 -      }
 -
 -      trace_i915_vma_bind(vma, flags);
 -      ret = i915_vma_bind(vma, obj->cache_level, flags);
 -      if (ret)
 -              goto err_remove_node;
 +      GEM_BUG_ON(!i915_gem_valid_gtt_space(vma, obj->cache_level));
  
        list_move_tail(&obj->global_list, &dev_priv->mm.bound_list);
 -      list_add_tail(&vma->vm_link, &vm->inactive_list);
 +      list_move_tail(&vma->vm_link, &vma->vm->inactive_list);
 +      obj->bind_count++;
  
 -      return vma;
 +      return 0;
  
 -err_remove_node:
 -      drm_mm_remove_node(&vma->node);
 -err_free_vma:
 -      i915_gem_vma_destroy(vma);
 -      vma = ERR_PTR(ret);
  err_unpin:
        i915_gem_object_unpin_pages(obj);
 -      return vma;
 +      return ret;
  }
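
/*
 * Editor's sketch, not part of the diff above: the PIN_OFFSET_FIXED path in
 * i915_vma_insert() rejects a placement that is misaligned or does not fit
 * below "end".  Note the bound is written "offset > end - size" rather than
 * "offset + size > end" so that a huge offset cannot wrap the addition, and
 * alignment is assumed to be a power of two so "offset & (alignment - 1)"
 * tests it without a division.  Standalone model with illustrative names.
 */
#include <stdbool.h>
#include <stdio.h>

static bool placement_ok(unsigned long long offset, unsigned long long size,
			 unsigned long long alignment, unsigned long long end)
{
	if (size > end)				/* cannot fit at all */
		return false;
	if (alignment && (offset & (alignment - 1)))
		return false;			/* misaligned */
	return offset <= end - size;		/* overflow-safe bound */
}

int main(void)
{
	/* 64KiB object, 4KiB alignment, 256MiB aperture. */
	printf("%d\n", placement_ok(0x10000, 0x10000, 0x1000, 0x10000000));	/* 1: fits */
	printf("%d\n", placement_ok(0x10001, 0x10000, 0x1000, 0x10000000));	/* 0: misaligned */
	printf("%d\n", placement_ok(~0ULL - 0xfff, 0x10000, 0x1000, 0x10000000)); /* 0: would wrap */
	return 0;
}
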
  
  bool
@@@ -3144,72 -3974,51 +3147,72 @@@ i915_gem_clflush_object(struct drm_i915
  static void
  i915_gem_object_flush_gtt_write_domain(struct drm_i915_gem_object *obj)
  {
 -      uint32_t old_write_domain;
 +      struct drm_i915_private *dev_priv = to_i915(obj->base.dev);
  
        if (obj->base.write_domain != I915_GEM_DOMAIN_GTT)
                return;
  
        /* No actual flushing is required for the GTT write domain.  Writes
 -       * to it immediately go to main memory as far as we know, so there's
 +       * to it "immediately" go to main memory as far as we know, so there's
         * no chipset flush.  It also doesn't land in render cache.
         *
         * However, we do have to enforce the order so that all writes through
         * the GTT land before any writes to the device, such as updates to
         * the GATT itself.
 +       *
 +       * We also have to wait a bit for the writes to land from the GTT.
 +       * An uncached read (i.e. mmio) seems to be ideal for the round-trip
 +       * timing. This issue has only been observed when switching quickly
 +       * between GTT writes and CPU reads from inside the kernel on recent hw,
 +       * and it appears to only affect discrete GTT blocks (i.e. on LLC
 +       * system agents we cannot reproduce this behaviour).
         */
        wmb();
 +      if (INTEL_GEN(dev_priv) >= 6 && !HAS_LLC(dev_priv))
 +              POSTING_READ(RING_ACTHD(dev_priv->engine[RCS].mmio_base));
  
 -      old_write_domain = obj->base.write_domain;
 -      obj->base.write_domain = 0;
 -
 -      intel_fb_obj_flush(obj, false, ORIGIN_GTT);
 +      intel_fb_obj_flush(obj, false, write_origin(obj, I915_GEM_DOMAIN_GTT));
  
 +      obj->base.write_domain = 0;
        trace_i915_gem_object_change_domain(obj,
                                            obj->base.read_domains,
 -                                          old_write_domain);
 +                                          I915_GEM_DOMAIN_GTT);
  }
  
  /** Flushes the CPU write domain for the object if it's dirty. */
  static void
  i915_gem_object_flush_cpu_write_domain(struct drm_i915_gem_object *obj)
  {
 -      uint32_t old_write_domain;
 -
        if (obj->base.write_domain != I915_GEM_DOMAIN_CPU)
                return;
  
        if (i915_gem_clflush_object(obj, obj->pin_display))
                i915_gem_chipset_flush(to_i915(obj->base.dev));
  
 -      old_write_domain = obj->base.write_domain;
 -      obj->base.write_domain = 0;
 -
        intel_fb_obj_flush(obj, false, ORIGIN_CPU);
  
 +      obj->base.write_domain = 0;
        trace_i915_gem_object_change_domain(obj,
                                            obj->base.read_domains,
 -                                          old_write_domain);
 +                                          I915_GEM_DOMAIN_CPU);
 +}
 +
 +static void i915_gem_object_bump_inactive_ggtt(struct drm_i915_gem_object *obj)
 +{
 +      struct i915_vma *vma;
 +
 +      list_for_each_entry(vma, &obj->vma_list, obj_link) {
 +              if (!i915_vma_is_ggtt(vma))
 +                      continue;
 +
 +              if (i915_vma_is_active(vma))
 +                      continue;
 +
 +              if (!drm_mm_node_allocated(&vma->node))
 +                      continue;
 +
 +              list_move_tail(&vma->vm_link, &vma->vm->inactive_list);
 +      }
  }
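
/*
 * Editor's sketch, not part of the diff above: both the bound-object LRU and
 * the per-VM inactive list approximate LRU by simply moving an entry to the
 * tail of a doubly linked list whenever it is "used"; eviction then scans
 * from the head, i.e. the coldest entries first.  A minimal standalone list
 * with a move-to-tail helper; the node/list names are illustrative, not the
 * kernel's list API.
 */
#include <stdio.h>

struct node {
	struct node *prev, *next;
	const char *name;
};

static void list_init(struct node *head)
{
	head->prev = head->next = head;
}

static void list_add_tail(struct node *n, struct node *head)
{
	n->prev = head->prev;
	n->next = head;
	head->prev->next = n;
	head->prev = n;
}

static void list_move_tail(struct node *n, struct node *head)
{
	n->prev->next = n->next;		/* unlink */
	n->next->prev = n->prev;
	list_add_tail(n, head);			/* re-insert at the tail */
}

int main(void)
{
	struct node head, a = { .name = "a" }, b = { .name = "b" }, c = { .name = "c" };
	struct node *it;

	list_init(&head);
	list_add_tail(&a, &head);
	list_add_tail(&b, &head);
	list_add_tail(&c, &head);

	list_move_tail(&a, &head);		/* "a" was just used */

	for (it = head.next; it != &head; it = it->next)
		printf("%s ", it->name);	/* prints: b c a */
	printf("\n");
	return 0;
}
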
  
  /**
  int
  i915_gem_object_set_to_gtt_domain(struct drm_i915_gem_object *obj, bool write)
  {
 -      struct drm_device *dev = obj->base.dev;
 -      struct drm_i915_private *dev_priv = to_i915(dev);
 -      struct i915_ggtt *ggtt = &dev_priv->ggtt;
        uint32_t old_write_domain, old_read_domains;
 -      struct i915_vma *vma;
        int ret;
  
 -      if (obj->base.write_domain == I915_GEM_DOMAIN_GTT)
 -              return 0;
 -
        ret = i915_gem_object_wait_rendering(obj, !write);
        if (ret)
                return ret;
  
 +      if (obj->base.write_domain == I915_GEM_DOMAIN_GTT)
 +              return 0;
 +
        /* Flush and acquire obj->pages so that we are coherent through
         * direct access in memory with previous cached writes through
         * shmemfs and that our cache domain tracking remains valid.
                                            old_write_domain);
  
        /* And bump the LRU for this access */
 -      vma = i915_gem_obj_to_ggtt(obj);
 -      if (vma && drm_mm_node_allocated(&vma->node) && !obj->active)
 -              list_move_tail(&vma->vm_link,
 -                             &ggtt->base.inactive_list);
 +      i915_gem_object_bump_inactive_ggtt(obj);
  
        return 0;
  }
  int i915_gem_object_set_cache_level(struct drm_i915_gem_object *obj,
                                    enum i915_cache_level cache_level)
  {
 -      struct drm_device *dev = obj->base.dev;
 -      struct i915_vma *vma, *next;
 -      bool bound = false;
 +      struct i915_vma *vma;
        int ret = 0;
  
        if (obj->cache_level == cache_level)
         * catch the issue of the CS prefetch crossing page boundaries and
         * reading an invalid PTE on older architectures.
         */
 -      list_for_each_entry_safe(vma, next, &obj->vma_list, obj_link) {
 +restart:
 +      list_for_each_entry(vma, &obj->vma_list, obj_link) {
                if (!drm_mm_node_allocated(&vma->node))
                        continue;
  
 -              if (vma->pin_count) {
 +              if (i915_vma_is_pinned(vma)) {
                        DRM_DEBUG("can not change the cache level of pinned objects\n");
                        return -EBUSY;
                }
  
 -              if (!i915_gem_valid_gtt_space(vma, cache_level)) {
 -                      ret = i915_vma_unbind(vma);
 -                      if (ret)
 -                              return ret;
 -              } else
 -                      bound = true;
 +              if (i915_gem_valid_gtt_space(vma, cache_level))
 +                      continue;
 +
 +              ret = i915_vma_unbind(vma);
 +              if (ret)
 +                      return ret;
 +
 +              /* As unbinding may affect other elements in the
 +               * obj->vma_list (due to side-effects from retiring
 +               * an active vma), play safe and restart the iterator.
 +               */
 +              goto restart;
        }
  
        /* We can reuse the existing drm_mm nodes but need to change the
         * rewrite the PTE in the belief that doing so tramples upon less
         * state and so involves less work.
         */
 -      if (bound) {
 +      if (obj->bind_count) {
                /* Before we change the PTE, the GPU must not be accessing it.
                 * If we wait upon the object, we know that all the bound
                 * VMA are no longer active.
                if (ret)
                        return ret;
  
 -              if (!HAS_LLC(dev) && cache_level != I915_CACHE_NONE) {
 +              if (!HAS_LLC(obj->base.dev) && cache_level != I915_CACHE_NONE) {
                        /* Access to snoopable pages through the GTT is
                         * incoherent and on some machines causes a hard
                        * lockup. Relinquish the CPU mmapping to force
                         * dropped the fence as all snoopable access is
                         * supposed to be linear.
                         */
 -                      ret = i915_gem_object_put_fence(obj);
 -                      if (ret)
 -                              return ret;
 +                      list_for_each_entry(vma, &obj->vma_list, obj_link) {
 +                              ret = i915_vma_put_fence(vma);
 +                              if (ret)
 +                                      return ret;
 +                      }
                } else {
                        /* We either have incoherent backing store and
                         * so no GTT access or the architecture is fully
@@@ -3412,8 -4221,8 +3415,8 @@@ int i915_gem_get_caching_ioctl(struct d
        struct drm_i915_gem_caching *args = data;
        struct drm_i915_gem_object *obj;
  
 -      obj = to_intel_bo(drm_gem_object_lookup(file, args->handle));
 -      if (&obj->base == NULL)
 +      obj = i915_gem_object_lookup(file, args->handle);
 +      if (!obj)
                return -ENOENT;
  
        switch (obj->cache_level) {
                break;
        }
  
 -      drm_gem_object_unreference_unlocked(&obj->base);
 +      i915_gem_object_put_unlocked(obj);
        return 0;
  }
  
@@@ -3473,15 -4282,15 +3476,15 @@@ int i915_gem_set_caching_ioctl(struct d
        if (ret)
                goto rpm_put;
  
 -      obj = to_intel_bo(drm_gem_object_lookup(file, args->handle));
 -      if (&obj->base == NULL) {
 +      obj = i915_gem_object_lookup(file, args->handle);
 +      if (!obj) {
                ret = -ENOENT;
                goto unlock;
        }
  
        ret = i915_gem_object_set_cache_level(obj, level);
  
 -      drm_gem_object_unreference(&obj->base);
 +      i915_gem_object_put(obj);
  unlock:
        mutex_unlock(&dev->struct_mutex);
  rpm_put:
   * Can be called from an uninterruptible phase (modesetting) and allows
   * any flushes to be pipelined (for pageflips).
   */
 -int
 +struct i915_vma *
  i915_gem_object_pin_to_display_plane(struct drm_i915_gem_object *obj,
                                     u32 alignment,
                                     const struct i915_ggtt_view *view)
  {
 +      struct i915_vma *vma;
        u32 old_read_domains, old_write_domain;
        int ret;
  
         */
        ret = i915_gem_object_set_cache_level(obj,
                                              HAS_WT(obj->base.dev) ? I915_CACHE_WT : I915_CACHE_NONE);
 -      if (ret)
 +      if (ret) {
 +              vma = ERR_PTR(ret);
                goto err_unpin_display;
 +      }
  
        /* As the user may map the buffer once pinned in the display plane
         * (e.g. libkms for the bootup splash), we have to ensure that we
 -       * always use map_and_fenceable for all scanout buffers.
 +       * always use map_and_fenceable for all scanout buffers. However,
 +       * it may simply be too big to fit into mappable, in which case
 +       * put it anyway and hope that userspace can cope (but always first
 +       * try to preserve the existing ABI).
         */
 -      ret = i915_gem_object_ggtt_pin(obj, view, alignment,
 -                                     view->type == I915_GGTT_VIEW_NORMAL ?
 -                                     PIN_MAPPABLE : 0);
 -      if (ret)
 +      vma = ERR_PTR(-ENOSPC);
 +      if (view->type == I915_GGTT_VIEW_NORMAL)
 +              vma = i915_gem_object_ggtt_pin(obj, view, 0, alignment,
 +                                             PIN_MAPPABLE | PIN_NONBLOCK);
 +      if (IS_ERR(vma))
 +              vma = i915_gem_object_ggtt_pin(obj, view, 0, alignment, 0);
 +      if (IS_ERR(vma))
                goto err_unpin_display;
  
 +      vma->display_alignment = max_t(u64, vma->display_alignment, alignment);
 +
 +      WARN_ON(obj->pin_display > i915_vma_pin_count(vma));
 +
        i915_gem_object_flush_cpu_write_domain(obj);
  
        old_write_domain = obj->base.write_domain;
                                            old_read_domains,
                                            old_write_domain);
  
 -      return 0;
 +      return vma;
  
  err_unpin_display:
        obj->pin_display--;
 -      return ret;
 +      return vma;
  }
  
  void
 -i915_gem_object_unpin_from_display_plane(struct drm_i915_gem_object *obj,
 -                                       const struct i915_ggtt_view *view)
 +i915_gem_object_unpin_from_display_plane(struct i915_vma *vma)
  {
 -      if (WARN_ON(obj->pin_display == 0))
 +      if (WARN_ON(vma->obj->pin_display == 0))
                return;
  
 -      i915_gem_object_ggtt_unpin_view(obj, view);
 +      if (--vma->obj->pin_display == 0)
 +              vma->display_alignment = 0;
  
 -      obj->pin_display--;
 +      /* Bump the LRU to try and avoid premature eviction whilst flipping  */
 +      if (!i915_vma_is_active(vma))
 +              list_move_tail(&vma->vm_link, &vma->vm->inactive_list);
 +
 +      i915_vma_unpin(vma);
 +      WARN_ON(vma->obj->pin_display > i915_vma_pin_count(vma));
  }
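
For orientation, a minimal caller sketch of the contract above (hypothetical, not part of this commit; the real user is intel_pin_and_fence_fb_obj() further down in this diff): pinning to the display plane now hands back an i915_vma or an ERR_PTR instead of an int, and the unpin side takes that vma directly.

/* Hypothetical caller sketch mirroring intel_pin_and_fence_fb_obj();
 * illustrative only, not part of this commit. */
struct i915_vma *vma;

vma = i915_gem_object_pin_to_display_plane(obj, alignment, &view);
if (IS_ERR(vma))
	return PTR_ERR(vma);

/* ... program scanout from i915_ggtt_offset(vma) ... */

i915_gem_object_unpin_from_display_plane(vma);
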
  
  /**
@@@ -3598,13 -4389,13 +3601,13 @@@ i915_gem_object_set_to_cpu_domain(struc
        uint32_t old_write_domain, old_read_domains;
        int ret;
  
 -      if (obj->base.write_domain == I915_GEM_DOMAIN_CPU)
 -              return 0;
 -
        ret = i915_gem_object_wait_rendering(obj, !write);
        if (ret)
                return ret;
  
 +      if (obj->base.write_domain == I915_GEM_DOMAIN_CPU)
 +              return 0;
 +
        i915_gem_object_flush_gtt_write_domain(obj);
  
        old_write_domain = obj->base.write_domain;
@@@ -3679,31 -4470,28 +3682,31 @@@ i915_gem_ring_throttle(struct drm_devic
                target = request;
        }
        if (target)
 -              i915_gem_request_reference(target);
 +              i915_gem_request_get(target);
        spin_unlock(&file_priv->mm.lock);
  
        if (target == NULL)
                return 0;
  
 -      ret = __i915_wait_request(target, true, NULL, NULL);
 -      i915_gem_request_unreference(target);
 +      ret = i915_wait_request(target, I915_WAIT_INTERRUPTIBLE, NULL, NULL);
 +      i915_gem_request_put(target);
  
        return ret;
  }
  
  static bool
 -i915_vma_misplaced(struct i915_vma *vma, uint32_t alignment, uint64_t flags)
 +i915_vma_misplaced(struct i915_vma *vma, u64 size, u64 alignment, u64 flags)
  {
 -      struct drm_i915_gem_object *obj = vma->obj;
 +      if (!drm_mm_node_allocated(&vma->node))
 +              return false;
 +
 +      if (vma->node.size < size)
 +              return true;
  
 -      if (alignment &&
 -          vma->node.start & (alignment - 1))
 +      if (alignment && vma->node.start & (alignment - 1))
                return true;
  
 -      if (flags & PIN_MAPPABLE && !obj->map_and_fenceable)
 +      if (flags & PIN_MAPPABLE && !i915_vma_is_map_and_fenceable(vma))
                return true;
  
        if (flags & PIN_OFFSET_BIAS &&
  void __i915_vma_set_map_and_fenceable(struct i915_vma *vma)
  {
        struct drm_i915_gem_object *obj = vma->obj;
 +      struct drm_i915_private *dev_priv = to_i915(obj->base.dev);
        bool mappable, fenceable;
        u32 fence_size, fence_alignment;
  
 -      fence_size = i915_gem_get_gtt_size(obj->base.dev,
 -                                         obj->base.size,
 -                                         obj->tiling_mode);
 -      fence_alignment = i915_gem_get_gtt_alignment(obj->base.dev,
 -                                                   obj->base.size,
 -                                                   obj->tiling_mode,
 -                                                   true);
 +      fence_size = i915_gem_get_ggtt_size(dev_priv,
 +                                          vma->size,
 +                                          i915_gem_object_get_tiling(obj));
 +      fence_alignment = i915_gem_get_ggtt_alignment(dev_priv,
 +                                                    vma->size,
 +                                                    i915_gem_object_get_tiling(obj),
 +                                                    true);
  
        fenceable = (vma->node.size == fence_size &&
                     (vma->node.start & (fence_alignment - 1)) == 0);
  
        mappable = (vma->node.start + fence_size <=
 -                  to_i915(obj->base.dev)->ggtt.mappable_end);
 +                  dev_priv->ggtt.mappable_end);
  
 -      obj->map_and_fenceable = mappable && fenceable;
 +      if (mappable && fenceable)
 +              vma->flags |= I915_VMA_CAN_FENCE;
 +      else
 +              vma->flags &= ~I915_VMA_CAN_FENCE;
  }
  
 -static int
 -i915_gem_object_do_pin(struct drm_i915_gem_object *obj,
 -                     struct i915_address_space *vm,
 -                     const struct i915_ggtt_view *ggtt_view,
 -                     uint32_t alignment,
 -                     uint64_t flags)
 +int __i915_vma_do_pin(struct i915_vma *vma,
 +                    u64 size, u64 alignment, u64 flags)
  {
 -      struct drm_i915_private *dev_priv = to_i915(obj->base.dev);
 -      struct i915_vma *vma;
 -      unsigned bound;
 +      unsigned int bound = vma->flags;
        int ret;
  
 -      if (WARN_ON(vm == &dev_priv->mm.aliasing_ppgtt->base))
 -              return -ENODEV;
 -
 -      if (WARN_ON(flags & (PIN_GLOBAL | PIN_MAPPABLE) && !i915_is_ggtt(vm)))
 -              return -EINVAL;
 -
 -      if (WARN_ON((flags & (PIN_MAPPABLE | PIN_GLOBAL)) == PIN_MAPPABLE))
 -              return -EINVAL;
 -
 -      if (WARN_ON(i915_is_ggtt(vm) != !!ggtt_view))
 -              return -EINVAL;
 -
 -      vma = ggtt_view ? i915_gem_obj_to_ggtt_view(obj, ggtt_view) :
 -                        i915_gem_obj_to_vma(obj, vm);
 -
 -      if (vma) {
 -              if (WARN_ON(vma->pin_count == DRM_I915_GEM_OBJECT_MAX_PIN_COUNT))
 -                      return -EBUSY;
 +      GEM_BUG_ON((flags & (PIN_GLOBAL | PIN_USER)) == 0);
 +      GEM_BUG_ON((flags & PIN_GLOBAL) && !i915_vma_is_ggtt(vma));
  
 -              if (i915_vma_misplaced(vma, alignment, flags)) {
 -                      WARN(vma->pin_count,
 -                           "bo is already pinned in %s with incorrect alignment:"
 -                           " offset=%08x %08x, req.alignment=%x, req.map_and_fenceable=%d,"
 -                           " obj->map_and_fenceable=%d\n",
 -                           ggtt_view ? "ggtt" : "ppgtt",
 -                           upper_32_bits(vma->node.start),
 -                           lower_32_bits(vma->node.start),
 -                           alignment,
 -                           !!(flags & PIN_MAPPABLE),
 -                           obj->map_and_fenceable);
 -                      ret = i915_vma_unbind(vma);
 -                      if (ret)
 -                              return ret;
 -
 -                      vma = NULL;
 -              }
 +      if (WARN_ON(bound & I915_VMA_PIN_OVERFLOW)) {
 +              ret = -EBUSY;
 +              goto err;
        }
  
 -      bound = vma ? vma->bound : 0;
 -      if (vma == NULL || !drm_mm_node_allocated(&vma->node)) {
 -              vma = i915_gem_object_bind_to_vm(obj, vm, ggtt_view, alignment,
 -                                               flags);
 -              if (IS_ERR(vma))
 -                      return PTR_ERR(vma);
 -      } else {
 -              ret = i915_vma_bind(vma, obj->cache_level, flags);
 +      if ((bound & I915_VMA_BIND_MASK) == 0) {
 +              ret = i915_vma_insert(vma, size, alignment, flags);
                if (ret)
 -                      return ret;
 +                      goto err;
        }
  
 -      if (ggtt_view && ggtt_view->type == I915_GGTT_VIEW_NORMAL &&
 -          (bound ^ vma->bound) & GLOBAL_BIND) {
 +      ret = i915_vma_bind(vma, vma->obj->cache_level, flags);
 +      if (ret)
 +              goto err;
 +
 +      if ((bound ^ vma->flags) & I915_VMA_GLOBAL_BIND)
                __i915_vma_set_map_and_fenceable(vma);
 -              WARN_ON(flags & PIN_MAPPABLE && !obj->map_and_fenceable);
 -      }
  
 -      vma->pin_count++;
 +      GEM_BUG_ON(i915_vma_misplaced(vma, size, alignment, flags));
        return 0;
 -}
  
 -int
 -i915_gem_object_pin(struct drm_i915_gem_object *obj,
 -                  struct i915_address_space *vm,
 -                  uint32_t alignment,
 -                  uint64_t flags)
 -{
 -      return i915_gem_object_do_pin(obj, vm,
 -                                    i915_is_ggtt(vm) ? &i915_ggtt_view_normal : NULL,
 -                                    alignment, flags);
 +err:
 +      __i915_vma_unpin(vma);
 +      return ret;
  }
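
A hedged sketch of how the pin path above is consumed: i915_vma_pin() (used below in i915_gem_object_ggtt_pin()) is assumed to funnel into __i915_vma_do_pin() when the vma is not already bound, and every successful pin has to be balanced by i915_vma_unpin(), as i915_gem_object_unpin_from_display_plane() does earlier in this diff.

/* Illustrative pairing only; the names follow the calls visible in this
 * diff, the surrounding code is assumed. */
int ret;

ret = i915_vma_pin(vma, size, alignment, flags | PIN_GLOBAL);
if (ret)
	return ret;

/* ... use the binding (vma->node.start is valid while pinned) ... */

i915_vma_unpin(vma);
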
  
 -int
 +struct i915_vma *
  i915_gem_object_ggtt_pin(struct drm_i915_gem_object *obj,
                         const struct i915_ggtt_view *view,
 -                       uint32_t alignment,
 -                       uint64_t flags)
 +                       u64 size,
 +                       u64 alignment,
 +                       u64 flags)
  {
 -      struct drm_device *dev = obj->base.dev;
 -      struct drm_i915_private *dev_priv = to_i915(dev);
 -      struct i915_ggtt *ggtt = &dev_priv->ggtt;
 +      struct i915_address_space *vm = &to_i915(obj->base.dev)->ggtt.base;
 +      struct i915_vma *vma;
 +      int ret;
 +
 +      vma = i915_gem_obj_lookup_or_create_vma(obj, vm, view);
 +      if (IS_ERR(vma))
 +              return vma;
 +
 +      if (i915_vma_misplaced(vma, size, alignment, flags)) {
 +              if (flags & PIN_NONBLOCK &&
 +                  (i915_vma_is_pinned(vma) || i915_vma_is_active(vma)))
 +                      return ERR_PTR(-ENOSPC);
 +
 +              WARN(i915_vma_is_pinned(vma),
 +                   "bo is already pinned in ggtt with incorrect alignment:"
 +                   " offset=%08x, req.alignment=%llx,"
 +                   " req.map_and_fenceable=%d, vma->map_and_fenceable=%d\n",
 +                   i915_ggtt_offset(vma), alignment,
 +                   !!(flags & PIN_MAPPABLE),
 +                   i915_vma_is_map_and_fenceable(vma));
 +              ret = i915_vma_unbind(vma);
 +              if (ret)
 +                      return ERR_PTR(ret);
 +      }
 +
 +      ret = i915_vma_pin(vma, size, alignment, flags | PIN_GLOBAL);
 +      if (ret)
 +              return ERR_PTR(ret);
  
 -      BUG_ON(!view);
 +      return vma;
 +}
  
 -      return i915_gem_object_do_pin(obj, &ggtt->base, view,
 -                                    alignment, flags | PIN_GLOBAL);
 +static __always_inline unsigned int __busy_read_flag(unsigned int id)
 +{
 +      /* Note that we could alias engines in the execbuf API, but
 +       * that would be very unwise as it prevents userspace from exercising
 +       * fine control over engine selection. Ahem.
 +       *
 +       * This should be something like EXEC_MAX_ENGINE instead of
 +       * I915_NUM_ENGINES.
 +       */
 +      BUILD_BUG_ON(I915_NUM_ENGINES > 16);
 +      return 0x10000 << id;
  }
  
 -void
 -i915_gem_object_ggtt_unpin_view(struct drm_i915_gem_object *obj,
 -                              const struct i915_ggtt_view *view)
 +static __always_inline unsigned int __busy_write_id(unsigned int id)
  {
 -      struct i915_vma *vma = i915_gem_obj_to_ggtt_view(obj, view);
 +      /* The uABI guarantees an active writer is also amongst the read
 +       * engines. This would be true if we accessed the activity tracking
 +       * under the lock, but as we perform the lookup of the object and
 +       * its activity locklessly we can not guarantee that the last_write
 +       * being active implies that we have set the same engine flag from
 +       * last_read - hence we always set both read and write busy for
 +       * last_write.
 +       */
 +      return id | __busy_read_flag(id);
 +}
 +
 +static __always_inline unsigned int
 +__busy_set_if_active(const struct i915_gem_active *active,
 +                   unsigned int (*flag)(unsigned int id))
 +{
 +      struct drm_i915_gem_request *request;
 +
 +      request = rcu_dereference(active->request);
 +      if (!request || i915_gem_request_completed(request))
 +              return 0;
 +
 +      /* This is racy. See __i915_gem_active_get_rcu() for a detailed
 +       * discussion of how to handle the race correctly, but for reporting
 +       * the busy state we err on the side of potentially reporting the
 +       * wrong engine as being busy (but we guarantee that the result
 +       * is at least self-consistent).
 +       *
 +       * As we use SLAB_DESTROY_BY_RCU, the request may be reallocated
 +       * whilst we are inspecting it, even under the RCU read lock as we are.
 +       * This means that there is a small window for the engine and/or the
 +       * seqno to have been overwritten. The seqno will always be in the
 +       * future compared to the intended, and so we know that if that
 +       * seqno is idle (on whatever engine) our request is idle and the
 +       * return 0 above is correct.
 +       *
 +       * The issue is that if the engine is switched, it is just as likely
 +       * to report that it is busy (but since the switch happened, we know
 +       * the request should be idle). So there is a small chance that a busy
 +       * result is actually the wrong engine.
 +       *
 +       * So why don't we care?
 +       *
 +       * For starters, the busy ioctl is a heuristic that is by definition
 +       * racy. Even with perfect serialisation in the driver, the hardware
 +       * state is constantly advancing - the state we report to the user
 +       * is stale.
 +       *
 +       * The critical information for the busy-ioctl is whether the object
 +       * is idle as userspace relies on that to detect whether its next
 +       * access will stall, or if it has missed submitting commands to
 +       * the hardware allowing the GPU to stall. We never generate a
 +       * false-positive for idleness, thus busy-ioctl is reliable at the
 +       * most fundamental level, and we maintain the guarantee that a
 +       * busy object left to itself will eventually become idle (and stay
 +       * idle!).
 +       *
 +       * We allow ourselves the leeway of potentially misreporting the busy
 +       * state because that is an optimisation heuristic that is constantly
 +       * in flux. Being quickly able to detect the busy/idle state is much
 +       * more important than accurate logging of exactly which engines were
 +       * busy.
 +       *
 +       * For accuracy in reporting the engine, we could use
 +       *
 +       *      result = 0;
 +       *      request = __i915_gem_active_get_rcu(active);
 +       *      if (request) {
 +       *              if (!i915_gem_request_completed(request))
 +       *                      result = flag(request->engine->exec_id);
 +       *              i915_gem_request_put(request);
 +       *      }
 +       *
 +       * but that still remains susceptible to both hardware and userspace
 +       * races. So we accept making the result of that race slightly worse,
 +       * given the rarity of the race and its low impact on the result.
 +       */
 +      return flag(READ_ONCE(request->engine->exec_id));
 +}
  
 -      WARN_ON(vma->pin_count == 0);
 -      WARN_ON(!i915_gem_obj_ggtt_bound_view(obj, view));
 +static __always_inline unsigned int
 +busy_check_reader(const struct i915_gem_active *active)
 +{
 +      return __busy_set_if_active(active, __busy_read_flag);
 +}
  
 -      --vma->pin_count;
 +static __always_inline unsigned int
 +busy_check_writer(const struct i915_gem_active *active)
 +{
 +      return __busy_set_if_active(active, __busy_write_id);
  }
  
  int
@@@ -3930,64 -4645,47 +3933,64 @@@ i915_gem_busy_ioctl(struct drm_device *
  {
        struct drm_i915_gem_busy *args = data;
        struct drm_i915_gem_object *obj;
 -      int ret;
 +      unsigned long active;
  
 -      ret = i915_mutex_lock_interruptible(dev);
 -      if (ret)
 -              return ret;
 +      obj = i915_gem_object_lookup(file, args->handle);
 +      if (!obj)
 +              return -ENOENT;
  
 -      obj = to_intel_bo(drm_gem_object_lookup(file, args->handle));
 -      if (&obj->base == NULL) {
 -              ret = -ENOENT;
 -              goto unlock;
 -      }
 +      args->busy = 0;
 +      active = __I915_BO_ACTIVE(obj);
 +      if (active) {
 +              int idx;
  
 -      /* Count all active objects as busy, even if they are currently not used
 -       * by the gpu. Users of this interface expect objects to eventually
 -       * become non-busy without any further actions, therefore emit any
 -       * necessary flushes here.
 -       */
 -      ret = i915_gem_object_flush_active(obj);
 -      if (ret)
 -              goto unref;
 +              /* Yes, the lookups are intentionally racy.
 +               *
 +               * First, we cannot simply rely on __I915_BO_ACTIVE. We have
 +               * to regard the value as stale and as our ABI guarantees
 +               * forward progress, we confirm the status of each active
 +               * request with the hardware.
 +               *
 +               * Even though we guard the pointer lookup by RCU, that only
 +               * guarantees that the pointer and its contents remain
 +               * dereferenceable and does *not* mean that the request we
 +               * have is the same as the one being tracked by the object.
 +               *
 +               * Consider that we lookup the request just as it is being
 +               * retired and freed. We take a local copy of the pointer,
 +               * but before we add its engine into the busy set, the other
 +               * thread reallocates it and assigns it to a task on another
 +               * engine with a fresh and incomplete seqno. Guarding against
 +               * that requires careful serialisation and reference counting,
 +               * i.e. using __i915_gem_active_get_request_rcu(). We don't,
 +               * instead we expect that if the result is busy, which engines
 +               * are busy is not completely reliable - we only guarantee
 +               * that the object was busy.
 +               */
 +              rcu_read_lock();
  
 -      args->busy = 0;
 -      if (obj->active) {
 -              int i;
 +              for_each_active(active, idx)
 +                      args->busy |= busy_check_reader(&obj->last_read[idx]);
  
 -              for (i = 0; i < I915_NUM_ENGINES; i++) {
 -                      struct drm_i915_gem_request *req;
 +              /* For ABI sanity, we only care that the write engine is in
 +               * the set of read engines. This should be ensured by the
 +               * ordering of setting last_read/last_write in
 +               * i915_vma_move_to_active(), and then in reverse in retire.
 +               * However, for good measure, we always report the last_write
 +               * request as a busy read as well as being a busy write.
 +               *
 +               * We don't care that the set of active read/write engines
 +               * may change during construction of the result, as it is
 +               * equally liable to change before userspace can inspect
 +               * the result.
 +               */
 +              args->busy |= busy_check_writer(&obj->last_write);
  
 -                      req = obj->last_read_req[i];
 -                      if (req)
 -                              args->busy |= 1 << (16 + req->engine->exec_id);
 -              }
 -              if (obj->last_write_req)
 -                      args->busy |= obj->last_write_req->engine->exec_id;
 +              rcu_read_unlock();
        }
  
 -unref:
 -      drm_gem_object_unreference(&obj->base);
 -unlock:
 -      mutex_unlock(&dev->struct_mutex);
 -      return ret;
 +      i915_gem_object_put_unlocked(obj);
 +      return 0;
  }
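
To make the encoding used by busy_check_reader()/busy_check_writer() concrete: the upper 16 bits of args->busy carry one busy-for-read flag per engine exec_id (0x10000 << id), while the lower 16 bits carry the exec_id of the last writer, which is also mirrored into the read mask. A small standalone demo of just that bit layout (engine ids are made up for illustration):

/* Standalone illustration of the args->busy bit layout above; builds with
 * any C compiler, engine ids are illustrative only. */
#include <stdio.h>

static unsigned int busy_read_flag(unsigned int id) { return 0x10000u << id; }
static unsigned int busy_write_id(unsigned int id)  { return id | busy_read_flag(id); }

int main(void)
{
	unsigned int busy = 0;

	busy |= busy_read_flag(0);	/* engine 0 busy for read */
	busy |= busy_write_id(2);	/* engine 2 is the last writer */

	printf("busy=0x%08x readers=0x%04x writer=%u\n",
	       busy, busy >> 16, busy & 0xffff);	/* 0x00050002 0x0005 2 */
	return 0;
}
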
  
  int
@@@ -4018,14 -4716,19 +4021,14 @@@ i915_gem_madvise_ioctl(struct drm_devic
        if (ret)
                return ret;
  
 -      obj = to_intel_bo(drm_gem_object_lookup(file_priv, args->handle));
 -      if (&obj->base == NULL) {
 +      obj = i915_gem_object_lookup(file_priv, args->handle);
 +      if (!obj) {
                ret = -ENOENT;
                goto unlock;
        }
  
 -      if (i915_gem_obj_is_pinned(obj)) {
 -              ret = -EINVAL;
 -              goto out;
 -      }
 -
        if (obj->pages &&
 -          obj->tiling_mode != I915_TILING_NONE &&
 +          i915_gem_object_is_tiled(obj) &&
            dev_priv->quirks & QUIRK_PIN_SWIZZLED_PAGES) {
                if (obj->madv == I915_MADV_WILLNEED)
                        i915_gem_object_unpin_pages(obj);
  
        args->retained = obj->madv != __I915_MADV_PURGED;
  
 -out:
 -      drm_gem_object_unreference(&obj->base);
 +      i915_gem_object_put(obj);
  unlock:
        mutex_unlock(&dev->struct_mutex);
        return ret;
@@@ -4055,17 -4759,14 +4058,17 @@@ void i915_gem_object_init(struct drm_i9
  
        INIT_LIST_HEAD(&obj->global_list);
        for (i = 0; i < I915_NUM_ENGINES; i++)
 -              INIT_LIST_HEAD(&obj->engine_list[i]);
 +              init_request_active(&obj->last_read[i],
 +                                  i915_gem_object_retire__read);
 +      init_request_active(&obj->last_write,
 +                          i915_gem_object_retire__write);
        INIT_LIST_HEAD(&obj->obj_exec_link);
        INIT_LIST_HEAD(&obj->vma_list);
        INIT_LIST_HEAD(&obj->batch_pool_link);
  
        obj->ops = ops;
  
 -      obj->fence_reg = I915_FENCE_REG_NONE;
 +      obj->frontbuffer_ggtt_origin = ORIGIN_GTT;
        obj->madv = I915_MADV_WILLNEED;
  
        i915_gem_info_add_obj(to_i915(obj->base.dev), obj->base.size);
@@@ -4170,31 -4871,33 +4173,31 @@@ void i915_gem_free_object(struct drm_ge
  
        trace_i915_gem_object_destroy(obj);
  
 +      /* All file-owned VMA should have been released by this point through
 +       * i915_gem_close_object(), or earlier by i915_gem_context_close().
 +       * However, the object may also be bound into the global GTT (e.g.
 +       * older GPUs without per-process support, or for direct access through
 +       * the GTT either for the user or for scanout). Those VMA still need to
 +       * be unbound now.
 +       */
        list_for_each_entry_safe(vma, next, &obj->vma_list, obj_link) {
 -              int ret;
 -
 -              vma->pin_count = 0;
 -              ret = i915_vma_unbind(vma);
 -              if (WARN_ON(ret == -ERESTARTSYS)) {
 -                      bool was_interruptible;
 -
 -                      was_interruptible = dev_priv->mm.interruptible;
 -                      dev_priv->mm.interruptible = false;
 -
 -                      WARN_ON(i915_vma_unbind(vma));
 -
 -                      dev_priv->mm.interruptible = was_interruptible;
 -              }
 +              GEM_BUG_ON(!i915_vma_is_ggtt(vma));
 +              GEM_BUG_ON(i915_vma_is_active(vma));
 +              vma->flags &= ~I915_VMA_PIN_MASK;
 +              i915_vma_close(vma);
        }
 +      GEM_BUG_ON(obj->bind_count);
  
        /* Stolen objects don't hold a ref, but do hold pin count. Fix that up
         * before progressing. */
        if (obj->stolen)
                i915_gem_object_unpin_pages(obj);
  
 -      WARN_ON(obj->frontbuffer_bits);
 +      WARN_ON(atomic_read(&obj->frontbuffer_bits));
  
        if (obj->pages && obj->madv == I915_MADV_WILLNEED &&
            dev_priv->quirks & QUIRK_PIN_SWIZZLED_PAGES &&
 -          obj->tiling_mode != I915_TILING_NONE)
 +          i915_gem_object_is_tiled(obj))
                i915_gem_object_unpin_pages(obj);
  
        if (WARN_ON(obj->pages_pin_count))
        if (discard_backing_storage(obj))
                obj->madv = I915_MADV_DONTNEED;
        i915_gem_object_put_pages(obj);
 -      i915_gem_object_free_mmap_offset(obj);
  
        BUG_ON(obj->pages);
  
        intel_runtime_pm_put(dev_priv);
  }
  
 -struct i915_vma *i915_gem_obj_to_vma(struct drm_i915_gem_object *obj,
 -                                   struct i915_address_space *vm)
 -{
 -      struct i915_vma *vma;
 -      list_for_each_entry(vma, &obj->vma_list, obj_link) {
 -              if (vma->ggtt_view.type == I915_GGTT_VIEW_NORMAL &&
 -                  vma->vm == vm)
 -                      return vma;
 -      }
 -      return NULL;
 -}
 -
 -struct i915_vma *i915_gem_obj_to_ggtt_view(struct drm_i915_gem_object *obj,
 -                                         const struct i915_ggtt_view *view)
 -{
 -      struct i915_vma *vma;
 -
 -      GEM_BUG_ON(!view);
 -
 -      list_for_each_entry(vma, &obj->vma_list, obj_link)
 -              if (vma->is_ggtt && i915_ggtt_view_equal(&vma->ggtt_view, view))
 -                      return vma;
 -      return NULL;
 -}
 -
 -void i915_gem_vma_destroy(struct i915_vma *vma)
 -{
 -      WARN_ON(vma->node.allocated);
 -
 -      /* Keep the vma as a placeholder in the execbuffer reservation lists */
 -      if (!list_empty(&vma->exec_list))
 -              return;
 -
 -      if (!vma->is_ggtt)
 -              i915_ppgtt_put(i915_vm_to_ppgtt(vma->vm));
 -
 -      list_del(&vma->obj_link);
 -
 -      kmem_cache_free(to_i915(vma->obj->base.dev)->vmas, vma);
 -}
 -
 -static void
 -i915_gem_stop_engines(struct drm_device *dev)
 +int i915_gem_suspend(struct drm_device *dev)
  {
        struct drm_i915_private *dev_priv = to_i915(dev);
 -      struct intel_engine_cs *engine;
 -
 -      for_each_engine(engine, dev_priv)
 -              dev_priv->gt.stop_engine(engine);
 -}
 +      int ret;
  
 -int
 -i915_gem_suspend(struct drm_device *dev)
 -{
 -      struct drm_i915_private *dev_priv = to_i915(dev);
 -      int ret = 0;
 +      intel_suspend_gt_powersave(dev_priv);
  
        mutex_lock(&dev->struct_mutex);
 -      ret = i915_gem_wait_for_idle(dev_priv);
 +
 +      /* We have to flush all the executing contexts to main memory so
 +       * that they can be saved in the hibernation image. To ensure the last
 +       * context image is coherent, we have to switch away from it. That
 +       * leaves the dev_priv->kernel_context still active when
 +       * we actually suspend, and its image in memory may not match the GPU
 +       * state. Fortunately, the kernel_context is disposable and we do
 +       * not rely on its state.
 +       */
 +      ret = i915_gem_switch_to_kernel_context(dev_priv);
 +      if (ret)
 +              goto err;
 +
 +      ret = i915_gem_wait_for_idle(dev_priv,
 +                                   I915_WAIT_INTERRUPTIBLE |
 +                                   I915_WAIT_LOCKED);
        if (ret)
                goto err;
  
        i915_gem_retire_requests(dev_priv);
  
 -      i915_gem_stop_engines(dev);
        i915_gem_context_lost(dev_priv);
        mutex_unlock(&dev->struct_mutex);
  
        return ret;
  }
  
 +void i915_gem_resume(struct drm_device *dev)
 +{
 +      struct drm_i915_private *dev_priv = to_i915(dev);
 +
 +      mutex_lock(&dev->struct_mutex);
 +      i915_gem_restore_gtt_mappings(dev);
 +
 +      /* As we didn't flush the kernel context before suspend, we cannot
 +       * guarantee that the context image is complete. So let's just reset
 +       * it and start again.
 +       */
 +      dev_priv->gt.resume(dev_priv);
 +
 +      mutex_unlock(&dev->struct_mutex);
 +}
 +
  void i915_gem_init_swizzling(struct drm_device *dev)
  {
        struct drm_i915_private *dev_priv = to_i915(dev);
@@@ -4336,6 -5060,53 +4339,6 @@@ static void init_unused_rings(struct dr
        }
  }
  
 -int i915_gem_init_engines(struct drm_device *dev)
 -{
 -      struct drm_i915_private *dev_priv = to_i915(dev);
 -      int ret;
 -
 -      ret = intel_init_render_ring_buffer(dev);
 -      if (ret)
 -              return ret;
 -
 -      if (HAS_BSD(dev)) {
 -              ret = intel_init_bsd_ring_buffer(dev);
 -              if (ret)
 -                      goto cleanup_render_ring;
 -      }
 -
 -      if (HAS_BLT(dev)) {
 -              ret = intel_init_blt_ring_buffer(dev);
 -              if (ret)
 -                      goto cleanup_bsd_ring;
 -      }
 -
 -      if (HAS_VEBOX(dev)) {
 -              ret = intel_init_vebox_ring_buffer(dev);
 -              if (ret)
 -                      goto cleanup_blt_ring;
 -      }
 -
 -      if (HAS_BSD2(dev)) {
 -              ret = intel_init_bsd2_ring_buffer(dev);
 -              if (ret)
 -                      goto cleanup_vebox_ring;
 -      }
 -
 -      return 0;
 -
 -cleanup_vebox_ring:
 -      intel_cleanup_engine(&dev_priv->engine[VECS]);
 -cleanup_blt_ring:
 -      intel_cleanup_engine(&dev_priv->engine[BCS]);
 -cleanup_bsd_ring:
 -      intel_cleanup_engine(&dev_priv->engine[VCS]);
 -cleanup_render_ring:
 -      intel_cleanup_engine(&dev_priv->engine[RCS]);
 -
 -      return ret;
 -}
 -
  int
  i915_gem_init_hw(struct drm_device *dev)
  {
        return ret;
  }
  
 +bool intel_sanitize_semaphores(struct drm_i915_private *dev_priv, int value)
 +{
 +      if (INTEL_INFO(dev_priv)->gen < 6)
 +              return false;
 +
 +      /* TODO: make semaphores and Execlists play nicely together */
 +      if (i915.enable_execlists)
 +              return false;
 +
 +      if (value >= 0)
 +              return value;
 +
 +#ifdef CONFIG_INTEL_IOMMU
 +      /* Enable semaphores on SNB when IO remapping is off */
 +      if (INTEL_INFO(dev_priv)->gen == 6 && intel_iommu_gfx_mapped)
 +              return false;
 +#endif
 +
 +      return true;
 +}
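
A usage note, hedged: the helper above presumably gets wired up once at driver load to fold the tri-state module parameter into a single driver-wide flag, along the lines of the sketch below. The dev_priv->semaphore field and the i915.semaphores parameter name are assumptions and do not appear in this diff.

/* Assumed caller, not part of this commit: resolve the tri-state module
 * parameter (-1 = auto, 0 = off, 1 = on) once at init time. */
dev_priv->semaphore = intel_sanitize_semaphores(dev_priv, i915.semaphores);
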
 +
  int i915_gem_init(struct drm_device *dev)
  {
        struct drm_i915_private *dev_priv = to_i915(dev);
        mutex_lock(&dev->struct_mutex);
  
        if (!i915.enable_execlists) {
 -              dev_priv->gt.execbuf_submit = i915_gem_ringbuffer_submission;
 -              dev_priv->gt.init_engines = i915_gem_init_engines;
 -              dev_priv->gt.cleanup_engine = intel_cleanup_engine;
 -              dev_priv->gt.stop_engine = intel_stop_engine;
 +              dev_priv->gt.resume = intel_legacy_submission_resume;
 +              dev_priv->gt.cleanup_engine = intel_engine_cleanup;
        } else {
 -              dev_priv->gt.execbuf_submit = intel_execlists_submission;
 -              dev_priv->gt.init_engines = intel_logical_rings_init;
 +              dev_priv->gt.resume = intel_lr_context_resume;
                dev_priv->gt.cleanup_engine = intel_logical_ring_cleanup;
 -              dev_priv->gt.stop_engine = intel_logical_ring_stop;
        }
  
        /* This is just a security blanket to placate dragons.
        intel_uncore_forcewake_get(dev_priv, FORCEWAKE_ALL);
  
        i915_gem_init_userptr(dev_priv);
 -      i915_gem_init_ggtt(dev);
 +
 +      ret = i915_gem_init_ggtt(dev_priv);
 +      if (ret)
 +              goto out_unlock;
  
        ret = i915_gem_context_init(dev);
        if (ret)
                goto out_unlock;
  
 -      ret = dev_priv->gt.init_engines(dev);
 +      ret = intel_engines_init(dev);
        if (ret)
                goto out_unlock;
  
        ret = i915_gem_init_hw(dev);
        if (ret == -EIO) {
 -              /* Allow ring initialisation to fail by marking the GPU as
 +              /* Allow engine initialisation to fail by marking the GPU as
                 * wedged. But we only want to do this where the GPU is angry,
                 * for all other failure, such as an allocation failure, bail.
                 */
                DRM_ERROR("Failed to initialize GPU, declaring it wedged\n");
 -              atomic_or(I915_WEDGED, &dev_priv->gpu_error.reset_counter);
 +              i915_gem_set_wedged(dev_priv);
                ret = 0;
        }
  
@@@ -4491,6 -5242,7 +4494,6 @@@ i915_gem_cleanup_engines(struct drm_dev
  static void
  init_engine_lists(struct intel_engine_cs *engine)
  {
 -      INIT_LIST_HEAD(&engine->active_list);
        INIT_LIST_HEAD(&engine->request_list);
  }
  
@@@ -4498,7 -5250,6 +4501,7 @@@ voi
  i915_gem_load_init_fences(struct drm_i915_private *dev_priv)
  {
        struct drm_device *dev = &dev_priv->drm;
 +      int i;
  
        if (INTEL_INFO(dev_priv)->gen >= 7 && !IS_VALLEYVIEW(dev_priv) &&
            !IS_CHERRYVIEW(dev_priv))
                                I915_READ(vgtif_reg(avail_rs.fence_num));
  
        /* Initialize fence registers to zero */
 +      for (i = 0; i < dev_priv->num_fence_regs; i++) {
 +              struct drm_i915_fence_reg *fence = &dev_priv->fence_regs[i];
 +
 +              fence->i915 = dev_priv;
 +              fence->id = i;
 +              list_add_tail(&fence->link, &dev_priv->mm.fence_list);
 +      }
        i915_gem_restore_fences(dev);
  
        i915_gem_detect_bit_6_swizzle(dev);
@@@ -4545,17 -5289,18 +4548,17 @@@ i915_gem_load_init(struct drm_device *d
        dev_priv->requests =
                kmem_cache_create("i915_gem_request",
                                  sizeof(struct drm_i915_gem_request), 0,
 -                                SLAB_HWCACHE_ALIGN,
 +                                SLAB_HWCACHE_ALIGN |
 +                                SLAB_RECLAIM_ACCOUNT |
 +                                SLAB_DESTROY_BY_RCU,
                                  NULL);
  
 -      INIT_LIST_HEAD(&dev_priv->vm_list);
        INIT_LIST_HEAD(&dev_priv->context_list);
        INIT_LIST_HEAD(&dev_priv->mm.unbound_list);
        INIT_LIST_HEAD(&dev_priv->mm.bound_list);
        INIT_LIST_HEAD(&dev_priv->mm.fence_list);
        for (i = 0; i < I915_NUM_ENGINES; i++)
                init_engine_lists(&dev_priv->engine[i]);
 -      for (i = 0; i < I915_MAX_NUM_FENCES; i++)
 -              INIT_LIST_HEAD(&dev_priv->fence_regs[i].lru_list);
        INIT_DELAYED_WORK(&dev_priv->gt.retire_work,
                          i915_gem_retire_work_handler);
        INIT_DELAYED_WORK(&dev_priv->gt.idle_work,
  
        dev_priv->relative_constants_mode = I915_EXEC_CONSTANTS_REL_GENERAL;
  
 -      INIT_LIST_HEAD(&dev_priv->mm.fence_list);
 -
        init_waitqueue_head(&dev_priv->pending_flip_queue);
  
        dev_priv->mm.interruptible = true;
  
 -      mutex_init(&dev_priv->fb_tracking.lock);
 +      atomic_set(&dev_priv->mm.bsd_engine_dispatch_index, 0);
 +
 +      spin_lock_init(&dev_priv->fb_tracking.lock);
  }
  
  void i915_gem_load_cleanup(struct drm_device *dev)
        kmem_cache_destroy(dev_priv->requests);
        kmem_cache_destroy(dev_priv->vmas);
        kmem_cache_destroy(dev_priv->objects);
 +
 +      /* And ensure that our DESTROY_BY_RCU slabs are truly destroyed */
 +      rcu_barrier();
  }
  
  int i915_gem_freeze_late(struct drm_i915_private *dev_priv)
  {
        struct drm_i915_gem_object *obj;
 +      struct list_head *phases[] = {
 +              &dev_priv->mm.unbound_list,
 +              &dev_priv->mm.bound_list,
 +              NULL
 +      }, **p;
  
        /* Called just before we write the hibernation image.
         *
         *
         * To make sure the hibernation image contains the latest state,
         * we update that state just before writing out the image.
 +       *
 +       * To try and reduce the hibernation image, we manually shrink
 +       * the objects as well.
         */
  
 -      list_for_each_entry(obj, &dev_priv->mm.unbound_list, global_list) {
 -              obj->base.read_domains = I915_GEM_DOMAIN_CPU;
 -              obj->base.write_domain = I915_GEM_DOMAIN_CPU;
 -      }
 +      i915_gem_shrink_all(dev_priv);
  
 -      list_for_each_entry(obj, &dev_priv->mm.bound_list, global_list) {
 -              obj->base.read_domains = I915_GEM_DOMAIN_CPU;
 -              obj->base.write_domain = I915_GEM_DOMAIN_CPU;
 +      for (p = phases; *p; p++) {
 +              list_for_each_entry(obj, *p, global_list) {
 +                      obj->base.read_domains = I915_GEM_DOMAIN_CPU;
 +                      obj->base.write_domain = I915_GEM_DOMAIN_CPU;
 +              }
        }
  
        return 0;
  void i915_gem_release(struct drm_device *dev, struct drm_file *file)
  {
        struct drm_i915_file_private *file_priv = file->driver_priv;
 +      struct drm_i915_gem_request *request;
  
        /* Clean up our request list when the client is going away, so that
         * later retire_requests won't dereference our soon-to-be-gone
         * file_priv.
         */
        spin_lock(&file_priv->mm.lock);
 -      while (!list_empty(&file_priv->mm.request_list)) {
 -              struct drm_i915_gem_request *request;
 -
 -              request = list_first_entry(&file_priv->mm.request_list,
 -                                         struct drm_i915_gem_request,
 -                                         client_list);
 -              list_del(&request->client_list);
 +      list_for_each_entry(request, &file_priv->mm.request_list, client_list)
                request->file_priv = NULL;
 -      }
        spin_unlock(&file_priv->mm.lock);
  
        if (!list_empty(&file_priv->rps.link)) {
@@@ -4661,7 -5402,7 +4664,7 @@@ int i915_gem_open(struct drm_device *de
        spin_lock_init(&file_priv->mm.lock);
        INIT_LIST_HEAD(&file_priv->mm.request_list);
  
 -      file_priv->bsd_ring = -1;
 +      file_priv->bsd_engine = -1;
  
        ret = i915_gem_context_open(dev, file);
        if (ret)
@@@ -4683,24 -5424,118 +4686,24 @@@ void i915_gem_track_fb(struct drm_i915_
                       struct drm_i915_gem_object *new,
                       unsigned frontbuffer_bits)
  {
 +      /* Control of individual bits within the mask is guarded by
 +       * the owning plane->mutex, i.e. we can never see concurrent
 +       * manipulation of individual bits. But since the bitfield as a whole
 +       * is updated using RMW, we need to use atomics in order to update
 +       * the bits.
 +       */
 +      BUILD_BUG_ON(INTEL_FRONTBUFFER_BITS_PER_PIPE * I915_MAX_PIPES >
 +                   sizeof(atomic_t) * BITS_PER_BYTE);
 +
        if (old) {
 -              WARN_ON(!mutex_is_locked(&old->base.dev->struct_mutex));
 -              WARN_ON(!(old->frontbuffer_bits & frontbuffer_bits));
 -              old->frontbuffer_bits &= ~frontbuffer_bits;
 +              WARN_ON(!(atomic_read(&old->frontbuffer_bits) & frontbuffer_bits));
 +              atomic_andnot(frontbuffer_bits, &old->frontbuffer_bits);
        }
  
        if (new) {
 -              WARN_ON(!mutex_is_locked(&new->base.dev->struct_mutex));
 -              WARN_ON(new->frontbuffer_bits & frontbuffer_bits);
 -              new->frontbuffer_bits |= frontbuffer_bits;
 -      }
 -}
 -
 -/* All the new VM stuff */
 -u64 i915_gem_obj_offset(struct drm_i915_gem_object *o,
 -                      struct i915_address_space *vm)
 -{
 -      struct drm_i915_private *dev_priv = to_i915(o->base.dev);
 -      struct i915_vma *vma;
 -
 -      WARN_ON(vm == &dev_priv->mm.aliasing_ppgtt->base);
 -
 -      list_for_each_entry(vma, &o->vma_list, obj_link) {
 -              if (vma->is_ggtt &&
 -                  vma->ggtt_view.type != I915_GGTT_VIEW_NORMAL)
 -                      continue;
 -              if (vma->vm == vm)
 -                      return vma->node.start;
 -      }
 -
 -      WARN(1, "%s vma for this object not found.\n",
 -           i915_is_ggtt(vm) ? "global" : "ppgtt");
 -      return -1;
 -}
 -
 -u64 i915_gem_obj_ggtt_offset_view(struct drm_i915_gem_object *o,
 -                                const struct i915_ggtt_view *view)
 -{
 -      struct i915_vma *vma;
 -
 -      list_for_each_entry(vma, &o->vma_list, obj_link)
 -              if (vma->is_ggtt && i915_ggtt_view_equal(&vma->ggtt_view, view))
 -                      return vma->node.start;
 -
 -      WARN(1, "global vma for this object not found. (view=%u)\n", view->type);
 -      return -1;
 -}
 -
 -bool i915_gem_obj_bound(struct drm_i915_gem_object *o,
 -                      struct i915_address_space *vm)
 -{
 -      struct i915_vma *vma;
 -
 -      list_for_each_entry(vma, &o->vma_list, obj_link) {
 -              if (vma->is_ggtt &&
 -                  vma->ggtt_view.type != I915_GGTT_VIEW_NORMAL)
 -                      continue;
 -              if (vma->vm == vm && drm_mm_node_allocated(&vma->node))
 -                      return true;
 +              WARN_ON(atomic_read(&new->frontbuffer_bits) & frontbuffer_bits);
 +              atomic_or(frontbuffer_bits, &new->frontbuffer_bits);
        }
 -
 -      return false;
 -}
 -
 -bool i915_gem_obj_ggtt_bound_view(struct drm_i915_gem_object *o,
 -                                const struct i915_ggtt_view *view)
 -{
 -      struct i915_vma *vma;
 -
 -      list_for_each_entry(vma, &o->vma_list, obj_link)
 -              if (vma->is_ggtt &&
 -                  i915_ggtt_view_equal(&vma->ggtt_view, view) &&
 -                  drm_mm_node_allocated(&vma->node))
 -                      return true;
 -
 -      return false;
 -}
 -
 -bool i915_gem_obj_bound_any(struct drm_i915_gem_object *o)
 -{
 -      struct i915_vma *vma;
 -
 -      list_for_each_entry(vma, &o->vma_list, obj_link)
 -              if (drm_mm_node_allocated(&vma->node))
 -                      return true;
 -
 -      return false;
 -}
 -
 -unsigned long i915_gem_obj_ggtt_size(struct drm_i915_gem_object *o)
 -{
 -      struct i915_vma *vma;
 -
 -      GEM_BUG_ON(list_empty(&o->vma_list));
 -
 -      list_for_each_entry(vma, &o->vma_list, obj_link) {
 -              if (vma->is_ggtt &&
 -                  vma->ggtt_view.type == I915_GGTT_VIEW_NORMAL)
 -                      return vma->node.size;
 -      }
 -
 -      return 0;
 -}
 -
 -bool i915_gem_obj_is_pinned(struct drm_i915_gem_object *obj)
 -{
 -      struct i915_vma *vma;
 -      list_for_each_entry(vma, &obj->vma_list, obj_link)
 -              if (vma->pin_count > 0)
 -                      return true;
 -
 -      return false;
  }
  
  /* Like i915_gem_object_get_page(), but mark the returned page dirty */
@@@ -4755,6 -5590,6 +4758,6 @@@ i915_gem_object_create_from_data(struc
        return obj;
  
  fail:
 -      drm_gem_object_unreference(&obj->base);
 +      i915_gem_object_put(obj);
        return ERR_PTR(ret);
  }
diff --combined drivers/gpu/drm/i915/intel_display.c
index 497d99b8846883692ea0be1bb818b29319a5c15e,175595fc3e45d239ce528662696167bac4c73327..8d4c35d55b1bbd4e2848b5bf4d69a0abbdd05cbc
@@@ -34,7 -34,6 +34,7 @@@
  #include <drm/drm_edid.h>
  #include <drm/drmP.h>
  #include "intel_drv.h"
 +#include "intel_frontbuffer.h"
  #include <drm/i915_drm.h>
  #include "i915_drv.h"
  #include "i915_gem_dmabuf.h"
@@@ -1202,8 -1201,8 +1202,8 @@@ void assert_panel_unlocked(struct drm_i
        if (HAS_PCH_SPLIT(dev)) {
                u32 port_sel;
  
 -              pp_reg = PCH_PP_CONTROL;
 -              port_sel = I915_READ(PCH_PP_ON_DELAYS) & PANEL_PORT_SELECT_MASK;
 +              pp_reg = PP_CONTROL(0);
 +              port_sel = I915_READ(PP_ON_DELAYS(0)) & PANEL_PORT_SELECT_MASK;
  
                if (port_sel == PANEL_PORT_SELECT_LVDS &&
                    I915_READ(PCH_LVDS) & LVDS_PIPEB_SELECT)
                /* XXX: else fix for eDP */
        } else if (IS_VALLEYVIEW(dev) || IS_CHERRYVIEW(dev)) {
                /* presumably write lock depends on pipe, not port select */
 -              pp_reg = VLV_PIPE_PP_CONTROL(pipe);
 +              pp_reg = PP_CONTROL(pipe);
                panel_pipe = pipe;
        } else {
 -              pp_reg = PP_CONTROL;
 +              pp_reg = PP_CONTROL(0);
                if (I915_READ(LVDS) & LVDS_PIPEB_SELECT)
                        panel_pipe = PIPE_B;
        }
@@@ -1907,7 -1906,7 +1907,7 @@@ static void ironlake_disable_pch_transc
        }
  }
  
 -static void lpt_disable_pch_transcoder(struct drm_i915_private *dev_priv)
 +void lpt_disable_pch_transcoder(struct drm_i915_private *dev_priv)
  {
        u32 val;
  
@@@ -1959,12 -1958,12 +1959,12 @@@ static void intel_enable_pipe(struct in
         * a plane.  On ILK+ the pipe PLLs are integrated, so we don't
         * need the check.
         */
 -      if (HAS_GMCH_DISPLAY(dev_priv))
 +      if (HAS_GMCH_DISPLAY(dev_priv)) {
                if (intel_crtc_has_type(crtc->config, INTEL_OUTPUT_DSI))
                        assert_dsi_pll_enabled(dev_priv);
                else
                        assert_pll_enabled(dev_priv, pipe);
 -      else {
 +      } else {
                if (crtc->config->has_pch_encoder) {
                        /* if driving the PCH, we need FDI enabled */
                        assert_fdi_rx_pll_enabled(dev_priv, pch_transcoder);
@@@ -2147,6 -2146,33 +2147,6 @@@ intel_fill_fb_ggtt_view(struct i915_ggt
        }
  }
  
 -static void
 -intel_fill_fb_info(struct drm_i915_private *dev_priv,
 -                 struct drm_framebuffer *fb)
 -{
 -      struct intel_rotation_info *info = &to_intel_framebuffer(fb)->rot_info;
 -      unsigned int tile_size, tile_width, tile_height, cpp;
 -
 -      tile_size = intel_tile_size(dev_priv);
 -
 -      cpp = drm_format_plane_cpp(fb->pixel_format, 0);
 -      intel_tile_dims(dev_priv, &tile_width, &tile_height,
 -                      fb->modifier[0], cpp);
 -
 -      info->plane[0].width = DIV_ROUND_UP(fb->pitches[0], tile_width * cpp);
 -      info->plane[0].height = DIV_ROUND_UP(fb->height, tile_height);
 -
 -      if (info->pixel_format == DRM_FORMAT_NV12) {
 -              cpp = drm_format_plane_cpp(fb->pixel_format, 1);
 -              intel_tile_dims(dev_priv, &tile_width, &tile_height,
 -                              fb->modifier[1], cpp);
 -
 -              info->uv_offset = fb->offsets[1];
 -              info->plane[1].width = DIV_ROUND_UP(fb->pitches[1], tile_width * cpp);
 -              info->plane[1].height = DIV_ROUND_UP(fb->height / 2, tile_height);
 -      }
 -}
 -
  static unsigned int intel_linear_alignment(const struct drm_i915_private *dev_priv)
  {
        if (INTEL_INFO(dev_priv)->gen >= 9)
@@@ -2179,15 -2205,16 +2179,15 @@@ static unsigned int intel_surf_alignmen
        }
  }
  
 -int
 -intel_pin_and_fence_fb_obj(struct drm_framebuffer *fb,
 -                         unsigned int rotation)
 +struct i915_vma *
 +intel_pin_and_fence_fb_obj(struct drm_framebuffer *fb, unsigned int rotation)
  {
        struct drm_device *dev = fb->dev;
        struct drm_i915_private *dev_priv = to_i915(dev);
        struct drm_i915_gem_object *obj = intel_fb_obj(fb);
        struct i915_ggtt_view view;
 +      struct i915_vma *vma;
        u32 alignment;
 -      int ret;
  
        WARN_ON(!mutex_is_locked(&dev->struct_mutex));
  
         */
        intel_runtime_pm_get(dev_priv);
  
 -      ret = i915_gem_object_pin_to_display_plane(obj, alignment,
 -                                                 &view);
 -      if (ret)
 -              goto err_pm;
 -
 -      /* Install a fence for tiled scan-out. Pre-i965 always needs a
 -       * fence, whereas 965+ only requires a fence if using
 -       * framebuffer compression.  For simplicity, we always install
 -       * a fence as the cost is not that onerous.
 -       */
 -      if (view.type == I915_GGTT_VIEW_NORMAL) {
 -              ret = i915_gem_object_get_fence(obj);
 -              if (ret == -EDEADLK) {
 -                      /*
 -                       * -EDEADLK means there are no free fences
 -                       * no pending flips.
 -                       *
 -                       * This is propagated to atomic, but it uses
 -                       * -EDEADLK to force a locking recovery, so
 -                       * change the returned error to -EBUSY.
 -                       */
 -                      ret = -EBUSY;
 -                      goto err_unpin;
 -              } else if (ret)
 -                      goto err_unpin;
 +      vma = i915_gem_object_pin_to_display_plane(obj, alignment, &view);
 +      if (IS_ERR(vma))
 +              goto err;
  
 -              i915_gem_object_pin_fence(obj);
 +      if (i915_vma_is_map_and_fenceable(vma)) {
 +              /* Install a fence for tiled scan-out. Pre-i965 always needs a
 +               * fence, whereas 965+ only requires a fence if using
 +               * framebuffer compression.  For simplicity, we always, when
 +               * possible, install a fence as the cost is not that onerous.
 +               *
 +               * If we fail to fence the tiled scanout, then either the
 +               * modeset will reject the change (which is highly unlikely as
 +               * the affected systems, all but one, do not have unmappable
 +               * space) or we will not be able to enable full powersaving
 +               * techniques (also likely not to apply due to various limits
 +               * FBC and the like impose on the size of the buffer, which
 +               * presumably we violated anyway with this unmappable buffer).
 +               * Anyway, it is presumably better to stumble onwards with
 +               * something and try to run the system in a "less than optimal"
 +               * mode that matches the user configuration.
 +               */
 +              if (i915_vma_get_fence(vma) == 0)
 +                      i915_vma_pin_fence(vma);
        }
  
 +err:
        intel_runtime_pm_put(dev_priv);
 -      return 0;
 -
 -err_unpin:
 -      i915_gem_object_unpin_from_display_plane(obj, &view);
 -err_pm:
 -      intel_runtime_pm_put(dev_priv);
 -      return ret;
 +      return vma;
  }
  
  void intel_unpin_fb_obj(struct drm_framebuffer *fb, unsigned int rotation)
  {
        struct drm_i915_gem_object *obj = intel_fb_obj(fb);
        struct i915_ggtt_view view;
 +      struct i915_vma *vma;
  
        WARN_ON(!mutex_is_locked(&obj->base.dev->struct_mutex));
  
        intel_fill_fb_ggtt_view(&view, fb, rotation);
 +      vma = i915_gem_object_to_ggtt(obj, &view);
  
 -      if (view.type == I915_GGTT_VIEW_NORMAL)
 -              i915_gem_object_unpin_fence(obj);
 +      i915_vma_unpin_fence(vma);
 +      i915_gem_object_unpin_from_display_plane(vma);
 +}
  
 -      i915_gem_object_unpin_from_display_plane(obj, &view);
 +static int intel_fb_pitch(const struct drm_framebuffer *fb, int plane,
 +                        unsigned int rotation)
 +{
 +      if (intel_rotation_90_or_270(rotation))
 +              return to_intel_framebuffer(fb)->rotated[plane].pitch;
 +      else
 +              return fb->pitches[plane];
 +}
 +
 +/*
 + * Convert the x/y offsets into a linear offset.
 + * Only valid with 0/180 degree rotation, which is fine since linear
 + * offset is only used with linear buffers on pre-hsw and tiled buffers
 + * with gen2/3, and 90/270 degree rotations isn't supported on any of them.
 + */
 +u32 intel_fb_xy_to_linear(int x, int y,
 +                        const struct intel_plane_state *state,
 +                        int plane)
 +{
 +      const struct drm_framebuffer *fb = state->base.fb;
 +      unsigned int cpp = drm_format_plane_cpp(fb->pixel_format, plane);
 +      unsigned int pitch = fb->pitches[plane];
 +
 +      return y * pitch + x * cpp;
 +}
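
As a quick arithmetic check of intel_fb_xy_to_linear() above: with an assumed 4-byte-per-pixel format and an 8192-byte pitch, pixel (x=16, y=2) sits 2 * 8192 + 16 * 4 = 16448 bytes into the plane. A standalone version of the same computation (numbers are illustrative only):

#include <stdio.h>

/* Same formula as intel_fb_xy_to_linear(): linear = y * pitch + x * cpp. */
static unsigned int xy_to_linear(int x, int y, unsigned int pitch, unsigned int cpp)
{
	return y * pitch + x * cpp;
}

int main(void)
{
	printf("%u\n", xy_to_linear(16, 2, 8192, 4));	/* prints 16448 */
	return 0;
}
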
 +
 +/*
 + * Add the x/y offsets derived from fb->offsets[] to the user
 + * specified plane src x/y offsets. The resulting x/y offsets
 + * specify the start of scanout from the beginning of the gtt mapping.
 + */
 +void intel_add_fb_offsets(int *x, int *y,
 +                        const struct intel_plane_state *state,
 +                        int plane)
 +
 +{
 +      const struct intel_framebuffer *intel_fb = to_intel_framebuffer(state->base.fb);
 +      unsigned int rotation = state->base.rotation;
 +
 +      if (intel_rotation_90_or_270(rotation)) {
 +              *x += intel_fb->rotated[plane].x;
 +              *y += intel_fb->rotated[plane].y;
 +      } else {
 +              *x += intel_fb->normal[plane].x;
 +              *y += intel_fb->normal[plane].y;
 +      }
  }
  
  /*
 - * Adjust the tile offset by moving the difference into
 - * the x/y offsets.
 - *
   * Input tile dimensions and pitch must already be
   * rotated to match x and y, and in pixel units.
   */
 -static u32 intel_adjust_tile_offset(int *x, int *y,
 -                                  unsigned int tile_width,
 -                                  unsigned int tile_height,
 -                                  unsigned int tile_size,
 -                                  unsigned int pitch_tiles,
 -                                  u32 old_offset,
 -                                  u32 new_offset)
 -{
 +static u32 _intel_adjust_tile_offset(int *x, int *y,
 +                                   unsigned int tile_width,
 +                                   unsigned int tile_height,
 +                                   unsigned int tile_size,
 +                                   unsigned int pitch_tiles,
 +                                   u32 old_offset,
 +                                   u32 new_offset)
 +{
 +      unsigned int pitch_pixels = pitch_tiles * tile_width;
        unsigned int tiles;
  
        WARN_ON(old_offset & (tile_size - 1));
        *y += tiles / pitch_tiles * tile_height;
        *x += tiles % pitch_tiles * tile_width;
  
 +      /* minimize x in case it got needlessly big */
 +      *y += *x / pitch_pixels * tile_height;
 +      *x %= pitch_pixels;
 +
 +      return new_offset;
 +}
 +
 +/*
 + * Adjust the tile offset by moving the difference into
 + * the x/y offsets.
 + */
 +static u32 intel_adjust_tile_offset(int *x, int *y,
 +                                  const struct intel_plane_state *state, int plane,
 +                                  u32 old_offset, u32 new_offset)
 +{
 +      const struct drm_i915_private *dev_priv = to_i915(state->base.plane->dev);
 +      const struct drm_framebuffer *fb = state->base.fb;
 +      unsigned int cpp = drm_format_plane_cpp(fb->pixel_format, plane);
 +      unsigned int rotation = state->base.rotation;
 +      unsigned int pitch = intel_fb_pitch(fb, plane, rotation);
 +
 +      WARN_ON(new_offset > old_offset);
 +
 +      if (fb->modifier[plane] != DRM_FORMAT_MOD_NONE) {
 +              unsigned int tile_size, tile_width, tile_height;
 +              unsigned int pitch_tiles;
 +
 +              tile_size = intel_tile_size(dev_priv);
 +              intel_tile_dims(dev_priv, &tile_width, &tile_height,
 +                              fb->modifier[plane], cpp);
 +
 +              if (intel_rotation_90_or_270(rotation)) {
 +                      pitch_tiles = pitch / tile_height;
 +                      swap(tile_width, tile_height);
 +              } else {
 +                      pitch_tiles = pitch / (tile_width * cpp);
 +              }
 +
 +              _intel_adjust_tile_offset(x, y, tile_width, tile_height,
 +                                        tile_size, pitch_tiles,
 +                                        old_offset, new_offset);
 +      } else {
 +              old_offset += *y * pitch + *x * cpp;
 +
 +              *y = (old_offset - new_offset) / pitch;
 +              *x = ((old_offset - new_offset) - *y * pitch) / cpp;
 +      }
 +
        return new_offset;
  }
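
The linear (untiled) branch of intel_adjust_tile_offset() above can be sanity-checked with concrete numbers: take pitch = 8192, cpp = 4, a pixel at (x=4, y=1) relative to old_offset = 4096, and new_offset = 0. The absolute byte offset is 4096 + 1 * 8192 + 4 * 4 = 12304, which folds back into y = 12304 / 8192 = 1 and x = (12304 - 8192) / 4 = 1028 relative to the new origin. A standalone version of just that branch (values are illustrative):

#include <stdio.h>

/* Linear branch of the adjustment above: fold the offset delta into x/y. */
static void adjust_linear(int *x, int *y, unsigned int pitch, unsigned int cpp,
			  unsigned int old_offset, unsigned int new_offset)
{
	old_offset += *y * pitch + *x * cpp;

	*y = (old_offset - new_offset) / pitch;
	*x = ((old_offset - new_offset) - *y * pitch) / cpp;
}

int main(void)
{
	int x = 4, y = 1;

	adjust_linear(&x, &y, 8192, 4, 4096, 0);
	printf("x=%d y=%d\n", x, y);	/* x=1028 y=1: same byte, new origin */
	return 0;
}
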
  
   * In the 90/270 rotated case, x and y are assumed
   * to be already rotated to match the rotated GTT view, and
   * pitch is the tile_height aligned framebuffer height.
 + *
 + * This function is used when computing the derived information
 + * under intel_framebuffer, so using any of that information
 + * here is not allowed. Anything under drm_framebuffer can be
 + * used. This is why the user has to pass in the pitch since it
 + * is specified in the rotated orientation.
   */
 -u32 intel_compute_tile_offset(int *x, int *y,
 -                            const struct drm_framebuffer *fb, int plane,
 -                            unsigned int pitch,
 -                            unsigned int rotation)
 +static u32 _intel_compute_tile_offset(const struct drm_i915_private *dev_priv,
 +                                    int *x, int *y,
 +                                    const struct drm_framebuffer *fb, int plane,
 +                                    unsigned int pitch,
 +                                    unsigned int rotation,
 +                                    u32 alignment)
  {
 -      const struct drm_i915_private *dev_priv = to_i915(fb->dev);
        uint64_t fb_modifier = fb->modifier[plane];
        unsigned int cpp = drm_format_plane_cpp(fb->pixel_format, plane);
 -      u32 offset, offset_aligned, alignment;
 +      u32 offset, offset_aligned;
  
 -      alignment = intel_surf_alignment(dev_priv, fb_modifier);
        if (alignment)
                alignment--;
  
                offset = (tile_rows * pitch_tiles + tiles) * tile_size;
                offset_aligned = offset & ~alignment;
  
 -              intel_adjust_tile_offset(x, y, tile_width, tile_height,
 -                                       tile_size, pitch_tiles,
 -                                       offset, offset_aligned);
 +              _intel_adjust_tile_offset(x, y, tile_width, tile_height,
 +                                        tile_size, pitch_tiles,
 +                                        offset, offset_aligned);
        } else {
                offset = *y * pitch + *x * cpp;
                offset_aligned = offset & ~alignment;
        return offset_aligned;
  }
  
 +u32 intel_compute_tile_offset(int *x, int *y,
 +                            const struct intel_plane_state *state,
 +                            int plane)
 +{
 +      const struct drm_i915_private *dev_priv = to_i915(state->base.plane->dev);
 +      const struct drm_framebuffer *fb = state->base.fb;
 +      unsigned int rotation = state->base.rotation;
 +      int pitch = intel_fb_pitch(fb, plane, rotation);
 +      u32 alignment;
 +
 +      /* AUX_DIST needs only 4K alignment */
 +      if (fb->pixel_format == DRM_FORMAT_NV12 && plane == 1)
 +              alignment = 4096;
 +      else
 +              alignment = intel_surf_alignment(dev_priv, fb->modifier[plane]);
 +
 +      return _intel_compute_tile_offset(dev_priv, x, y, fb, plane, pitch,
 +                                        rotation, alignment);
 +}
 +
 +/* Convert the fb->offset[] linear offset into x/y offsets */
 +static void intel_fb_offset_to_xy(int *x, int *y,
 +                                const struct drm_framebuffer *fb, int plane)
 +{
 +      unsigned int cpp = drm_format_plane_cpp(fb->pixel_format, plane);
 +      unsigned int pitch = fb->pitches[plane];
 +      u32 linear_offset = fb->offsets[plane];
 +
 +      *y = linear_offset / pitch;
 +      *x = linear_offset % pitch / cpp;
 +}
 +
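For illustration only (not part of the commit): intel_fb_offset_to_xy() above simply inverts the linear-layout relation linear = y * pitch + x * cpp, the same relation the non-tiled branches of the offset helpers use. A minimal standalone sketch with invented pitch/cpp values (offset_to_xy is a hypothetical stand-in, not a kernel function):

#include <stdio.h>

/* Hypothetical helper mirroring intel_fb_offset_to_xy(): split a byte
 * offset into x/y pixel offsets for a linear layout. */
static void offset_to_xy(unsigned int linear, unsigned int pitch,
			 unsigned int cpp, int *x, int *y)
{
	*y = linear / pitch;		/* whole rows */
	*x = linear % pitch / cpp;	/* leftover bytes -> pixels */
}

int main(void)
{
	int x, y;

	/* pitch = 4096 bytes, cpp = 4 bytes/pixel, offset = 16400 bytes */
	offset_to_xy(16400, 4096, 4, &x, &y);
	printf("x=%d y=%d\n", x, y);	/* prints x=4 y=4 */
	return 0;
}
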
 +static unsigned int intel_fb_modifier_to_tiling(uint64_t fb_modifier)
 +{
 +      switch (fb_modifier) {
 +      case I915_FORMAT_MOD_X_TILED:
 +              return I915_TILING_X;
 +      case I915_FORMAT_MOD_Y_TILED:
 +              return I915_TILING_Y;
 +      default:
 +              return I915_TILING_NONE;
 +      }
 +}
 +
 +static int
 +intel_fill_fb_info(struct drm_i915_private *dev_priv,
 +                 struct drm_framebuffer *fb)
 +{
 +      struct intel_framebuffer *intel_fb = to_intel_framebuffer(fb);
 +      struct intel_rotation_info *rot_info = &intel_fb->rot_info;
 +      u32 gtt_offset_rotated = 0;
 +      unsigned int max_size = 0;
 +      uint32_t format = fb->pixel_format;
 +      int i, num_planes = drm_format_num_planes(format);
 +      unsigned int tile_size = intel_tile_size(dev_priv);
 +
 +      for (i = 0; i < num_planes; i++) {
 +              unsigned int width, height;
 +              unsigned int cpp, size;
 +              u32 offset;
 +              int x, y;
 +
 +              cpp = drm_format_plane_cpp(format, i);
 +              width = drm_format_plane_width(fb->width, format, i);
 +              height = drm_format_plane_height(fb->height, format, i);
 +
 +              intel_fb_offset_to_xy(&x, &y, fb, i);
 +
 +              /*
 +               * The fence (if used) is aligned to the start of the object
 +               * so having the framebuffer wrap around across the edge of the
 +               * fenced region doesn't really work. We have no API to configure
 +               * the fence start offset within the object (nor could we probably
 +               * on gen2/3). So it's easier if we just require that the
 +               * fb layout agrees with the fence layout. We already check that the
 +               * fb stride matches the fence stride elsewhere.
 +               */
 +              if (i915_gem_object_is_tiled(intel_fb->obj) &&
 +                  (x + width) * cpp > fb->pitches[i]) {
 +                      DRM_DEBUG("bad fb plane %d offset: 0x%x\n",
 +                                i, fb->offsets[i]);
 +                      return -EINVAL;
 +              }
 +
 +              /*
 +               * First pixel of the framebuffer from
 +               * the start of the normal gtt mapping.
 +               */
 +              intel_fb->normal[i].x = x;
 +              intel_fb->normal[i].y = y;
 +
 +              offset = _intel_compute_tile_offset(dev_priv, &x, &y,
 +                                                  fb, 0, fb->pitches[i],
 +                                                  DRM_ROTATE_0, tile_size);
 +              offset /= tile_size;
 +
 +              if (fb->modifier[i] != DRM_FORMAT_MOD_NONE) {
 +                      unsigned int tile_width, tile_height;
 +                      unsigned int pitch_tiles;
 +                      struct drm_rect r;
 +
 +                      intel_tile_dims(dev_priv, &tile_width, &tile_height,
 +                                      fb->modifier[i], cpp);
 +
 +                      rot_info->plane[i].offset = offset;
 +                      rot_info->plane[i].stride = DIV_ROUND_UP(fb->pitches[i], tile_width * cpp);
 +                      rot_info->plane[i].width = DIV_ROUND_UP(x + width, tile_width);
 +                      rot_info->plane[i].height = DIV_ROUND_UP(y + height, tile_height);
 +
 +                      intel_fb->rotated[i].pitch =
 +                              rot_info->plane[i].height * tile_height;
 +
 +                      /* how many tiles does this plane need */
 +                      size = rot_info->plane[i].stride * rot_info->plane[i].height;
 +                      /*
 +                       * If the plane isn't horizontally tile aligned,
 +                       * we need one more tile.
 +                       */
 +                      if (x != 0)
 +                              size++;
 +
 +                      /* rotate the x/y offsets to match the GTT view */
 +                      r.x1 = x;
 +                      r.y1 = y;
 +                      r.x2 = x + width;
 +                      r.y2 = y + height;
 +                      drm_rect_rotate(&r,
 +                                      rot_info->plane[i].width * tile_width,
 +                                      rot_info->plane[i].height * tile_height,
 +                                      DRM_ROTATE_270);
 +                      x = r.x1;
 +                      y = r.y1;
 +
 +                      /* rotate the tile dimensions to match the GTT view */
 +                      pitch_tiles = intel_fb->rotated[i].pitch / tile_height;
 +                      swap(tile_width, tile_height);
 +
 +                      /*
 +                       * We only keep the x/y offsets, so push all of the
 +                       * gtt offset into the x/y offsets.
 +                       */
 +                      _intel_adjust_tile_offset(&x, &y, tile_size,
 +                                                tile_width, tile_height, pitch_tiles,
 +                                                gtt_offset_rotated * tile_size, 0);
 +
 +                      gtt_offset_rotated += rot_info->plane[i].width * rot_info->plane[i].height;
 +
 +                      /*
 +                       * First pixel of the framebuffer from
 +                       * the start of the rotated gtt mapping.
 +                       */
 +                      intel_fb->rotated[i].x = x;
 +                      intel_fb->rotated[i].y = y;
 +              } else {
 +                      size = DIV_ROUND_UP((y + height) * fb->pitches[i] +
 +                                          x * cpp, tile_size);
 +              }
 +
 +              /* how many tiles in total needed in the bo */
 +              max_size = max(max_size, offset + size);
 +      }
 +
 +      if (max_size * tile_size > to_intel_framebuffer(fb)->obj->base.size) {
 +              DRM_DEBUG("fb too big for bo (need %u bytes, have %zu bytes)\n",
 +                        max_size * tile_size, to_intel_framebuffer(fb)->obj->base.size);
 +              return -EINVAL;
 +      }
 +
 +      return 0;
 +}
 +
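For illustration only (not part of the commit): for tiled objects, intel_fill_fb_info() above rejects an fb whose plane rows would wrap past the fence stride, i.e. it requires (x + width) * cpp <= fb->pitches[i]. A rough standalone sketch of that predicate with invented numbers (fb_plane_fits_stride is hypothetical):

#include <stdbool.h>
#include <stdio.h>

/* Hypothetical stand-in for the row-wrap check in intel_fill_fb_info():
 * a plane row starting at pixel x must not extend past the fb stride. */
static bool fb_plane_fits_stride(unsigned int x, unsigned int width,
				 unsigned int cpp, unsigned int pitch)
{
	return (x + width) * cpp <= pitch;
}

int main(void)
{
	/* 1920 pixels wide, 4 bytes/pixel, 8192 byte stride */
	printf("%d\n", fb_plane_fits_stride(128, 1920, 4, 8192));	/* 1: 2048 * 4 == 8192 */
	printf("%d\n", fb_plane_fits_stride(129, 1920, 4, 8192));	/* 0: 2049 * 4 >  8192 */
	return 0;
}
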
  static int i9xx_format_to_fourcc(int format)
  {
        switch (format) {
@@@ -2700,8 -2465,9 +2700,8 @@@ intel_alloc_initial_plane_obj(struct in
                return false;
        }
  
 -      obj->tiling_mode = plane_config->tiling;
 -      if (obj->tiling_mode == I915_TILING_X)
 -              obj->stride = fb->pitches[0];
 +      if (plane_config->tiling == I915_TILING_X)
 +              obj->tiling_and_stride = fb->pitches[0] | I915_TILING_X;
  
        mode_cmd.pixel_format = fb->pixel_format;
        mode_cmd.width = fb->width;
        return true;
  
  out_unref_obj:
 -      drm_gem_object_unreference(&obj->base);
 +      i915_gem_object_put(obj);
        mutex_unlock(&dev->struct_mutex);
        return false;
  }
@@@ -2786,7 -2552,7 +2786,7 @@@ intel_find_initial_plane_obj(struct int
                        continue;
  
                obj = intel_fb_obj(fb);
 -              if (i915_gem_obj_ggtt_offset(obj) == plane_config->base) {
 +              if (i915_gem_object_ggtt_offset(obj, NULL) == plane_config->base) {
                        drm_framebuffer_reference(fb);
                        goto valid_fb;
                }
         * simplest solution is to just disable the primary plane now and
         * pretend the BIOS never had it enabled.
         */
 -      to_intel_plane_state(plane_state)->visible = false;
 +      to_intel_plane_state(plane_state)->base.visible = false;
        crtc_state->plane_mask &= ~(1 << drm_plane_index(primary));
        intel_pre_disable_primary_noatomic(&intel_crtc->base);
        intel_plane->disable_plane(primary, &intel_crtc->base);
@@@ -2817,188 -2583,24 +2817,188 @@@ valid_fb
        plane_state->crtc_w = fb->width;
        plane_state->crtc_h = fb->height;
  
 -      intel_state->src.x1 = plane_state->src_x;
 -      intel_state->src.y1 = plane_state->src_y;
 -      intel_state->src.x2 = plane_state->src_x + plane_state->src_w;
 -      intel_state->src.y2 = plane_state->src_y + plane_state->src_h;
 -      intel_state->dst.x1 = plane_state->crtc_x;
 -      intel_state->dst.y1 = plane_state->crtc_y;
 -      intel_state->dst.x2 = plane_state->crtc_x + plane_state->crtc_w;
 -      intel_state->dst.y2 = plane_state->crtc_y + plane_state->crtc_h;
 +      intel_state->base.src.x1 = plane_state->src_x;
 +      intel_state->base.src.y1 = plane_state->src_y;
 +      intel_state->base.src.x2 = plane_state->src_x + plane_state->src_w;
 +      intel_state->base.src.y2 = plane_state->src_y + plane_state->src_h;
 +      intel_state->base.dst.x1 = plane_state->crtc_x;
 +      intel_state->base.dst.y1 = plane_state->crtc_y;
 +      intel_state->base.dst.x2 = plane_state->crtc_x + plane_state->crtc_w;
 +      intel_state->base.dst.y2 = plane_state->crtc_y + plane_state->crtc_h;
  
        obj = intel_fb_obj(fb);
 -      if (obj->tiling_mode != I915_TILING_NONE)
 +      if (i915_gem_object_is_tiled(obj))
                dev_priv->preserve_bios_swizzle = true;
  
        drm_framebuffer_reference(fb);
        primary->fb = primary->state->fb = fb;
        primary->crtc = primary->state->crtc = &intel_crtc->base;
        intel_crtc->base.state->plane_mask |= (1 << drm_plane_index(primary));
 -      obj->frontbuffer_bits |= to_intel_plane(primary)->frontbuffer_bit;
 +      atomic_or(to_intel_plane(primary)->frontbuffer_bit,
 +                &obj->frontbuffer_bits);
 +}
 +
 +static int skl_max_plane_width(const struct drm_framebuffer *fb, int plane,
 +                             unsigned int rotation)
 +{
 +      int cpp = drm_format_plane_cpp(fb->pixel_format, plane);
 +
 +      switch (fb->modifier[plane]) {
 +      case DRM_FORMAT_MOD_NONE:
 +      case I915_FORMAT_MOD_X_TILED:
 +              switch (cpp) {
 +              case 8:
 +                      return 4096;
 +              case 4:
 +              case 2:
 +              case 1:
 +                      return 8192;
 +              default:
 +                      MISSING_CASE(cpp);
 +                      break;
 +              }
 +              break;
 +      case I915_FORMAT_MOD_Y_TILED:
 +      case I915_FORMAT_MOD_Yf_TILED:
 +              switch (cpp) {
 +              case 8:
 +                      return 2048;
 +              case 4:
 +                      return 4096;
 +              case 2:
 +              case 1:
 +                      return 8192;
 +              default:
 +                      MISSING_CASE(cpp);
 +                      break;
 +              }
 +              break;
 +      default:
 +              MISSING_CASE(fb->modifier[plane]);
 +      }
 +
 +      return 2048;
 +}
 +
 +static int skl_check_main_surface(struct intel_plane_state *plane_state)
 +{
 +      const struct drm_i915_private *dev_priv = to_i915(plane_state->base.plane->dev);
 +      const struct drm_framebuffer *fb = plane_state->base.fb;
 +      unsigned int rotation = plane_state->base.rotation;
 +      int x = plane_state->base.src.x1 >> 16;
 +      int y = plane_state->base.src.y1 >> 16;
 +      int w = drm_rect_width(&plane_state->base.src) >> 16;
 +      int h = drm_rect_height(&plane_state->base.src) >> 16;
 +      int max_width = skl_max_plane_width(fb, 0, rotation);
 +      int max_height = 4096;
 +      u32 alignment, offset, aux_offset = plane_state->aux.offset;
 +
 +      if (w > max_width || h > max_height) {
 +              DRM_DEBUG_KMS("requested Y/RGB source size %dx%d too big (limit %dx%d)\n",
 +                            w, h, max_width, max_height);
 +              return -EINVAL;
 +      }
 +
 +      intel_add_fb_offsets(&x, &y, plane_state, 0);
 +      offset = intel_compute_tile_offset(&x, &y, plane_state, 0);
 +
 +      alignment = intel_surf_alignment(dev_priv, fb->modifier[0]);
 +
 +      /*
 +       * AUX surface offset is specified as the distance from the
 +       * main surface offset, and it must be non-negative. Make
 +       * sure that is what we will get.
 +       */
 +      if (offset > aux_offset)
 +              offset = intel_adjust_tile_offset(&x, &y, plane_state, 0,
 +                                                offset, aux_offset & ~(alignment - 1));
 +
 +      /*
 +       * When using an X-tiled surface, the plane blows up
 +       * if the x offset + width exceeds the stride.
 +       *
 +       * TODO: linear and Y-tiled seem fine, Yf untested.
 +       */
 +      if (fb->modifier[0] == I915_FORMAT_MOD_X_TILED) {
 +              int cpp = drm_format_plane_cpp(fb->pixel_format, 0);
 +
 +              while ((x + w) * cpp > fb->pitches[0]) {
 +                      if (offset == 0) {
 +                              DRM_DEBUG_KMS("Unable to find suitable display surface offset\n");
 +                              return -EINVAL;
 +                      }
 +
 +                      offset = intel_adjust_tile_offset(&x, &y, plane_state, 0,
 +                                                        offset, offset - alignment);
 +              }
 +      }
 +
 +      plane_state->main.offset = offset;
 +      plane_state->main.x = x;
 +      plane_state->main.y = y;
 +
 +      return 0;
 +}
 +
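For illustration only (not part of the commit): skl_check_main_surface() above keeps the AUX offset non-negative by pulling the main-surface offset down to the AUX offset rounded to the surface alignment; the real code also folds the difference back into x/y via intel_adjust_tile_offset(), which this sketch omits. With invented values:

#include <stdio.h>

int main(void)
{
	unsigned int alignment = 0x1000;	/* assumed 4 KiB surface alignment */
	unsigned int aux_offset = 0x23000;	/* CbCr surface offset within the bo */
	unsigned int offset = 0x24000;		/* main offset would make aux - main negative */

	if (offset > aux_offset)
		offset = aux_offset & ~(alignment - 1);

	/* the AUX offset is then programmed as the distance aux_offset - offset */
	printf("main=0x%x aux_dist=0x%x\n", offset, aux_offset - offset);
	return 0;
}
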
 +static int skl_check_nv12_aux_surface(struct intel_plane_state *plane_state)
 +{
 +      const struct drm_framebuffer *fb = plane_state->base.fb;
 +      unsigned int rotation = plane_state->base.rotation;
 +      int max_width = skl_max_plane_width(fb, 1, rotation);
 +      int max_height = 4096;
 +      int x = plane_state->base.src.x1 >> 17;
 +      int y = plane_state->base.src.y1 >> 17;
 +      int w = drm_rect_width(&plane_state->base.src) >> 17;
 +      int h = drm_rect_height(&plane_state->base.src) >> 17;
 +      u32 offset;
 +
 +      intel_add_fb_offsets(&x, &y, plane_state, 1);
 +      offset = intel_compute_tile_offset(&x, &y, plane_state, 1);
 +
 +      /* FIXME not quite sure how/if these apply to the chroma plane */
 +      if (w > max_width || h > max_height) {
 +              DRM_DEBUG_KMS("CbCr source size %dx%d too big (limit %dx%d)\n",
 +                            w, h, max_width, max_height);
 +              return -EINVAL;
 +      }
 +
 +      plane_state->aux.offset = offset;
 +      plane_state->aux.x = x;
 +      plane_state->aux.y = y;
 +
 +      return 0;
 +}
 +
 +int skl_check_plane_surface(struct intel_plane_state *plane_state)
 +{
 +      const struct drm_framebuffer *fb = plane_state->base.fb;
 +      unsigned int rotation = plane_state->base.rotation;
 +      int ret;
 +
 +      /* Rotate src coordinates to match rotated GTT view */
 +      if (intel_rotation_90_or_270(rotation))
 +              drm_rect_rotate(&plane_state->base.src,
 +                              fb->width, fb->height, DRM_ROTATE_270);
 +
 +      /*
 +       * Handle the AUX surface first since
 +       * the main surface setup depends on it.
 +       */
 +      if (fb->pixel_format == DRM_FORMAT_NV12) {
 +              ret = skl_check_nv12_aux_surface(plane_state);
 +              if (ret)
 +                      return ret;
 +      } else {
 +              plane_state->aux.offset = ~0xfff;
 +              plane_state->aux.x = 0;
 +              plane_state->aux.y = 0;
 +      }
 +
 +      ret = skl_check_main_surface(plane_state);
 +      if (ret)
 +              return ret;
 +
 +      return 0;
  }
  
  static void i9xx_update_primary_plane(struct drm_plane *primary,
        u32 dspcntr;
        i915_reg_t reg = DSPCNTR(plane);
        unsigned int rotation = plane_state->base.rotation;
 -      int cpp = drm_format_plane_cpp(fb->pixel_format, 0);
 -      int x = plane_state->src.x1 >> 16;
 -      int y = plane_state->src.y1 >> 16;
 +      int x = plane_state->base.src.x1 >> 16;
 +      int y = plane_state->base.src.y1 >> 16;
  
        dspcntr = DISPPLANE_GAMMA_ENABLE;
  
                BUG();
        }
  
 -      if (INTEL_INFO(dev)->gen >= 4 &&
 -          obj->tiling_mode != I915_TILING_NONE)
 +      if (INTEL_GEN(dev_priv) >= 4 &&
 +          fb->modifier[0] == I915_FORMAT_MOD_X_TILED)
                dspcntr |= DISPPLANE_TILED;
  
        if (IS_G4X(dev))
                dspcntr |= DISPPLANE_TRICKLE_FEED_DISABLE;
  
 -      linear_offset = y * fb->pitches[0] + x * cpp;
 +      intel_add_fb_offsets(&x, &y, plane_state, 0);
  
 -      if (INTEL_INFO(dev)->gen >= 4) {
 +      if (INTEL_INFO(dev)->gen >= 4)
                intel_crtc->dspaddr_offset =
 -                      intel_compute_tile_offset(&x, &y, fb, 0,
 -                                                fb->pitches[0], rotation);
 -              linear_offset -= intel_crtc->dspaddr_offset;
 -      } else {
 -              intel_crtc->dspaddr_offset = linear_offset;
 -      }
 +                      intel_compute_tile_offset(&x, &y, plane_state, 0);
  
 -      if (rotation == BIT(DRM_ROTATE_180)) {
 +      if (rotation == DRM_ROTATE_180) {
                dspcntr |= DISPPLANE_ROTATE_180;
  
                x += (crtc_state->pipe_src_w - 1);
                y += (crtc_state->pipe_src_h - 1);
 -
 -              /* Finding the last pixel of the last line of the display
 -              data and adding to linear_offset*/
 -              linear_offset +=
 -                      (crtc_state->pipe_src_h - 1) * fb->pitches[0] +
 -                      (crtc_state->pipe_src_w - 1) * cpp;
        }
  
 +      linear_offset = intel_fb_xy_to_linear(x, y, plane_state, 0);
 +
 +      if (INTEL_INFO(dev)->gen < 4)
 +              intel_crtc->dspaddr_offset = linear_offset;
 +
        intel_crtc->adjusted_x = x;
        intel_crtc->adjusted_y = y;
  
        I915_WRITE(DSPSTRIDE(plane), fb->pitches[0]);
        if (INTEL_INFO(dev)->gen >= 4) {
                I915_WRITE(DSPSURF(plane),
 -                         i915_gem_obj_ggtt_offset(obj) + intel_crtc->dspaddr_offset);
 +                         intel_fb_gtt_offset(fb, rotation) +
 +                         intel_crtc->dspaddr_offset);
                I915_WRITE(DSPTILEOFF(plane), (y << 16) | x);
                I915_WRITE(DSPLINOFF(plane), linear_offset);
        } else
 -              I915_WRITE(DSPADDR(plane), i915_gem_obj_ggtt_offset(obj) + linear_offset);
 +              I915_WRITE(DSPADDR(plane), i915_gem_object_ggtt_offset(obj, NULL) + linear_offset);
        POSTING_READ(reg);
  }
  
@@@ -3133,13 -2741,15 +3133,13 @@@ static void ironlake_update_primary_pla
        struct drm_i915_private *dev_priv = to_i915(dev);
        struct intel_crtc *intel_crtc = to_intel_crtc(crtc_state->base.crtc);
        struct drm_framebuffer *fb = plane_state->base.fb;
 -      struct drm_i915_gem_object *obj = intel_fb_obj(fb);
        int plane = intel_crtc->plane;
        u32 linear_offset;
        u32 dspcntr;
        i915_reg_t reg = DSPCNTR(plane);
        unsigned int rotation = plane_state->base.rotation;
 -      int cpp = drm_format_plane_cpp(fb->pixel_format, 0);
 -      int x = plane_state->src.x1 >> 16;
 -      int y = plane_state->src.y1 >> 16;
 +      int x = plane_state->base.src.x1 >> 16;
 +      int y = plane_state->base.src.y1 >> 16;
  
        dspcntr = DISPPLANE_GAMMA_ENABLE;
        dspcntr |= DISPLAY_PLANE_ENABLE;
                BUG();
        }
  
 -      if (obj->tiling_mode != I915_TILING_NONE)
 +      if (fb->modifier[0] == I915_FORMAT_MOD_X_TILED)
                dspcntr |= DISPPLANE_TILED;
  
        if (!IS_HASWELL(dev) && !IS_BROADWELL(dev))
                dspcntr |= DISPPLANE_TRICKLE_FEED_DISABLE;
  
 -      linear_offset = y * fb->pitches[0] + x * cpp;
 +      intel_add_fb_offsets(&x, &y, plane_state, 0);
 +
        intel_crtc->dspaddr_offset =
 -              intel_compute_tile_offset(&x, &y, fb, 0,
 -                                        fb->pitches[0], rotation);
 -      linear_offset -= intel_crtc->dspaddr_offset;
 -      if (rotation == BIT(DRM_ROTATE_180)) {
 +              intel_compute_tile_offset(&x, &y, plane_state, 0);
 +
 +      if (rotation == DRM_ROTATE_180) {
                dspcntr |= DISPPLANE_ROTATE_180;
  
                if (!IS_HASWELL(dev) && !IS_BROADWELL(dev)) {
                        x += (crtc_state->pipe_src_w - 1);
                        y += (crtc_state->pipe_src_h - 1);
 -
 -                      /* Finding the last pixel of the last line of the display
 -                      data and adding to linear_offset*/
 -                      linear_offset +=
 -                              (crtc_state->pipe_src_h - 1) * fb->pitches[0] +
 -                              (crtc_state->pipe_src_w - 1) * cpp;
                }
        }
  
 +      linear_offset = intel_fb_xy_to_linear(x, y, plane_state, 0);
 +
        intel_crtc->adjusted_x = x;
        intel_crtc->adjusted_y = y;
  
  
        I915_WRITE(DSPSTRIDE(plane), fb->pitches[0]);
        I915_WRITE(DSPSURF(plane),
 -                 i915_gem_obj_ggtt_offset(obj) + intel_crtc->dspaddr_offset);
 +                 intel_fb_gtt_offset(fb, rotation) +
 +                 intel_crtc->dspaddr_offset);
        if (IS_HASWELL(dev) || IS_BROADWELL(dev)) {
                I915_WRITE(DSPOFFSET(plane), (y << 16) | x);
        } else {
@@@ -3222,21 -2835,32 +3222,21 @@@ u32 intel_fb_stride_alignment(const str
        }
  }
  
 -u32 intel_plane_obj_offset(struct intel_plane *intel_plane,
 -                         struct drm_i915_gem_object *obj,
 -                         unsigned int plane)
 +u32 intel_fb_gtt_offset(struct drm_framebuffer *fb,
 +                      unsigned int rotation)
  {
 +      struct drm_i915_gem_object *obj = intel_fb_obj(fb);
        struct i915_ggtt_view view;
        struct i915_vma *vma;
 -      u64 offset;
  
 -      intel_fill_fb_ggtt_view(&view, intel_plane->base.state->fb,
 -                              intel_plane->base.state->rotation);
 +      intel_fill_fb_ggtt_view(&view, fb, rotation);
  
 -      vma = i915_gem_obj_to_ggtt_view(obj, &view);
 +      vma = i915_gem_object_to_ggtt(obj, &view);
        if (WARN(!vma, "ggtt vma for display object not found! (view=%u)\n",
 -              view.type))
 +               view.type))
                return -1;
  
 -      offset = vma->node.start;
 -
 -      if (plane == 1) {
 -              offset += vma->ggtt_view.params.rotated.uv_start_page *
 -                        PAGE_SIZE;
 -      }
 -
 -      WARN_ON(upper_32_bits(offset));
 -
 -      return lower_32_bits(offset);
 +      return i915_ggtt_offset(vma);
  }
  
  static void skl_detach_scaler(struct intel_crtc *intel_crtc, int id)
@@@ -3266,28 -2890,6 +3266,28 @@@ static void skl_detach_scalers(struct i
        }
  }
  
 +u32 skl_plane_stride(const struct drm_framebuffer *fb, int plane,
 +                   unsigned int rotation)
 +{
 +      const struct drm_i915_private *dev_priv = to_i915(fb->dev);
 +      u32 stride = intel_fb_pitch(fb, plane, rotation);
 +
 +      /*
 +       * The stride is expressed either as a multiple of 64-byte chunks for
 +       * linear buffers or as a number of tiles for tiled buffers.
 +       */
 +      if (intel_rotation_90_or_270(rotation)) {
 +              int cpp = drm_format_plane_cpp(fb->pixel_format, plane);
 +
 +              stride /= intel_tile_height(dev_priv, fb->modifier[0], cpp);
 +      } else {
 +              stride /= intel_fb_stride_alignment(dev_priv, fb->modifier[0],
 +                                                  fb->pixel_format);
 +      }
 +
 +      return stride;
 +}
 +
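For illustration only (not part of the commit): per the comment above, a linear framebuffer's stride is programmed in 64-byte chunks, so intel_fb_stride_alignment() is assumed to return 64 in the unrotated case sketched here:

#include <stdio.h>

int main(void)
{
	unsigned int pitch = 7680;	/* e.g. 1920 pixels * 4 bytes/pixel */
	unsigned int chunk = 64;	/* assumed linear stride unit */

	printf("PLANE_STRIDE = %u\n", pitch / chunk);	/* prints 120 */
	return 0;
}
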
  u32 skl_plane_ctl_format(uint32_t pixel_format)
  {
        switch (pixel_format) {
@@@ -3350,17 -2952,17 +3350,17 @@@ u32 skl_plane_ctl_tiling(uint64_t fb_mo
  u32 skl_plane_ctl_rotation(unsigned int rotation)
  {
        switch (rotation) {
 -      case BIT(DRM_ROTATE_0):
 +      case DRM_ROTATE_0:
                break;
        /*
         * DRM_ROTATE_ is counter-clockwise to stay compatible with Xrandr,
         * while i915 HW rotation is clockwise; that's why the values are swapped.
         */
 -      case BIT(DRM_ROTATE_90):
 +      case DRM_ROTATE_90:
                return PLANE_CTL_ROTATE_270;
 -      case BIT(DRM_ROTATE_180):
 +      case DRM_ROTATE_180:
                return PLANE_CTL_ROTATE_180;
 -      case BIT(DRM_ROTATE_270):
 +      case DRM_ROTATE_270:
                return PLANE_CTL_ROTATE_90;
        default:
                MISSING_CASE(rotation);
@@@ -3377,21 -2979,22 +3377,21 @@@ static void skylake_update_primary_plan
        struct drm_i915_private *dev_priv = to_i915(dev);
        struct intel_crtc *intel_crtc = to_intel_crtc(crtc_state->base.crtc);
        struct drm_framebuffer *fb = plane_state->base.fb;
 -      struct drm_i915_gem_object *obj = intel_fb_obj(fb);
 +      const struct skl_wm_values *wm = &dev_priv->wm.skl_results;
        int pipe = intel_crtc->pipe;
 -      u32 plane_ctl, stride_div, stride;
 -      u32 tile_height, plane_offset, plane_size;
 +      u32 plane_ctl;
        unsigned int rotation = plane_state->base.rotation;
 -      int x_offset, y_offset;
 -      u32 surf_addr;
 +      u32 stride = skl_plane_stride(fb, 0, rotation);
 +      u32 surf_addr = plane_state->main.offset;
        int scaler_id = plane_state->scaler_id;
 -      int src_x = plane_state->src.x1 >> 16;
 -      int src_y = plane_state->src.y1 >> 16;
 -      int src_w = drm_rect_width(&plane_state->src) >> 16;
 -      int src_h = drm_rect_height(&plane_state->src) >> 16;
 -      int dst_x = plane_state->dst.x1;
 -      int dst_y = plane_state->dst.y1;
 -      int dst_w = drm_rect_width(&plane_state->dst);
 -      int dst_h = drm_rect_height(&plane_state->dst);
 +      int src_x = plane_state->main.x;
 +      int src_y = plane_state->main.y;
 +      int src_w = drm_rect_width(&plane_state->base.src) >> 16;
 +      int src_h = drm_rect_height(&plane_state->base.src) >> 16;
 +      int dst_x = plane_state->base.dst.x1;
 +      int dst_y = plane_state->base.dst.y1;
 +      int dst_w = drm_rect_width(&plane_state->base.dst);
 +      int dst_h = drm_rect_height(&plane_state->base.dst);
  
        plane_ctl = PLANE_CTL_ENABLE |
                    PLANE_CTL_PIPE_GAMMA_ENABLE |
        plane_ctl |= PLANE_CTL_PLANE_GAMMA_DISABLE;
        plane_ctl |= skl_plane_ctl_rotation(rotation);
  
 -      stride_div = intel_fb_stride_alignment(dev_priv, fb->modifier[0],
 -                                             fb->pixel_format);
 -      surf_addr = intel_plane_obj_offset(to_intel_plane(plane), obj, 0);
 +      /* Sizes are 0 based */
 +      src_w--;
 +      src_h--;
 +      dst_w--;
 +      dst_h--;
  
 -      WARN_ON(drm_rect_width(&plane_state->src) == 0);
 +      intel_crtc->adjusted_x = src_x;
 +      intel_crtc->adjusted_y = src_y;
  
 -      if (intel_rotation_90_or_270(rotation)) {
 -              int cpp = drm_format_plane_cpp(fb->pixel_format, 0);
 -
 -              /* stride = Surface height in tiles */
 -              tile_height = intel_tile_height(dev_priv, fb->modifier[0], cpp);
 -              stride = DIV_ROUND_UP(fb->height, tile_height);
 -              x_offset = stride * tile_height - src_y - src_h;
 -              y_offset = src_x;
 -              plane_size = (src_w - 1) << 16 | (src_h - 1);
 -      } else {
 -              stride = fb->pitches[0] / stride_div;
 -              x_offset = src_x;
 -              y_offset = src_y;
 -              plane_size = (src_h - 1) << 16 | (src_w - 1);
 -      }
 -      plane_offset = y_offset << 16 | x_offset;
 -
 -      intel_crtc->adjusted_x = x_offset;
 -      intel_crtc->adjusted_y = y_offset;
 +      if (wm->dirty_pipes & drm_crtc_mask(&intel_crtc->base))
 +              skl_write_plane_wm(intel_crtc, wm, 0);
  
        I915_WRITE(PLANE_CTL(pipe, 0), plane_ctl);
 -      I915_WRITE(PLANE_OFFSET(pipe, 0), plane_offset);
 -      I915_WRITE(PLANE_SIZE(pipe, 0), plane_size);
 +      I915_WRITE(PLANE_OFFSET(pipe, 0), (src_y << 16) | src_x);
        I915_WRITE(PLANE_STRIDE(pipe, 0), stride);
 +      I915_WRITE(PLANE_SIZE(pipe, 0), (src_h << 16) | src_w);
  
        if (scaler_id >= 0) {
                uint32_t ps_ctrl = 0;
                I915_WRITE(PLANE_POS(pipe, 0), (dst_y << 16) | dst_x);
        }
  
 -      I915_WRITE(PLANE_SURF(pipe, 0), surf_addr);
 +      I915_WRITE(PLANE_SURF(pipe, 0),
 +                 intel_fb_gtt_offset(fb, rotation) + surf_addr);
  
        POSTING_READ(PLANE_SURF(pipe, 0));
  }
@@@ -3445,15 -3061,7 +3445,15 @@@ static void skylake_disable_primary_pla
  {
        struct drm_device *dev = crtc->dev;
        struct drm_i915_private *dev_priv = to_i915(dev);
 -      int pipe = to_intel_crtc(crtc)->pipe;
 +      struct intel_crtc *intel_crtc = to_intel_crtc(crtc);
 +      int pipe = intel_crtc->pipe;
 +
 +      /*
 +       * We only populate skl_results on watermark updates, and if the
 +       * plane's visibility isn't actually changing, neither are its watermarks.
 +       */
 +      if (!crtc->primary->state->visible)
 +              skl_write_plane_wm(intel_crtc, &dev_priv->wm.skl_results, 0);
  
        I915_WRITE(PLANE_CTL(pipe, 0), 0);
        I915_WRITE(PLANE_SURF(pipe, 0), 0);
@@@ -3488,7 -3096,7 +3488,7 @@@ static void intel_update_primary_planes
                struct intel_plane_state *plane_state =
                        to_intel_plane_state(plane->base.state);
  
 -              if (plane_state->visible)
 +              if (plane_state->base.visible)
                        plane->update_plane(&plane->base,
                                            to_intel_crtc_state(crtc->state),
                                            plane_state);
@@@ -3527,12 -3135,6 +3527,12 @@@ __intel_display_resume(struct drm_devic
        return ret;
  }
  
 +static bool gpu_reset_clobbers_display(struct drm_i915_private *dev_priv)
 +{
 +      return intel_has_gpu_reset(dev_priv) &&
 +              INTEL_GEN(dev_priv) < 5 && !IS_G4X(dev_priv);
 +}
 +
  void intel_prepare_reset(struct drm_i915_private *dev_priv)
  {
        struct drm_device *dev = &dev_priv->drm;
        struct drm_atomic_state *state;
        int ret;
  
 -      /* no reset support for gen2 */
 -      if (IS_GEN2(dev_priv))
 -              return;
 -
        /*
         * Need mode_config.mutex so that we don't
         * trample ongoing ->detect() and whatnot.
        }
  
        /* reset doesn't touch the display, but flips might get nuked anyway, */
 -      if (INTEL_GEN(dev_priv) >= 5 || IS_G4X(dev_priv))
 +      if (!i915.force_reset_modeset_test &&
 +          !gpu_reset_clobbers_display(dev_priv))
                return;
  
        /*
@@@ -3599,26 -3204,24 +3599,28 @@@ void intel_finish_reset(struct drm_i915
         */
        intel_complete_page_flips(dev_priv);
  
 -      /* no reset support for gen2 */
 -      if (IS_GEN2(dev_priv))
 -              return;
 +      dev_priv->modeset_restore_state = NULL;
  
        /* reset doesn't touch the display */
 -      if (INTEL_GEN(dev_priv) >= 5 || IS_G4X(dev_priv)) {
 -              /*
 -               * Flips in the rings have been nuked by the reset,
 -               * so update the base address of all primary
 -               * planes to the the last fb to make sure we're
 -               * showing the correct fb after a reset.
 -               *
 -               * FIXME: Atomic will make this obsolete since we won't schedule
 -               * CS-based flips (which might get lost in gpu resets) any more.
 -               */
 -              intel_update_primary_planes(dev);
 +      if (!gpu_reset_clobbers_display(dev_priv)) {
 +              if (!state) {
 +                      /*
 +                       * Flips in the rings have been nuked by the reset,
 +                       * so update the base address of all primary
 +                       * planes to the last fb to make sure we're
 +                       * showing the correct fb after a reset.
 +                       *
 +                       * FIXME: Atomic will make this obsolete since we won't schedule
 +                       * CS-based flips (which might get lost in gpu resets) any more.
 +                       */
 +                      intel_update_primary_planes(dev);
 +              } else {
 +                      ret = __intel_display_resume(dev, state);
 +                      if (ret)
 +                              DRM_ERROR("Restoring old state failed with %i\n", ret);
 +              }
        } else {
                /*
                 * The display has been reset as well,
        mutex_unlock(&dev->mode_config.mutex);
  }
  
 +static bool abort_flip_on_reset(struct intel_crtc *crtc)
 +{
 +      struct i915_gpu_error *error = &to_i915(crtc->base.dev)->gpu_error;
 +
 +      if (i915_reset_in_progress(error))
 +              return true;
 +
 +      if (crtc->reset_count != i915_reset_count(error))
 +              return true;
 +
 +      return false;
 +}
 +
  static bool intel_crtc_has_pending_flip(struct drm_crtc *crtc)
  {
        struct drm_device *dev = crtc->dev;
        struct intel_crtc *intel_crtc = to_intel_crtc(crtc);
 -      unsigned reset_counter;
        bool pending;
  
 -      reset_counter = i915_reset_counter(&to_i915(dev)->gpu_error);
 -      if (intel_crtc->reset_counter != reset_counter)
 +      if (abort_flip_on_reset(intel_crtc))
                return false;
  
        spin_lock_irq(&dev->event_lock);
@@@ -4308,7 -3900,7 +4310,7 @@@ static int intel_crtc_wait_for_pending_
        return 0;
  }
  
 -static void lpt_disable_iclkip(struct drm_i915_private *dev_priv)
 +void lpt_disable_iclkip(struct drm_i915_private *dev_priv)
  {
        u32 temp;
  
@@@ -4731,7 -4323,7 +4733,7 @@@ int skl_update_scaler_crtc(struct intel
                      intel_crtc->pipe, SKL_CRTC_INDEX);
  
        return skl_update_scaler(state, !state->base.active, SKL_CRTC_INDEX,
 -              &state->scaler_state.scaler_id, BIT(DRM_ROTATE_0),
 +              &state->scaler_state.scaler_id, DRM_ROTATE_0,
                state->pipe_src_w, state->pipe_src_h,
                adjusted_mode->crtc_hdisplay, adjusted_mode->crtc_vdisplay);
  }
@@@ -4756,7 -4348,7 +4758,7 @@@ static int skl_update_scaler_plane(stru
        struct drm_framebuffer *fb = plane_state->base.fb;
        int ret;
  
 -      bool force_detach = !fb || !plane_state->visible;
 +      bool force_detach = !fb || !plane_state->base.visible;
  
        DRM_DEBUG_KMS("Updating scaler for [PLANE:%d:%s] scaler_user index %u.%u\n",
                      intel_plane->base.base.id, intel_plane->base.name,
                                drm_plane_index(&intel_plane->base),
                                &plane_state->scaler_id,
                                plane_state->base.rotation,
 -                              drm_rect_width(&plane_state->src) >> 16,
 -                              drm_rect_height(&plane_state->src) >> 16,
 -                              drm_rect_width(&plane_state->dst),
 -                              drm_rect_height(&plane_state->dst));
 +                              drm_rect_width(&plane_state->base.src) >> 16,
 +                              drm_rect_height(&plane_state->base.src) >> 16,
 +                              drm_rect_width(&plane_state->base.dst),
 +                              drm_rect_height(&plane_state->base.dst));
  
        if (ret || plane_state->scaler_id < 0)
                return ret;
@@@ -5047,11 -4639,12 +5049,11 @@@ static void intel_post_plane_update(str
        struct drm_atomic_state *old_state = old_crtc_state->base.state;
        struct intel_crtc_state *pipe_config =
                to_intel_crtc_state(crtc->base.state);
 -      struct drm_device *dev = crtc->base.dev;
        struct drm_plane *primary = crtc->base.primary;
        struct drm_plane_state *old_pri_state =
                drm_atomic_get_existing_plane_state(old_state, primary);
  
 -      intel_frontbuffer_flip(dev, pipe_config->fb_bits);
 +      intel_frontbuffer_flip(to_i915(crtc->base.dev), pipe_config->fb_bits);
  
        crtc->wm.cxsr_allowed = true;
  
  
                intel_fbc_post_update(crtc);
  
 -              if (primary_state->visible &&
 +              if (primary_state->base.visible &&
                    (needs_modeset(&pipe_config->base) ||
 -                   !old_primary_state->visible))
 +                   !old_primary_state->base.visible))
                        intel_post_enable_primary(&crtc->base);
        }
  }
@@@ -5094,8 -4687,8 +5096,8 @@@ static void intel_pre_plane_update(stru
  
                intel_fbc_pre_update(crtc, pipe_config, primary_state);
  
 -              if (old_primary_state->visible &&
 -                  (modeset || !primary_state->visible))
 +              if (old_primary_state->base.visible &&
 +                  (modeset || !primary_state->base.visible))
                        intel_pre_disable_primary(&crtc->base);
        }
  
@@@ -5174,140 -4767,18 +5176,140 @@@ static void intel_crtc_disable_planes(s
         * to compute the mask of flip planes precisely. For the time being
         * consider this a flip to a NULL plane.
         */
 -      intel_frontbuffer_flip(dev, INTEL_FRONTBUFFER_ALL_MASK(pipe));
 +      intel_frontbuffer_flip(to_i915(dev), INTEL_FRONTBUFFER_ALL_MASK(pipe));
 +}
 +
 +static void intel_encoders_pre_pll_enable(struct drm_crtc *crtc,
 +                                        struct intel_crtc_state *crtc_state,
 +                                        struct drm_atomic_state *old_state)
 +{
 +      struct drm_connector_state *old_conn_state;
 +      struct drm_connector *conn;
 +      int i;
 +
 +      for_each_connector_in_state(old_state, conn, old_conn_state, i) {
 +              struct drm_connector_state *conn_state = conn->state;
 +              struct intel_encoder *encoder =
 +                      to_intel_encoder(conn_state->best_encoder);
 +
 +              if (conn_state->crtc != crtc)
 +                      continue;
 +
 +              if (encoder->pre_pll_enable)
 +                      encoder->pre_pll_enable(encoder, crtc_state, conn_state);
 +      }
 +}
 +
 +static void intel_encoders_pre_enable(struct drm_crtc *crtc,
 +                                    struct intel_crtc_state *crtc_state,
 +                                    struct drm_atomic_state *old_state)
 +{
 +      struct drm_connector_state *old_conn_state;
 +      struct drm_connector *conn;
 +      int i;
 +
 +      for_each_connector_in_state(old_state, conn, old_conn_state, i) {
 +              struct drm_connector_state *conn_state = conn->state;
 +              struct intel_encoder *encoder =
 +                      to_intel_encoder(conn_state->best_encoder);
 +
 +              if (conn_state->crtc != crtc)
 +                      continue;
 +
 +              if (encoder->pre_enable)
 +                      encoder->pre_enable(encoder, crtc_state, conn_state);
 +      }
 +}
 +
 +static void intel_encoders_enable(struct drm_crtc *crtc,
 +                                struct intel_crtc_state *crtc_state,
 +                                struct drm_atomic_state *old_state)
 +{
 +      struct drm_connector_state *old_conn_state;
 +      struct drm_connector *conn;
 +      int i;
 +
 +      for_each_connector_in_state(old_state, conn, old_conn_state, i) {
 +              struct drm_connector_state *conn_state = conn->state;
 +              struct intel_encoder *encoder =
 +                      to_intel_encoder(conn_state->best_encoder);
 +
 +              if (conn_state->crtc != crtc)
 +                      continue;
 +
 +              encoder->enable(encoder, crtc_state, conn_state);
 +              intel_opregion_notify_encoder(encoder, true);
 +      }
 +}
 +
 +static void intel_encoders_disable(struct drm_crtc *crtc,
 +                                 struct intel_crtc_state *old_crtc_state,
 +                                 struct drm_atomic_state *old_state)
 +{
 +      struct drm_connector_state *old_conn_state;
 +      struct drm_connector *conn;
 +      int i;
 +
 +      for_each_connector_in_state(old_state, conn, old_conn_state, i) {
 +              struct intel_encoder *encoder =
 +                      to_intel_encoder(old_conn_state->best_encoder);
 +
 +              if (old_conn_state->crtc != crtc)
 +                      continue;
 +
 +              intel_opregion_notify_encoder(encoder, false);
 +              encoder->disable(encoder, old_crtc_state, old_conn_state);
 +      }
 +}
 +
 +static void intel_encoders_post_disable(struct drm_crtc *crtc,
 +                                      struct intel_crtc_state *old_crtc_state,
 +                                      struct drm_atomic_state *old_state)
 +{
 +      struct drm_connector_state *old_conn_state;
 +      struct drm_connector *conn;
 +      int i;
 +
 +      for_each_connector_in_state(old_state, conn, old_conn_state, i) {
 +              struct intel_encoder *encoder =
 +                      to_intel_encoder(old_conn_state->best_encoder);
 +
 +              if (old_conn_state->crtc != crtc)
 +                      continue;
 +
 +              if (encoder->post_disable)
 +                      encoder->post_disable(encoder, old_crtc_state, old_conn_state);
 +      }
 +}
 +
 +static void intel_encoders_post_pll_disable(struct drm_crtc *crtc,
 +                                          struct intel_crtc_state *old_crtc_state,
 +                                          struct drm_atomic_state *old_state)
 +{
 +      struct drm_connector_state *old_conn_state;
 +      struct drm_connector *conn;
 +      int i;
 +
 +      for_each_connector_in_state(old_state, conn, old_conn_state, i) {
 +              struct intel_encoder *encoder =
 +                      to_intel_encoder(old_conn_state->best_encoder);
 +
 +              if (old_conn_state->crtc != crtc)
 +                      continue;
 +
 +              if (encoder->post_pll_disable)
 +                      encoder->post_pll_disable(encoder, old_crtc_state, old_conn_state);
 +      }
  }
  
 -static void ironlake_crtc_enable(struct drm_crtc *crtc)
 +static void ironlake_crtc_enable(struct intel_crtc_state *pipe_config,
 +                               struct drm_atomic_state *old_state)
  {
 +      struct drm_crtc *crtc = pipe_config->base.crtc;
        struct drm_device *dev = crtc->dev;
        struct drm_i915_private *dev_priv = to_i915(dev);
        struct intel_crtc *intel_crtc = to_intel_crtc(crtc);
 -      struct intel_encoder *encoder;
        int pipe = intel_crtc->pipe;
 -      struct intel_crtc_state *pipe_config =
 -              to_intel_crtc_state(crtc->state);
  
        if (WARN_ON(intel_crtc->active))
                return;
  
        intel_crtc->active = true;
  
 -      for_each_encoder_on_crtc(dev, crtc, encoder)
 -              if (encoder->pre_enable)
 -                      encoder->pre_enable(encoder);
 +      intel_encoders_pre_enable(crtc, pipe_config, old_state);
  
        if (intel_crtc->config->has_pch_encoder) {
                /* Note: FDI PLL enabling _must_ be done before we enable the
        assert_vblank_disabled(crtc);
        drm_crtc_vblank_on(crtc);
  
 -      for_each_encoder_on_crtc(dev, crtc, encoder)
 -              encoder->enable(encoder);
 +      intel_encoders_enable(crtc, pipe_config, old_state);
  
        if (HAS_PCH_CPT(dev))
                cpt_verify_modeset(dev, intel_crtc->pipe);
@@@ -5393,15 -4867,16 +5395,15 @@@ static bool hsw_crtc_supports_ips(struc
        return HAS_IPS(crtc->base.dev) && crtc->pipe == PIPE_A;
  }
  
 -static void haswell_crtc_enable(struct drm_crtc *crtc)
 +static void haswell_crtc_enable(struct intel_crtc_state *pipe_config,
 +                              struct drm_atomic_state *old_state)
  {
 +      struct drm_crtc *crtc = pipe_config->base.crtc;
        struct drm_device *dev = crtc->dev;
        struct drm_i915_private *dev_priv = to_i915(dev);
        struct intel_crtc *intel_crtc = to_intel_crtc(crtc);
 -      struct intel_encoder *encoder;
        int pipe = intel_crtc->pipe, hsw_workaround_pipe;
        enum transcoder cpu_transcoder = intel_crtc->config->cpu_transcoder;
 -      struct intel_crtc_state *pipe_config =
 -              to_intel_crtc_state(crtc->state);
  
        if (WARN_ON(intel_crtc->active))
                return;
                intel_set_pch_fifo_underrun_reporting(dev_priv, TRANSCODER_A,
                                                      false);
  
 -      for_each_encoder_on_crtc(dev, crtc, encoder)
 -              if (encoder->pre_pll_enable)
 -                      encoder->pre_pll_enable(encoder);
 +      intel_encoders_pre_pll_enable(crtc, pipe_config, old_state);
  
        if (intel_crtc->config->shared_dpll)
                intel_enable_shared_dpll(intel_crtc);
        else
                intel_set_cpu_fifo_underrun_reporting(dev_priv, pipe, true);
  
 -      for_each_encoder_on_crtc(dev, crtc, encoder) {
 -              if (encoder->pre_enable)
 -                      encoder->pre_enable(encoder);
 -      }
 +      intel_encoders_pre_enable(crtc, pipe_config, old_state);
  
        if (intel_crtc->config->has_pch_encoder)
                dev_priv->display.fdi_link_train(crtc);
        assert_vblank_disabled(crtc);
        drm_crtc_vblank_on(crtc);
  
 -      for_each_encoder_on_crtc(dev, crtc, encoder) {
 -              encoder->enable(encoder);
 -              intel_opregion_notify_encoder(encoder, true);
 -      }
 +      intel_encoders_enable(crtc, pipe_config, old_state);
  
        if (intel_crtc->config->has_pch_encoder) {
                intel_wait_for_vblank(dev, pipe);
@@@ -5523,13 -5006,12 +5525,13 @@@ static void ironlake_pfit_disable(struc
        }
  }
  
 -static void ironlake_crtc_disable(struct drm_crtc *crtc)
 +static void ironlake_crtc_disable(struct intel_crtc_state *old_crtc_state,
 +                                struct drm_atomic_state *old_state)
  {
 +      struct drm_crtc *crtc = old_crtc_state->base.crtc;
        struct drm_device *dev = crtc->dev;
        struct drm_i915_private *dev_priv = to_i915(dev);
        struct intel_crtc *intel_crtc = to_intel_crtc(crtc);
 -      struct intel_encoder *encoder;
        int pipe = intel_crtc->pipe;
  
        /*
                intel_set_pch_fifo_underrun_reporting(dev_priv, pipe, false);
        }
  
 -      for_each_encoder_on_crtc(dev, crtc, encoder)
 -              encoder->disable(encoder);
 +      intel_encoders_disable(crtc, old_crtc_state, old_state);
  
        drm_crtc_vblank_off(crtc);
        assert_vblank_disabled(crtc);
        if (intel_crtc->config->has_pch_encoder)
                ironlake_fdi_disable(crtc);
  
 -      for_each_encoder_on_crtc(dev, crtc, encoder)
 -              if (encoder->post_disable)
 -                      encoder->post_disable(encoder);
 +      intel_encoders_post_disable(crtc, old_crtc_state, old_state);
  
        if (intel_crtc->config->has_pch_encoder) {
                ironlake_disable_pch_transcoder(dev_priv, pipe);
        intel_set_pch_fifo_underrun_reporting(dev_priv, pipe, true);
  }
  
 -static void haswell_crtc_disable(struct drm_crtc *crtc)
 +static void haswell_crtc_disable(struct intel_crtc_state *old_crtc_state,
 +                               struct drm_atomic_state *old_state)
  {
 +      struct drm_crtc *crtc = old_crtc_state->base.crtc;
        struct drm_device *dev = crtc->dev;
        struct drm_i915_private *dev_priv = to_i915(dev);
        struct intel_crtc *intel_crtc = to_intel_crtc(crtc);
 -      struct intel_encoder *encoder;
        enum transcoder cpu_transcoder = intel_crtc->config->cpu_transcoder;
  
        if (intel_crtc->config->has_pch_encoder)
                intel_set_pch_fifo_underrun_reporting(dev_priv, TRANSCODER_A,
                                                      false);
  
 -      for_each_encoder_on_crtc(dev, crtc, encoder) {
 -              intel_opregion_notify_encoder(encoder, false);
 -              encoder->disable(encoder);
 -      }
 +      intel_encoders_disable(crtc, old_crtc_state, old_state);
  
        drm_crtc_vblank_off(crtc);
        assert_vblank_disabled(crtc);
        if (!transcoder_is_dsi(cpu_transcoder))
                intel_ddi_disable_pipe_clock(intel_crtc);
  
 -      for_each_encoder_on_crtc(dev, crtc, encoder)
 -              if (encoder->post_disable)
 -                      encoder->post_disable(encoder);
 -
 -      if (intel_crtc->config->has_pch_encoder) {
 -              lpt_disable_pch_transcoder(dev_priv);
 -              lpt_disable_iclkip(dev_priv);
 -              intel_ddi_fdi_disable(crtc);
 +      intel_encoders_post_disable(crtc, old_crtc_state, old_state);
  
 +      if (old_crtc_state->has_pch_encoder)
                intel_set_pch_fifo_underrun_reporting(dev_priv, TRANSCODER_A,
                                                      true);
 -      }
  }
  
  static void i9xx_pfit_enable(struct intel_crtc *crtc)
@@@ -6680,13 -6174,14 +6682,13 @@@ static void valleyview_modeset_commit_c
        intel_display_power_put(dev_priv, POWER_DOMAIN_PIPE_A);
  }
  
 -static void valleyview_crtc_enable(struct drm_crtc *crtc)
 +static void valleyview_crtc_enable(struct intel_crtc_state *pipe_config,
 +                                 struct drm_atomic_state *old_state)
  {
 +      struct drm_crtc *crtc = pipe_config->base.crtc;
        struct drm_device *dev = crtc->dev;
        struct drm_i915_private *dev_priv = to_i915(dev);
        struct intel_crtc *intel_crtc = to_intel_crtc(crtc);
 -      struct intel_encoder *encoder;
 -      struct intel_crtc_state *pipe_config =
 -              to_intel_crtc_state(crtc->state);
        int pipe = intel_crtc->pipe;
  
        if (WARN_ON(intel_crtc->active))
  
        intel_set_cpu_fifo_underrun_reporting(dev_priv, pipe, true);
  
 -      for_each_encoder_on_crtc(dev, crtc, encoder)
 -              if (encoder->pre_pll_enable)
 -                      encoder->pre_pll_enable(encoder);
 +      intel_encoders_pre_pll_enable(crtc, pipe_config, old_state);
  
        if (IS_CHERRYVIEW(dev)) {
                chv_prepare_pll(intel_crtc, intel_crtc->config);
                vlv_enable_pll(intel_crtc, intel_crtc->config);
        }
  
 -      for_each_encoder_on_crtc(dev, crtc, encoder)
 -              if (encoder->pre_enable)
 -                      encoder->pre_enable(encoder);
 +      intel_encoders_pre_enable(crtc, pipe_config, old_state);
  
        i9xx_pfit_enable(intel_crtc);
  
        assert_vblank_disabled(crtc);
        drm_crtc_vblank_on(crtc);
  
 -      for_each_encoder_on_crtc(dev, crtc, encoder)
 -              encoder->enable(encoder);
 +      intel_encoders_enable(crtc, pipe_config, old_state);
  }
  
  static void i9xx_set_pll_dividers(struct intel_crtc *crtc)
        I915_WRITE(FP1(crtc->pipe), crtc->config->dpll_hw_state.fp1);
  }
  
 -static void i9xx_crtc_enable(struct drm_crtc *crtc)
 +static void i9xx_crtc_enable(struct intel_crtc_state *pipe_config,
 +                           struct drm_atomic_state *old_state)
  {
 +      struct drm_crtc *crtc = pipe_config->base.crtc;
        struct drm_device *dev = crtc->dev;
        struct drm_i915_private *dev_priv = to_i915(dev);
        struct intel_crtc *intel_crtc = to_intel_crtc(crtc);
 -      struct intel_encoder *encoder;
 -      struct intel_crtc_state *pipe_config =
 -              to_intel_crtc_state(crtc->state);
        enum pipe pipe = intel_crtc->pipe;
  
        if (WARN_ON(intel_crtc->active))
        if (!IS_GEN2(dev))
                intel_set_cpu_fifo_underrun_reporting(dev_priv, pipe, true);
  
 -      for_each_encoder_on_crtc(dev, crtc, encoder)
 -              if (encoder->pre_enable)
 -                      encoder->pre_enable(encoder);
 +      intel_encoders_pre_enable(crtc, pipe_config, old_state);
  
        i9xx_enable_pll(intel_crtc);
  
        assert_vblank_disabled(crtc);
        drm_crtc_vblank_on(crtc);
  
 -      for_each_encoder_on_crtc(dev, crtc, encoder)
 -              encoder->enable(encoder);
 +      intel_encoders_enable(crtc, pipe_config, old_state);
  }
  
  static void i9xx_pfit_disable(struct intel_crtc *crtc)
        I915_WRITE(PFIT_CONTROL, 0);
  }
  
 -static void i9xx_crtc_disable(struct drm_crtc *crtc)
 +static void i9xx_crtc_disable(struct intel_crtc_state *old_crtc_state,
 +                            struct drm_atomic_state *old_state)
  {
 +      struct drm_crtc *crtc = old_crtc_state->base.crtc;
        struct drm_device *dev = crtc->dev;
        struct drm_i915_private *dev_priv = to_i915(dev);
        struct intel_crtc *intel_crtc = to_intel_crtc(crtc);
 -      struct intel_encoder *encoder;
        int pipe = intel_crtc->pipe;
  
        /*
        if (IS_GEN2(dev))
                intel_wait_for_vblank(dev, pipe);
  
 -      for_each_encoder_on_crtc(dev, crtc, encoder)
 -              encoder->disable(encoder);
 +      intel_encoders_disable(crtc, old_crtc_state, old_state);
  
        drm_crtc_vblank_off(crtc);
        assert_vblank_disabled(crtc);
  
        i9xx_pfit_disable(intel_crtc);
  
 -      for_each_encoder_on_crtc(dev, crtc, encoder)
 -              if (encoder->post_disable)
 -                      encoder->post_disable(encoder);
 +      intel_encoders_post_disable(crtc, old_crtc_state, old_state);
  
        if (!intel_crtc_has_type(intel_crtc->config, INTEL_OUTPUT_DSI)) {
                if (IS_CHERRYVIEW(dev))
                        i9xx_disable_pll(intel_crtc);
        }
  
 -      for_each_encoder_on_crtc(dev, crtc, encoder)
 -              if (encoder->post_pll_disable)
 -                      encoder->post_pll_disable(encoder);
 +      intel_encoders_post_pll_disable(crtc, old_crtc_state, old_state);
  
        if (!IS_GEN2(dev))
                intel_set_cpu_fifo_underrun_reporting(dev_priv, pipe, false);
@@@ -6853,34 -6361,20 +6855,34 @@@ static void intel_crtc_disable_noatomic
        struct drm_i915_private *dev_priv = to_i915(crtc->dev);
        enum intel_display_power_domain domain;
        unsigned long domains;
 +      struct drm_atomic_state *state;
 +      struct intel_crtc_state *crtc_state;
 +      int ret;
  
        if (!intel_crtc->active)
                return;
  
 -      if (to_intel_plane_state(crtc->primary->state)->visible) {
 +      if (to_intel_plane_state(crtc->primary->state)->base.visible) {
                WARN_ON(intel_crtc->flip_work);
  
                intel_pre_disable_primary_noatomic(crtc);
  
                intel_crtc_disable_planes(crtc, 1 << drm_plane_index(crtc->primary));
 -              to_intel_plane_state(crtc->primary->state)->visible = false;
 +              to_intel_plane_state(crtc->primary->state)->base.visible = false;
        }
  
 -      dev_priv->display.crtc_disable(crtc);
 +      state = drm_atomic_state_alloc(crtc->dev);
 +      state->acquire_ctx = crtc->dev->mode_config.acquire_ctx;
 +
 +      /* Everything's already locked, -EDEADLK can't happen. */
 +      crtc_state = intel_atomic_get_crtc_state(state, intel_crtc);
 +      ret = drm_atomic_add_affected_connectors(state, crtc);
 +
 +      WARN_ON(IS_ERR(crtc_state) || ret);
 +
 +      dev_priv->display.crtc_disable(crtc_state, state);
 +
 +      drm_atomic_state_free(state);
  
        DRM_DEBUG_KMS("[CRTC:%d:%s] hw state adjusted, was enabled, now disabled\n",
                      crtc->base.id, crtc->name);
@@@ -7395,10 -6889,9 +7397,10 @@@ static int i9xx_misc_get_display_clock_
  
  static int pnv_get_display_clock_speed(struct drm_device *dev)
  {
 +      struct pci_dev *pdev = dev->pdev;
        u16 gcfgc = 0;
  
 -      pci_read_config_word(dev->pdev, GCFGC, &gcfgc);
 +      pci_read_config_word(pdev, GCFGC, &gcfgc);
  
        switch (gcfgc & GC_DISPLAY_CLOCK_MASK) {
        case GC_DISPLAY_CLOCK_267_MHZ_PNV:
  
  static int i915gm_get_display_clock_speed(struct drm_device *dev)
  {
 +      struct pci_dev *pdev = dev->pdev;
        u16 gcfgc = 0;
  
 -      pci_read_config_word(dev->pdev, GCFGC, &gcfgc);
 +      pci_read_config_word(pdev, GCFGC, &gcfgc);
  
        if (gcfgc & GC_LOW_FREQUENCY_ENABLE)
                return 133333;
@@@ -7445,7 -6937,6 +7447,7 @@@ static int i865_get_display_clock_speed
  
  static int i85x_get_display_clock_speed(struct drm_device *dev)
  {
 +      struct pci_dev *pdev = dev->pdev;
        u16 hpllcc = 0;
  
        /*
         * encoding is different :(
         * FIXME is this the right way to detect 852GM/852GMV?
         */
 -      if (dev->pdev->revision == 0x1)
 +      if (pdev->revision == 0x1)
                return 133333;
  
 -      pci_bus_read_config_word(dev->pdev->bus,
 +      pci_bus_read_config_word(pdev->bus,
                                 PCI_DEVFN(0, 3), HPLLCC, &hpllcc);
  
        /* Assume that the hardware is in the high speed state.  This
@@@ -7557,11 -7048,10 +7559,11 @@@ static unsigned int intel_hpll_vco(stru
  
  static int gm45_get_display_clock_speed(struct drm_device *dev)
  {
 +      struct pci_dev *pdev = dev->pdev;
        unsigned int cdclk_sel, vco = intel_hpll_vco(dev);
        uint16_t tmp = 0;
  
 -      pci_read_config_word(dev->pdev, GCFGC, &tmp);
 +      pci_read_config_word(pdev, GCFGC, &tmp);
  
        cdclk_sel = (tmp >> 12) & 0x1;
  
  
  static int i965gm_get_display_clock_speed(struct drm_device *dev)
  {
 +      struct pci_dev *pdev = dev->pdev;
        static const uint8_t div_3200[] = { 16, 10,  8 };
        static const uint8_t div_4000[] = { 20, 12, 10 };
        static const uint8_t div_5333[] = { 24, 16, 14 };
        unsigned int cdclk_sel, vco = intel_hpll_vco(dev);
        uint16_t tmp = 0;
  
 -      pci_read_config_word(dev->pdev, GCFGC, &tmp);
 +      pci_read_config_word(pdev, GCFGC, &tmp);
  
        cdclk_sel = ((tmp >> 8) & 0x1f) - 1;
  
@@@ -7618,7 -7107,6 +7620,7 @@@ fail
  
  static int g33_get_display_clock_speed(struct drm_device *dev)
  {
 +      struct pci_dev *pdev = dev->pdev;
        static const uint8_t div_3200[] = { 12, 10,  8,  7, 5, 16 };
        static const uint8_t div_4000[] = { 14, 12, 10,  8, 6, 20 };
        static const uint8_t div_4800[] = { 20, 14, 12, 10, 8, 24 };
        unsigned int cdclk_sel, vco = intel_hpll_vco(dev);
        uint16_t tmp = 0;
  
 -      pci_read_config_word(dev->pdev, GCFGC, &tmp);
 +      pci_read_config_word(pdev, GCFGC, &tmp);
  
        cdclk_sel = (tmp >> 4) & 0x7;
  
@@@ -9793,7 -9281,7 +9795,7 @@@ skylake_get_initial_plane_config(struc
        return;
  
  error:
 -      kfree(fb);
 +      kfree(intel_fb);
  }
  
  static void ironlake_get_pfit_config(struct intel_crtc *crtc,
@@@ -9999,7 -9487,7 +10001,7 @@@ static void assert_can_disable_lcpll(st
        I915_STATE_WARN(I915_READ(SPLL_CTL) & SPLL_PLL_ENABLE, "SPLL enabled\n");
        I915_STATE_WARN(I915_READ(WRPLL_CTL(0)) & WRPLL_PLL_ENABLE, "WRPLL1 enabled\n");
        I915_STATE_WARN(I915_READ(WRPLL_CTL(1)) & WRPLL_PLL_ENABLE, "WRPLL2 enabled\n");
 -      I915_STATE_WARN(I915_READ(PCH_PP_STATUS) & PP_ON, "Panel power on\n");
 +      I915_STATE_WARN(I915_READ(PP_STATUS(0)) & PP_ON, "Panel power on\n");
        I915_STATE_WARN(I915_READ(BLC_PWM_CPU_CTL2) & BLM_PWM_ENABLE,
             "CPU PWM1 enabled\n");
        if (IS_HASWELL(dev))
@@@ -10038,7 -9526,7 +10040,7 @@@ static void hsw_write_dcomp(struct drm_
                mutex_lock(&dev_priv->rps.hw_lock);
                if (sandybridge_pcode_write(dev_priv, GEN6_PCODE_WRITE_D_COMP,
                                            val))
 -                      DRM_ERROR("Failed to write to D_COMP\n");
 +                      DRM_DEBUG_KMS("Failed to write to D_COMP\n");
                mutex_unlock(&dev_priv->rps.hw_lock);
        } else {
                I915_WRITE(D_COMP_BDW, val);
@@@ -10446,12 -9934,15 +10448,12 @@@ static void bxt_get_ddi_pll(struct drm_
  
        switch (port) {
        case PORT_A:
 -              pipe_config->ddi_pll_sel = SKL_DPLL0;
                id = DPLL_ID_SKL_DPLL0;
                break;
        case PORT_B:
 -              pipe_config->ddi_pll_sel = SKL_DPLL1;
                id = DPLL_ID_SKL_DPLL1;
                break;
        case PORT_C:
 -              pipe_config->ddi_pll_sel = SKL_DPLL2;
                id = DPLL_ID_SKL_DPLL2;
                break;
        default:
@@@ -10470,10 -9961,25 +10472,10 @@@ static void skylake_get_ddi_pll(struct 
        u32 temp;
  
        temp = I915_READ(DPLL_CTRL2) & DPLL_CTRL2_DDI_CLK_SEL_MASK(port);
 -      pipe_config->ddi_pll_sel = temp >> (port * 3 + 1);
 +      id = temp >> (port * 3 + 1);
  
 -      switch (pipe_config->ddi_pll_sel) {
 -      case SKL_DPLL0:
 -              id = DPLL_ID_SKL_DPLL0;
 -              break;
 -      case SKL_DPLL1:
 -              id = DPLL_ID_SKL_DPLL1;
 -              break;
 -      case SKL_DPLL2:
 -              id = DPLL_ID_SKL_DPLL2;
 -              break;
 -      case SKL_DPLL3:
 -              id = DPLL_ID_SKL_DPLL3;
 -              break;
 -      default:
 -              MISSING_CASE(pipe_config->ddi_pll_sel);
 +      if (WARN_ON(id < SKL_DPLL0 || id > SKL_DPLL3))
                return;
 -      }
  
        pipe_config->shared_dpll = intel_get_shared_dpll_by_id(dev_priv, id);
  }
@@@ -10483,9 -9989,10 +10485,9 @@@ static void haswell_get_ddi_pll(struct 
                                struct intel_crtc_state *pipe_config)
  {
        enum intel_dpll_id id;
 +      uint32_t ddi_pll_sel = I915_READ(PORT_CLK_SEL(port));
  
 -      pipe_config->ddi_pll_sel = I915_READ(PORT_CLK_SEL(port));
 -
 -      switch (pipe_config->ddi_pll_sel) {
 +      switch (ddi_pll_sel) {
        case PORT_CLK_SEL_WRPLL1:
                id = DPLL_ID_WRPLL1;
                break;
                id = DPLL_ID_LCPLL_2700;
                break;
        default:
 -              MISSING_CASE(pipe_config->ddi_pll_sel);
 +              MISSING_CASE(ddi_pll_sel);
                /* fall through */
        case PORT_CLK_SEL_NONE:
                return;
@@@ -10738,7 -10245,7 +10740,7 @@@ static void i845_update_cursor(struct d
        struct intel_crtc *intel_crtc = to_intel_crtc(crtc);
        uint32_t cntl = 0, size = 0;
  
 -      if (plane_state && plane_state->visible) {
 +      if (plane_state && plane_state->base.visible) {
                unsigned int width = plane_state->base.crtc_w;
                unsigned int height = plane_state->base.crtc_h;
                unsigned int stride = roundup_pow_of_two(width) * 4;
@@@ -10799,14 -10306,10 +10801,14 @@@ static void i9xx_update_cursor(struct d
        struct drm_device *dev = crtc->dev;
        struct drm_i915_private *dev_priv = to_i915(dev);
        struct intel_crtc *intel_crtc = to_intel_crtc(crtc);
 +      const struct skl_wm_values *wm = &dev_priv->wm.skl_results;
        int pipe = intel_crtc->pipe;
        uint32_t cntl = 0;
  
 -      if (plane_state && plane_state->visible) {
 +      if (INTEL_GEN(dev_priv) >= 9 && wm->dirty_pipes & drm_crtc_mask(crtc))
 +              skl_write_cursor_wm(intel_crtc, wm);
 +
 +      if (plane_state && plane_state->base.visible) {
                cntl = MCURSOR_GAMMA_ENABLE;
                switch (plane_state->base.crtc_w) {
                        case 64:
                if (HAS_DDI(dev))
                        cntl |= CURSOR_PIPE_CSC_ENABLE;
  
 -              if (plane_state->base.rotation == BIT(DRM_ROTATE_180))
 +              if (plane_state->base.rotation == DRM_ROTATE_180)
                        cntl |= CURSOR_ROTATE_180;
        }
  
@@@ -10873,7 -10376,7 +10875,7 @@@ static void intel_crtc_update_cursor(st
  
                /* ILK+ do this automagically */
                if (HAS_GMCH_DISPLAY(dev) &&
 -                  plane_state->base.rotation == BIT(DRM_ROTATE_180)) {
 +                  plane_state->base.rotation == DRM_ROTATE_180) {
                        base += (plane_state->base.crtc_h *
                                 plane_state->base.crtc_w - 1) * 4;
                }
@@@ -11006,7 -10509,7 +11008,7 @@@ intel_framebuffer_create_for_mode(struc
  
        fb = intel_framebuffer_create(dev, &mode_cmd, obj);
        if (IS_ERR(fb))
 -              drm_gem_object_unreference_unlocked(&obj->base);
 +              i915_gem_object_put_unlocked(obj);
  
        return fb;
  }
@@@ -11517,13 -11020,13 +11519,13 @@@ static void intel_unpin_work_fn(struct 
  
        mutex_lock(&dev->struct_mutex);
        intel_unpin_fb_obj(work->old_fb, primary->state->rotation);
 -      drm_gem_object_unreference(&work->pending_flip_obj->base);
 -
 -      if (work->flip_queued_req)
 -              i915_gem_request_assign(&work->flip_queued_req, NULL);
 +      i915_gem_object_put(work->pending_flip_obj);
        mutex_unlock(&dev->struct_mutex);
  
 -      intel_frontbuffer_flip_complete(dev, to_intel_plane(primary)->frontbuffer_bit);
 +      i915_gem_request_put(work->flip_queued_req);
 +
 +      intel_frontbuffer_flip_complete(to_i915(dev),
 +                                      to_intel_plane(primary)->frontbuffer_bit);
        intel_fbc_post_update(crtc);
        drm_framebuffer_unreference(work->old_fb);
  
@@@ -11544,8 -11047,10 +11546,8 @@@ static bool __pageflip_finished_cs(stru
  {
        struct drm_device *dev = crtc->base.dev;
        struct drm_i915_private *dev_priv = to_i915(dev);
 -      unsigned reset_counter;
  
 -      reset_counter = i915_reset_counter(&dev_priv->gpu_error);
 -      if (crtc->reset_counter != reset_counter)
 +      if (abort_flip_on_reset(crtc))
                return true;
  
        /*
@@@ -11686,7 -11191,7 +11688,7 @@@ static int intel_gen2_queue_flip(struc
                                 struct drm_i915_gem_request *req,
                                 uint32_t flags)
  {
 -      struct intel_engine_cs *engine = req->engine;
 +      struct intel_ring *ring = req->ring;
        struct intel_crtc *intel_crtc = to_intel_crtc(crtc);
        u32 flip_mask;
        int ret;
                flip_mask = MI_WAIT_FOR_PLANE_B_FLIP;
        else
                flip_mask = MI_WAIT_FOR_PLANE_A_FLIP;
 -      intel_ring_emit(engine, MI_WAIT_FOR_EVENT | flip_mask);
 -      intel_ring_emit(engine, MI_NOOP);
 -      intel_ring_emit(engine, MI_DISPLAY_FLIP |
 +      intel_ring_emit(ring, MI_WAIT_FOR_EVENT | flip_mask);
 +      intel_ring_emit(ring, MI_NOOP);
 +      intel_ring_emit(ring, MI_DISPLAY_FLIP |
                        MI_DISPLAY_FLIP_PLANE(intel_crtc->plane));
 -      intel_ring_emit(engine, fb->pitches[0]);
 -      intel_ring_emit(engine, intel_crtc->flip_work->gtt_offset);
 -      intel_ring_emit(engine, 0); /* aux display base address, unused */
 +      intel_ring_emit(ring, fb->pitches[0]);
 +      intel_ring_emit(ring, intel_crtc->flip_work->gtt_offset);
 +      intel_ring_emit(ring, 0); /* aux display base address, unused */
  
        return 0;
  }
@@@ -11720,7 -11225,7 +11722,7 @@@ static int intel_gen3_queue_flip(struc
                                 struct drm_i915_gem_request *req,
                                 uint32_t flags)
  {
 -      struct intel_engine_cs *engine = req->engine;
 +      struct intel_ring *ring = req->ring;
        struct intel_crtc *intel_crtc = to_intel_crtc(crtc);
        u32 flip_mask;
        int ret;
                flip_mask = MI_WAIT_FOR_PLANE_B_FLIP;
        else
                flip_mask = MI_WAIT_FOR_PLANE_A_FLIP;
 -      intel_ring_emit(engine, MI_WAIT_FOR_EVENT | flip_mask);
 -      intel_ring_emit(engine, MI_NOOP);
 -      intel_ring_emit(engine, MI_DISPLAY_FLIP_I915 |
 +      intel_ring_emit(ring, MI_WAIT_FOR_EVENT | flip_mask);
 +      intel_ring_emit(ring, MI_NOOP);
 +      intel_ring_emit(ring, MI_DISPLAY_FLIP_I915 |
                        MI_DISPLAY_FLIP_PLANE(intel_crtc->plane));
 -      intel_ring_emit(engine, fb->pitches[0]);
 -      intel_ring_emit(engine, intel_crtc->flip_work->gtt_offset);
 -      intel_ring_emit(engine, MI_NOOP);
 +      intel_ring_emit(ring, fb->pitches[0]);
 +      intel_ring_emit(ring, intel_crtc->flip_work->gtt_offset);
 +      intel_ring_emit(ring, MI_NOOP);
  
        return 0;
  }
@@@ -11751,7 -11256,7 +11753,7 @@@ static int intel_gen4_queue_flip(struc
                                 struct drm_i915_gem_request *req,
                                 uint32_t flags)
  {
 -      struct intel_engine_cs *engine = req->engine;
 +      struct intel_ring *ring = req->ring;
        struct drm_i915_private *dev_priv = to_i915(dev);
        struct intel_crtc *intel_crtc = to_intel_crtc(crtc);
        uint32_t pf, pipesrc;
         * Display Registers (which do not change across a page-flip)
         * so we need only reprogram the base address.
         */
 -      intel_ring_emit(engine, MI_DISPLAY_FLIP |
 +      intel_ring_emit(ring, MI_DISPLAY_FLIP |
                        MI_DISPLAY_FLIP_PLANE(intel_crtc->plane));
 -      intel_ring_emit(engine, fb->pitches[0]);
 -      intel_ring_emit(engine, intel_crtc->flip_work->gtt_offset |
 -                      obj->tiling_mode);
 +      intel_ring_emit(ring, fb->pitches[0]);
 +      intel_ring_emit(ring, intel_crtc->flip_work->gtt_offset |
 +                      intel_fb_modifier_to_tiling(fb->modifier[0]));
  
        /* XXX Enabling the panel-fitter across page-flip is so far
         * untested on non-native modes, so ignore it for now.
         */
        pf = 0;
        pipesrc = I915_READ(PIPESRC(intel_crtc->pipe)) & 0x0fff0fff;
 -      intel_ring_emit(engine, pf | pipesrc);
 +      intel_ring_emit(ring, pf | pipesrc);
  
        return 0;
  }
@@@ -11789,7 -11294,7 +11791,7 @@@ static int intel_gen6_queue_flip(struc
                                 struct drm_i915_gem_request *req,
                                 uint32_t flags)
  {
 -      struct intel_engine_cs *engine = req->engine;
 +      struct intel_ring *ring = req->ring;
        struct drm_i915_private *dev_priv = to_i915(dev);
        struct intel_crtc *intel_crtc = to_intel_crtc(crtc);
        uint32_t pf, pipesrc;
        if (ret)
                return ret;
  
 -      intel_ring_emit(engine, MI_DISPLAY_FLIP |
 +      intel_ring_emit(ring, MI_DISPLAY_FLIP |
                        MI_DISPLAY_FLIP_PLANE(intel_crtc->plane));
 -      intel_ring_emit(engine, fb->pitches[0] | obj->tiling_mode);
 -      intel_ring_emit(engine, intel_crtc->flip_work->gtt_offset);
 +      intel_ring_emit(ring, fb->pitches[0] |
 +                      intel_fb_modifier_to_tiling(fb->modifier[0]));
 +      intel_ring_emit(ring, intel_crtc->flip_work->gtt_offset);
  
        /* Contrary to the suggestions in the documentation,
         * "Enable Panel Fitter" does not seem to be required when page
         */
        pf = 0;
        pipesrc = I915_READ(PIPESRC(intel_crtc->pipe)) & 0x0fff0fff;
 -      intel_ring_emit(engine, pf | pipesrc);
 +      intel_ring_emit(ring, pf | pipesrc);
  
        return 0;
  }
@@@ -11825,7 -11329,7 +11827,7 @@@ static int intel_gen7_queue_flip(struc
                                 struct drm_i915_gem_request *req,
                                 uint32_t flags)
  {
 -      struct intel_engine_cs *engine = req->engine;
 +      struct intel_ring *ring = req->ring;
        struct intel_crtc *intel_crtc = to_intel_crtc(crtc);
        uint32_t plane_bit = 0;
        int len, ret;
        }
  
        len = 4;
 -      if (engine->id == RCS) {
 +      if (req->engine->id == RCS) {
                len += 6;
                /*
                 * On Gen 8, SRM is now taking an extra dword to accommodate
         * for the RCS also doesn't appear to drop events. Setting the DERRMR
         * to zero does lead to lockups within MI_DISPLAY_FLIP.
         */
 -      if (engine->id == RCS) {
 -              intel_ring_emit(engine, MI_LOAD_REGISTER_IMM(1));
 -              intel_ring_emit_reg(engine, DERRMR);
 -              intel_ring_emit(engine, ~(DERRMR_PIPEA_PRI_FLIP_DONE |
 +      if (req->engine->id == RCS) {
 +              intel_ring_emit(ring, MI_LOAD_REGISTER_IMM(1));
 +              intel_ring_emit_reg(ring, DERRMR);
 +              intel_ring_emit(ring, ~(DERRMR_PIPEA_PRI_FLIP_DONE |
                                          DERRMR_PIPEB_PRI_FLIP_DONE |
                                          DERRMR_PIPEC_PRI_FLIP_DONE));
                if (IS_GEN8(dev))
 -                      intel_ring_emit(engine, MI_STORE_REGISTER_MEM_GEN8 |
 +                      intel_ring_emit(ring, MI_STORE_REGISTER_MEM_GEN8 |
                                              MI_SRM_LRM_GLOBAL_GTT);
                else
 -                      intel_ring_emit(engine, MI_STORE_REGISTER_MEM |
 +                      intel_ring_emit(ring, MI_STORE_REGISTER_MEM |
                                              MI_SRM_LRM_GLOBAL_GTT);
 -              intel_ring_emit_reg(engine, DERRMR);
 -              intel_ring_emit(engine, engine->scratch.gtt_offset + 256);
 +              intel_ring_emit_reg(ring, DERRMR);
 +              intel_ring_emit(ring,
 +                              i915_ggtt_offset(req->engine->scratch) + 256);
                if (IS_GEN8(dev)) {
 -                      intel_ring_emit(engine, 0);
 -                      intel_ring_emit(engine, MI_NOOP);
 +                      intel_ring_emit(ring, 0);
 +                      intel_ring_emit(ring, MI_NOOP);
                }
        }
  
 -      intel_ring_emit(engine, MI_DISPLAY_FLIP_I915 | plane_bit);
 -      intel_ring_emit(engine, (fb->pitches[0] | obj->tiling_mode));
 -      intel_ring_emit(engine, intel_crtc->flip_work->gtt_offset);
 -      intel_ring_emit(engine, (MI_NOOP));
 +      intel_ring_emit(ring, MI_DISPLAY_FLIP_I915 | plane_bit);
 +      intel_ring_emit(ring, fb->pitches[0] |
 +                      intel_fb_modifier_to_tiling(fb->modifier[0]));
 +      intel_ring_emit(ring, intel_crtc->flip_work->gtt_offset);
 +      intel_ring_emit(ring, (MI_NOOP));
  
        return 0;
  }
@@@ -11944,8 -11446,7 +11946,8 @@@ static bool use_mmio_flip(struct intel_
        if (resv && !reservation_object_test_signaled_rcu(resv, false))
                return true;
  
 -      return engine != i915_gem_request_get_engine(obj->last_write_req);
 +      return engine != i915_gem_active_get_engine(&obj->last_write,
 +                                                  &obj->base.dev->struct_mutex);
  }
  
  static void skl_do_mmio_flip(struct intel_crtc *intel_crtc,
        struct drm_i915_private *dev_priv = to_i915(dev);
        struct drm_framebuffer *fb = intel_crtc->base.primary->fb;
        const enum pipe pipe = intel_crtc->pipe;
 -      u32 ctl, stride, tile_height;
 +      u32 ctl, stride = skl_plane_stride(fb, 0, rotation);
  
        ctl = I915_READ(PLANE_CTL(pipe, 0));
        ctl &= ~PLANE_CTL_TILED_MASK;
                MISSING_CASE(fb->modifier[0]);
        }
  
 -      /*
 -       * The stride is either expressed as a multiple of 64 bytes chunks for
 -       * linear buffers or in number of tiles for tiled buffers.
 -       */
 -      if (intel_rotation_90_or_270(rotation)) {
 -              /* stride = Surface height in tiles */
 -              tile_height = intel_tile_height(dev_priv, fb->modifier[0], 0);
 -              stride = DIV_ROUND_UP(fb->height, tile_height);
 -      } else {
 -              stride = fb->pitches[0] /
 -                      intel_fb_stride_alignment(dev_priv, fb->modifier[0],
 -                                                fb->pixel_format);
 -      }
 -
        /*
         * Both PLANE_CTL and PLANE_STRIDE are not updated on vblank but on
         * PLANE_SURF updates, the update is then guaranteed to be atomic.
@@@ -11992,13 -11507,15 +11994,13 @@@ static void ilk_do_mmio_flip(struct int
  {
        struct drm_device *dev = intel_crtc->base.dev;
        struct drm_i915_private *dev_priv = to_i915(dev);
 -      struct intel_framebuffer *intel_fb =
 -              to_intel_framebuffer(intel_crtc->base.primary->fb);
 -      struct drm_i915_gem_object *obj = intel_fb->obj;
 +      struct drm_framebuffer *fb = intel_crtc->base.primary->fb;
        i915_reg_t reg = DSPCNTR(intel_crtc->plane);
        u32 dspcntr;
  
        dspcntr = I915_READ(reg);
  
 -      if (obj->tiling_mode != I915_TILING_NONE)
 +      if (fb->modifier[0] == I915_FORMAT_MOD_X_TILED)
                dspcntr |= DISPPLANE_TILED;
        else
                dspcntr &= ~DISPPLANE_TILED;
@@@ -12021,8 -11538,9 +12023,8 @@@ static void intel_mmio_flip_work_func(s
        struct reservation_object *resv;
  
        if (work->flip_queued_req)
 -              WARN_ON(__i915_wait_request(work->flip_queued_req,
 -                                          false, NULL,
 -                                          &dev_priv->rps.mmioflips));
 +              WARN_ON(i915_wait_request(work->flip_queued_req,
 +                                        0, NULL, NO_WAITBOOST));
  
        /* For framebuffer backed by dmabuf, wait for fence */
        resv = i915_gem_object_get_dmabuf_resv(obj);
@@@ -12133,8 -11651,7 +12135,8 @@@ static int intel_crtc_page_flip(struct 
        struct intel_flip_work *work;
        struct intel_engine_cs *engine;
        bool mmio_flip;
 -      struct drm_i915_gem_request *request = NULL;
 +      struct drm_i915_gem_request *request;
 +      struct i915_vma *vma;
        int ret;
  
        /*
  
        /* Reference the objects for the scheduled work. */
        drm_framebuffer_reference(work->old_fb);
 -      drm_gem_object_reference(&obj->base);
  
        crtc->primary->fb = fb;
        update_state_fb(crtc->primary);
  
 -      intel_fbc_pre_update(intel_crtc, intel_crtc->config,
 -                           to_intel_plane_state(primary->state));
 -
 -      work->pending_flip_obj = obj;
 +      work->pending_flip_obj = i915_gem_object_get(obj);
  
        ret = i915_mutex_lock_interruptible(dev);
        if (ret)
                goto cleanup;
  
 -      intel_crtc->reset_counter = i915_reset_counter(&dev_priv->gpu_error);
 -      if (__i915_reset_in_progress_or_wedged(intel_crtc->reset_counter)) {
 +      intel_crtc->reset_count = i915_reset_count(&dev_priv->gpu_error);
 +      if (i915_reset_in_progress_or_wedged(&dev_priv->gpu_error)) {
                ret = -EIO;
                goto cleanup;
        }
  
        if (IS_VALLEYVIEW(dev) || IS_CHERRYVIEW(dev)) {
                engine = &dev_priv->engine[BCS];
 -              if (obj->tiling_mode != intel_fb_obj(work->old_fb)->tiling_mode)
 +              if (fb->modifier[0] != old_fb->modifier[0])
                        /* vlv: DISPLAY_FLIP fails to change tiling */
                        engine = NULL;
        } else if (IS_IVYBRIDGE(dev) || IS_HASWELL(dev)) {
                engine = &dev_priv->engine[BCS];
        } else if (INTEL_INFO(dev)->gen >= 7) {
 -              engine = i915_gem_request_get_engine(obj->last_write_req);
 +              engine = i915_gem_active_get_engine(&obj->last_write,
 +                                                  &obj->base.dev->struct_mutex);
                if (engine == NULL || engine->id != RCS)
                        engine = &dev_priv->engine[BCS];
        } else {
  
        mmio_flip = use_mmio_flip(engine, obj);
  
 -      /* When using CS flips, we want to emit semaphores between rings.
 -       * However, when using mmio flips we will create a task to do the
 -       * synchronisation, so all we want here is to pin the framebuffer
 -       * into the display plane and skip any waits.
 -       */
 -      if (!mmio_flip) {
 -              ret = i915_gem_object_sync(obj, engine, &request);
 -              if (!ret && !request) {
 -                      request = i915_gem_request_alloc(engine, NULL);
 -                      ret = PTR_ERR_OR_ZERO(request);
 -              }
 -
 -              if (ret)
 -                      goto cleanup_pending;
 -      }
 -
 -      ret = intel_pin_and_fence_fb_obj(fb, primary->state->rotation);
 -      if (ret)
 +      vma = intel_pin_and_fence_fb_obj(fb, primary->state->rotation);
 +      if (IS_ERR(vma)) {
 +              ret = PTR_ERR(vma);
                goto cleanup_pending;
 +      }
  
 -      work->gtt_offset = intel_plane_obj_offset(to_intel_plane(primary),
 -                                                obj, 0);
 +      work->gtt_offset = intel_fb_gtt_offset(fb, primary->state->rotation);
        work->gtt_offset += intel_crtc->dspaddr_offset;
        work->rotation = crtc->primary->state->rotation;
  
 +      /*
 +       * There's the potential that the next frame will not be compatible with
 +       * FBC, so we want to call pre_update() before the actual page flip.
 +       * The problem is that pre_update() caches some information about the fb
 +       * object, so we want to do this only after the object is pinned. Let's
 +       * be on the safe side and do this immediately before scheduling the
 +       * flip.
 +       */
 +      intel_fbc_pre_update(intel_crtc, intel_crtc->config,
 +                           to_intel_plane_state(primary->state));
 +
        if (mmio_flip) {
                INIT_WORK(&work->mmio_work, intel_mmio_flip_work_func);
  
 -              i915_gem_request_assign(&work->flip_queued_req,
 -                                      obj->last_write_req);
 -
 +              work->flip_queued_req = i915_gem_active_get(&obj->last_write,
 +                                                          &obj->base.dev->struct_mutex);
                schedule_work(&work->mmio_work);
        } else {
 -              i915_gem_request_assign(&work->flip_queued_req, request);
 +              request = i915_gem_request_alloc(engine, engine->last_context);
 +              if (IS_ERR(request)) {
 +                      ret = PTR_ERR(request);
 +                      goto cleanup_unpin;
 +              }
 +
 +              ret = i915_gem_request_await_object(request, obj, false);
 +              if (ret)
 +                      goto cleanup_request;
 +
                ret = dev_priv->display.queue_flip(dev, crtc, fb, obj, request,
                                                   page_flip_flags);
                if (ret)
 -                      goto cleanup_unpin;
 +                      goto cleanup_request;
  
                intel_mark_page_flip_active(intel_crtc, work);
  
 +              work->flip_queued_req = i915_gem_request_get(request);
                i915_add_request_no_flush(request);
        }
  
                          to_intel_plane(primary)->frontbuffer_bit);
        mutex_unlock(&dev->struct_mutex);
  
 -      intel_frontbuffer_flip_prepare(dev,
 +      intel_frontbuffer_flip_prepare(to_i915(dev),
                                       to_intel_plane(primary)->frontbuffer_bit);
  
        trace_i915_flip_request(intel_crtc->plane, obj);
  
        return 0;
  
 +cleanup_request:
 +      i915_add_request_no_flush(request);
  cleanup_unpin:
        intel_unpin_fb_obj(fb, crtc->primary->state->rotation);
  cleanup_pending:
 -      if (!IS_ERR_OR_NULL(request))
 -              i915_add_request_no_flush(request);
        atomic_dec(&intel_crtc->unpin_work_count);
        mutex_unlock(&dev->struct_mutex);
  cleanup:
        crtc->primary->fb = old_fb;
        update_state_fb(crtc->primary);
  
 -      drm_gem_object_unreference_unlocked(&obj->base);
 +      i915_gem_object_put_unlocked(obj);
        drm_framebuffer_unreference(work->old_fb);
  
        spin_lock_irq(&dev->event_lock);
@@@ -12378,7 -11893,7 +12380,7 @@@ static bool intel_wm_need_update(struc
        struct intel_plane_state *cur = to_intel_plane_state(plane->state);
  
        /* Update watermarks on tiling or size changes. */
 -      if (new->visible != cur->visible)
 +      if (new->base.visible != cur->base.visible)
                return true;
  
        if (!cur->base.fb || !new->base.fb)
  
        if (cur->base.fb->modifier[0] != new->base.fb->modifier[0] ||
            cur->base.rotation != new->base.rotation ||
 -          drm_rect_width(&new->src) != drm_rect_width(&cur->src) ||
 -          drm_rect_height(&new->src) != drm_rect_height(&cur->src) ||
 -          drm_rect_width(&new->dst) != drm_rect_width(&cur->dst) ||
 -          drm_rect_height(&new->dst) != drm_rect_height(&cur->dst))
 +          drm_rect_width(&new->base.src) != drm_rect_width(&cur->base.src) ||
 +          drm_rect_height(&new->base.src) != drm_rect_height(&cur->base.src) ||
 +          drm_rect_width(&new->base.dst) != drm_rect_width(&cur->base.dst) ||
 +          drm_rect_height(&new->base.dst) != drm_rect_height(&cur->base.dst))
                return true;
  
        return false;
  
  static bool needs_scaling(struct intel_plane_state *state)
  {
 -      int src_w = drm_rect_width(&state->src) >> 16;
 -      int src_h = drm_rect_height(&state->src) >> 16;
 -      int dst_w = drm_rect_width(&state->dst);
 -      int dst_h = drm_rect_height(&state->dst);
 +      int src_w = drm_rect_width(&state->base.src) >> 16;
 +      int src_h = drm_rect_height(&state->base.src) >> 16;
 +      int dst_w = drm_rect_width(&state->base.dst);
 +      int dst_h = drm_rect_height(&state->base.dst);
  
        return (src_w != dst_w || src_h != dst_h);
  }
@@@ -12431,8 -11946,8 +12433,8 @@@ int intel_plane_atomic_calc_changes(str
                        return ret;
        }
  
 -      was_visible = old_plane_state->visible;
 -      visible = to_intel_plane_state(plane_state)->visible;
 +      was_visible = old_plane_state->base.visible;
 +      visible = to_intel_plane_state(plane_state)->base.visible;
  
        if (!was_crtc_enabled && WARN_ON(was_visible))
                was_visible = false;
         * only combine the results from all planes in the current place?
         */
        if (!is_crtc_enabled)
 -              to_intel_plane_state(plane_state)->visible = visible = false;
 +              to_intel_plane_state(plane_state)->base.visible = visible = false;
  
        if (!was_visible && !visible)
                return 0;
@@@ -12786,9 -12301,10 +12788,9 @@@ static void intel_dump_pipe_config(stru
        DRM_DEBUG_KMS("double wide: %i\n", pipe_config->double_wide);
  
        if (IS_BROXTON(dev)) {
 -              DRM_DEBUG_KMS("ddi_pll_sel: %u; dpll_hw_state: ebb0: 0x%x, ebb4: 0x%x,"
 +              DRM_DEBUG_KMS("dpll_hw_state: ebb0: 0x%x, ebb4: 0x%x,"
                              "pll0: 0x%x, pll1: 0x%x, pll2: 0x%x, pll3: 0x%x, "
                              "pll6: 0x%x, pll8: 0x%x, pll9: 0x%x, pll10: 0x%x, pcsdw12: 0x%x\n",
 -                            pipe_config->ddi_pll_sel,
                              pipe_config->dpll_hw_state.ebb0,
                              pipe_config->dpll_hw_state.ebb4,
                              pipe_config->dpll_hw_state.pll0,
                              pipe_config->dpll_hw_state.pll10,
                              pipe_config->dpll_hw_state.pcsdw12);
        } else if (IS_SKYLAKE(dev) || IS_KABYLAKE(dev)) {
 -              DRM_DEBUG_KMS("ddi_pll_sel: %u; dpll_hw_state: "
 +              DRM_DEBUG_KMS("dpll_hw_state: "
                              "ctrl1: 0x%x, cfgcr1: 0x%x, cfgcr2: 0x%x\n",
 -                            pipe_config->ddi_pll_sel,
                              pipe_config->dpll_hw_state.ctrl1,
                              pipe_config->dpll_hw_state.cfgcr1,
                              pipe_config->dpll_hw_state.cfgcr2);
        } else if (HAS_DDI(dev)) {
 -              DRM_DEBUG_KMS("ddi_pll_sel: 0x%x; dpll_hw_state: wrpll: 0x%x spll: 0x%x\n",
 -                            pipe_config->ddi_pll_sel,
 +              DRM_DEBUG_KMS("dpll_hw_state: wrpll: 0x%x spll: 0x%x\n",
                              pipe_config->dpll_hw_state.wrpll,
                              pipe_config->dpll_hw_state.spll);
        } else {
  
        DRM_DEBUG_KMS("planes on this crtc\n");
        list_for_each_entry(plane, &dev->mode_config.plane_list, head) {
 +              char *format_name;
                intel_plane = to_intel_plane(plane);
                if (intel_plane->pipe != crtc->pipe)
                        continue;
                        continue;
                }
  
 +              format_name = drm_get_format_name(fb->pixel_format);
 +
                DRM_DEBUG_KMS("[PLANE:%d:%s] enabled",
                              plane->base.id, plane->name);
                DRM_DEBUG_KMS("\tFB:%d, fb = %ux%u format = %s",
 -                            fb->base.id, fb->width, fb->height,
 -                            drm_get_format_name(fb->pixel_format));
 +                            fb->base.id, fb->width, fb->height, format_name);
                DRM_DEBUG_KMS("\tscaler:%d src %dx%d+%d+%d dst %dx%d+%d+%d\n",
                              state->scaler_id,
 -                            state->src.x1 >> 16, state->src.y1 >> 16,
 -                            drm_rect_width(&state->src) >> 16,
 -                            drm_rect_height(&state->src) >> 16,
 -                            state->dst.x1, state->dst.y1,
 -                            drm_rect_width(&state->dst),
 -                            drm_rect_height(&state->dst));
 +                            state->base.src.x1 >> 16,
 +                            state->base.src.y1 >> 16,
 +                            drm_rect_width(&state->base.src) >> 16,
 +                            drm_rect_height(&state->base.src) >> 16,
 +                            state->base.dst.x1, state->base.dst.y1,
 +                            drm_rect_width(&state->base.dst),
 +                            drm_rect_height(&state->base.dst));
 +
 +              kfree(format_name);
        }
  }
  
@@@ -12859,7 -12372,6 +12861,7 @@@ static bool check_digital_port_conflict
        struct drm_device *dev = state->dev;
        struct drm_connector *connector;
        unsigned int used_ports = 0;
 +      unsigned int used_mst_ports = 0;
  
        /*
         * Walk the connector list instead of the encoder
                                return false;
  
                        used_ports |= port_mask;
 +                      break;
 +              case INTEL_OUTPUT_DP_MST:
 +                      used_mst_ports |=
 +                              1 << enc_to_mst(&encoder->base)->primary->port;
 +                      break;
                default:
                        break;
                }
        }
  
 +      /* can't mix MST and SST/HDMI on the same port */
 +      if (used_ports & used_mst_ports)
 +              return false;
 +
        return true;
  }
  
@@@ -12920,6 -12423,7 +12922,6 @@@ clear_intel_crtc_state(struct intel_crt
        struct intel_crtc_scaler_state scaler_state;
        struct intel_dpll_hw_state dpll_hw_state;
        struct intel_shared_dpll *shared_dpll;
 -      uint32_t ddi_pll_sel;
        bool force_thru;
  
        /* FIXME: before the switch to atomic started, a new pipe_config was
        scaler_state = crtc_state->scaler_state;
        shared_dpll = crtc_state->shared_dpll;
        dpll_hw_state = crtc_state->dpll_hw_state;
 -      ddi_pll_sel = crtc_state->ddi_pll_sel;
        force_thru = crtc_state->pch_pfit.force_thru;
  
        memset(crtc_state, 0, sizeof *crtc_state);
        crtc_state->scaler_state = scaler_state;
        crtc_state->shared_dpll = shared_dpll;
        crtc_state->dpll_hw_state = dpll_hw_state;
 -      crtc_state->ddi_pll_sel = ddi_pll_sel;
        crtc_state->pch_pfit.force_thru = force_thru;
  }
  
@@@ -13026,7 -12532,7 +13028,7 @@@ encoder_retry
  
                encoder = to_intel_encoder(connector_state->best_encoder);
  
 -              if (!(encoder->compute_config(encoder, pipe_config))) {
 +              if (!(encoder->compute_config(encoder, pipe_config, connector_state))) {
                        DRM_DEBUG_KMS("Encoder config failure\n");
                        goto fail;
                }
@@@ -13114,6 -12620,12 +13116,6 @@@ static bool intel_fuzzy_clock_check(in
        return false;
  }
  
 -#define for_each_intel_crtc_masked(dev, mask, intel_crtc) \
 -      list_for_each_entry((intel_crtc), \
 -                          &(dev)->mode_config.crtc_list, \
 -                          base.head) \
 -              for_each_if (mask & (1 <<(intel_crtc)->pipe))
 -
  static bool
  intel_compare_m_n(unsigned int m, unsigned int n,
                  unsigned int m2, unsigned int n2,
@@@ -13361,6 -12873,8 +13363,6 @@@ intel_pipe_config_compare(struct drm_de
  
        PIPE_CONF_CHECK_I(double_wide);
  
 -      PIPE_CONF_CHECK_X(ddi_pll_sel);
 -
        PIPE_CONF_CHECK_P(shared_dpll);
        PIPE_CONF_CHECK_X(dpll_hw_state.dpll);
        PIPE_CONF_CHECK_X(dpll_hw_state.dpll_md);
@@@ -13442,23 -12956,16 +13444,23 @@@ static void verify_wm_state(struct drm_
                          hw_entry->start, hw_entry->end);
        }
  
 -      /* cursor */
 -      hw_entry = &hw_ddb.plane[pipe][PLANE_CURSOR];
 -      sw_entry = &sw_ddb->plane[pipe][PLANE_CURSOR];
 -
 -      if (!skl_ddb_entry_equal(hw_entry, sw_entry)) {
 -              DRM_ERROR("mismatch in DDB state pipe %c cursor "
 -                        "(expected (%u,%u), found (%u,%u))\n",
 -                        pipe_name(pipe),
 -                        sw_entry->start, sw_entry->end,
 -                        hw_entry->start, hw_entry->end);
 +      /*
 +       * cursor
 +       * If the cursor plane isn't active, we may not have updated its ddb
 +       * allocation. In that case since the ddb allocation will be updated
 +       * once the plane becomes visible, we can skip this check
 +       */
 +      if (intel_crtc->cursor_addr) {
 +              hw_entry = &hw_ddb.plane[pipe][PLANE_CURSOR];
 +              sw_entry = &sw_ddb->plane[pipe][PLANE_CURSOR];
 +
 +              if (!skl_ddb_entry_equal(hw_entry, sw_entry)) {
 +                      DRM_ERROR("mismatch in DDB state pipe %c cursor "
 +                                "(expected (%u,%u), found (%u,%u))\n",
 +                                pipe_name(pipe),
 +                                sw_entry->start, sw_entry->end,
 +                                hw_entry->start, hw_entry->end);
 +              }
        }
  }
  
@@@ -14073,9 -13580,8 +14075,9 @@@ static int intel_atomic_prepare_commit(
                        if (!intel_plane_state->wait_req)
                                continue;
  
 -                      ret = __i915_wait_request(intel_plane_state->wait_req,
 -                                                true, NULL, NULL);
 +                      ret = i915_wait_request(intel_plane_state->wait_req,
 +                                              I915_WAIT_INTERRUPTIBLE,
 +                                              NULL, NULL);
                        if (ret) {
                                /* Any hang should be swallowed by the wait */
                                WARN_ON(ret == -EIO);
@@@ -14165,111 -13671,6 +14167,111 @@@ static bool needs_vblank_wait(struct in
        return false;
  }
  
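 +/* Enable a freshly modeset pipe or prepare a running one, then commit its plane updates. */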
 +static void intel_update_crtc(struct drm_crtc *crtc,
 +                            struct drm_atomic_state *state,
 +                            struct drm_crtc_state *old_crtc_state,
 +                            unsigned int *crtc_vblank_mask)
 +{
 +      struct drm_device *dev = crtc->dev;
 +      struct drm_i915_private *dev_priv = to_i915(dev);
 +      struct intel_crtc *intel_crtc = to_intel_crtc(crtc);
 +      struct intel_crtc_state *pipe_config = to_intel_crtc_state(crtc->state);
 +      bool modeset = needs_modeset(crtc->state);
 +
 +      if (modeset) {
 +              update_scanline_offset(intel_crtc);
 +              dev_priv->display.crtc_enable(pipe_config, state);
 +      } else {
 +              intel_pre_plane_update(to_intel_crtc_state(old_crtc_state));
 +      }
 +
 +      if (drm_atomic_get_existing_plane_state(state, crtc->primary)) {
 +              intel_fbc_enable(
 +                  intel_crtc, pipe_config,
 +                  to_intel_plane_state(crtc->primary->state));
 +      }
 +
 +      drm_atomic_helper_commit_planes_on_crtc(old_crtc_state);
 +
 +      if (needs_vblank_wait(pipe_config))
 +              *crtc_vblank_mask |= drm_crtc_mask(crtc);
 +}
 +
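 +/* Default path: update every active CRTC in the order it appears in the atomic state. */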
 +static void intel_update_crtcs(struct drm_atomic_state *state,
 +                             unsigned int *crtc_vblank_mask)
 +{
 +      struct drm_crtc *crtc;
 +      struct drm_crtc_state *old_crtc_state;
 +      int i;
 +
 +      for_each_crtc_in_state(state, crtc, old_crtc_state, i) {
 +              if (!crtc->state->active)
 +                      continue;
 +
 +              intel_update_crtc(crtc, state, old_crtc_state,
 +                                crtc_vblank_mask);
 +      }
 +}
 +
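 +/* SKL+ path: update CRTCs in an order that keeps their DDB allocations from overlapping between steps. */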
 +static void skl_update_crtcs(struct drm_atomic_state *state,
 +                           unsigned int *crtc_vblank_mask)
 +{
 +      struct drm_device *dev = state->dev;
 +      struct drm_i915_private *dev_priv = to_i915(dev);
 +      struct intel_atomic_state *intel_state = to_intel_atomic_state(state);
 +      struct drm_crtc *crtc;
 +      struct drm_crtc_state *old_crtc_state;
 +      struct skl_ddb_allocation *new_ddb = &intel_state->wm_results.ddb;
 +      struct skl_ddb_allocation *cur_ddb = &dev_priv->wm.skl_hw.ddb;
 +      unsigned int updated = 0;
 +      bool progress;
 +      enum pipe pipe;
 +
 +      /*
 +       * Whenever the number of active pipes changes, we need to make sure we
 +       * update the pipes in the right order so that their ddb allocations
 +       * never overlap with each other in between CRTC updates. Otherwise we'll
 +       * cause pipe underruns and other bad stuff.
 +       */
 +      do {
 +              int i;
 +              progress = false;
 +
 +              for_each_crtc_in_state(state, crtc, old_crtc_state, i) {
 +                      bool vbl_wait = false;
 +                      unsigned int cmask = drm_crtc_mask(crtc);
 +                      pipe = to_intel_crtc(crtc)->pipe;
 +
 +                      if (updated & cmask || !crtc->state->active)
 +                              continue;
 +                      if (skl_ddb_allocation_overlaps(state, cur_ddb, new_ddb,
 +                                                      pipe))
 +                              continue;
 +
 +                      updated |= cmask;
 +
 +                      /*
 +                       * If this is an already active pipe, its DDB changed,
 +                       * and this isn't the last pipe that needs updating
 +                       * then we need to wait for a vblank to pass for the
 +                       * new ddb allocation to take effect.
 +                       */
 +                      if (!skl_ddb_allocation_equals(cur_ddb, new_ddb, pipe) &&
 +                          !crtc->state->active_changed &&
 +                          intel_state->wm_results.dirty_pipes != updated)
 +                              vbl_wait = true;
 +
 +                      intel_update_crtc(crtc, state, old_crtc_state,
 +                                        crtc_vblank_mask);
 +
 +                      if (vbl_wait)
 +                              intel_wait_for_vblank(dev, pipe);
 +
 +                      progress = true;
 +              }
 +      } while (progress);
 +}
 +
  static void intel_atomic_commit_tail(struct drm_atomic_state *state)
  {
        struct drm_device *dev = state->dev;
                if (!intel_plane_state->wait_req)
                        continue;
  
 -              ret = __i915_wait_request(intel_plane_state->wait_req,
 -                                        true, NULL, NULL);
 +              ret = i915_wait_request(intel_plane_state->wait_req,
 +                                      0, NULL, NULL);
                /* EIO should be eaten, and we can't get interrupted in the
                 * worker, and blocking commits have waited already. */
                WARN_ON(ret);
  
                if (old_crtc_state->active) {
                        intel_crtc_disable_planes(crtc, old_crtc_state->plane_mask);
 -                      dev_priv->display.crtc_disable(crtc);
 +                      dev_priv->display.crtc_disable(to_intel_crtc_state(old_crtc_state), state);
                        intel_crtc->active = false;
                        intel_fbc_disable(intel_crtc);
                        intel_disable_shared_dpll(intel_crtc);
                intel_modeset_verify_disabled(dev);
        }
  
 -      /* Now enable the clocks, plane, pipe, and connectors that we set up. */
 +      /* Complete the events for pipes that have now been disabled */
        for_each_crtc_in_state(state, crtc, old_crtc_state, i) {
 -              struct intel_crtc *intel_crtc = to_intel_crtc(crtc);
                bool modeset = needs_modeset(crtc->state);
 -              struct intel_crtc_state *pipe_config =
 -                      to_intel_crtc_state(crtc->state);
 -
 -              if (modeset && crtc->state->active) {
 -                      update_scanline_offset(to_intel_crtc(crtc));
 -                      dev_priv->display.crtc_enable(crtc);
 -              }
  
                /* Complete events for now disabled pipes here. */
                if (modeset && !crtc->state->active && crtc->state->event) {
  
                        crtc->state->event = NULL;
                }
 -
 -              if (!modeset)
 -                      intel_pre_plane_update(to_intel_crtc_state(old_crtc_state));
 -
 -              if (crtc->state->active &&
 -                  drm_atomic_get_existing_plane_state(state, crtc->primary))
 -                      intel_fbc_enable(intel_crtc, pipe_config, to_intel_plane_state(crtc->primary->state));
 -
 -              if (crtc->state->active)
 -                      drm_atomic_helper_commit_planes_on_crtc(old_crtc_state);
 -
 -              if (pipe_config->base.active && needs_vblank_wait(pipe_config))
 -                      crtc_vblank_mask |= 1 << i;
        }
  
 +      /* Now enable the clocks, plane, pipe, and connectors that we set up. */
 +      dev_priv->display.update_crtcs(state, &crtc_vblank_mask);
 +
        /* FIXME: We should call drm_atomic_helper_commit_hw_done() here
         * already, but still need the state for the delayed optimization. To
         * fix this:
@@@ -14463,12 -13882,19 +14465,12 @@@ static void intel_atomic_track_fbs(stru
  {
        struct drm_plane_state *old_plane_state;
        struct drm_plane *plane;
 -      struct drm_i915_gem_object *obj, *old_obj;
 -      struct intel_plane *intel_plane;
        int i;
  
 -      mutex_lock(&state->dev->struct_mutex);
 -      for_each_plane_in_state(state, plane, old_plane_state, i) {
 -              obj = intel_fb_obj(plane->state->fb);
 -              old_obj = intel_fb_obj(old_plane_state->fb);
 -              intel_plane = to_intel_plane(plane);
 -
 -              i915_gem_track_fb(old_obj, obj, intel_plane->frontbuffer_bit);
 -      }
 -      mutex_unlock(&state->dev->struct_mutex);
 +      for_each_plane_in_state(state, plane, old_plane_state, i)
 +              i915_gem_track_fb(intel_fb_obj(old_plane_state->fb),
 +                                intel_fb_obj(plane->state->fb),
 +                                to_intel_plane(plane)->frontbuffer_bit);
  }
  
  /**
                drm_atomic_state_free(state);
  }
  
 -#undef for_each_intel_crtc_masked
 -
  /*
   * FIXME: Remove this once i915 is fully DRIVER_ATOMIC by calling
   *        drm_atomic_helper_legacy_gamma_set() directly.
@@@ -14632,7 -14060,7 +14634,7 @@@ static const struct drm_crtc_funcs inte
   */
  int
  intel_prepare_plane_fb(struct drm_plane *plane,
 -                     const struct drm_plane_state *new_state)
 +                     struct drm_plane_state *new_state)
  {
        struct drm_device *dev = plane->dev;
        struct drm_framebuffer *fb = new_state->fb;
                if (ret)
                        DRM_DEBUG_KMS("failed to attach phys object\n");
        } else {
 -              ret = intel_pin_and_fence_fb_obj(fb, new_state->rotation);
 +              struct i915_vma *vma;
 +
 +              vma = intel_pin_and_fence_fb_obj(fb, new_state->rotation);
 +              if (IS_ERR(vma))
 +                      ret = PTR_ERR(vma);
        }
  
        if (ret == 0) {
 -              struct intel_plane_state *plane_state =
 -                      to_intel_plane_state(new_state);
 -
 -              i915_gem_request_assign(&plane_state->wait_req,
 -                                      obj->last_write_req);
 +              to_intel_plane_state(new_state)->wait_req =
 +                      i915_gem_active_get(&obj->last_write,
 +                                          &obj->base.dev->struct_mutex);
        }
  
        return ret;
   */
  void
  intel_cleanup_plane_fb(struct drm_plane *plane,
 -                     const struct drm_plane_state *old_state)
 +                     struct drm_plane_state *old_state)
  {
        struct drm_device *dev = plane->dev;
        struct intel_plane_state *old_intel_state;
 +      struct intel_plane_state *intel_state = to_intel_plane_state(plane->state);
        struct drm_i915_gem_object *old_obj = intel_fb_obj(old_state->fb);
        struct drm_i915_gem_object *obj = intel_fb_obj(plane->state->fb);
  
            !INTEL_INFO(dev)->cursor_needs_physical))
                intel_unpin_fb_obj(old_state->fb, old_state->rotation);
  
 +      i915_gem_request_assign(&intel_state->wait_req, NULL);
        i915_gem_request_assign(&old_intel_state->wait_req, NULL);
  }
  
@@@ -14770,14 -14194,13 +14772,14 @@@ intel_check_primary_plane(struct drm_pl
                          struct intel_crtc_state *crtc_state,
                          struct intel_plane_state *state)
  {
 +      struct drm_i915_private *dev_priv = to_i915(plane->dev);
        struct drm_crtc *crtc = state->base.crtc;
 -      struct drm_framebuffer *fb = state->base.fb;
        int min_scale = DRM_PLANE_HELPER_NO_SCALING;
        int max_scale = DRM_PLANE_HELPER_NO_SCALING;
        bool can_position = false;
 +      int ret;
  
 -      if (INTEL_INFO(plane->dev)->gen >= 9) {
 +      if (INTEL_GEN(dev_priv) >= 9) {
                /* use scaler when colorkey is not required */
                if (state->ckey.flags == I915_SET_COLORKEY_NONE) {
                        min_scale = 1;
                can_position = true;
        }
  
 -      return drm_plane_helper_check_update(plane, crtc, fb, &state->src,
 -                                           &state->dst, &state->clip,
 -                                           state->base.rotation,
 -                                           min_scale, max_scale,
 -                                           can_position, true,
 -                                           &state->visible);
 +      ret = drm_plane_helper_check_state(&state->base,
 +                                         &state->clip,
 +                                         min_scale, max_scale,
 +                                         can_position, true);
 +      if (ret)
 +              return ret;
 +
 +      if (!state->base.fb)
 +              return 0;
 +
 +      if (INTEL_GEN(dev_priv) >= 9) {
 +              ret = skl_check_plane_surface(state);
 +              if (ret)
 +                      return ret;
 +      }
 +
 +      return 0;
  }
  
  static void intel_begin_crtc_commit(struct drm_crtc *crtc,
                                    struct drm_crtc_state *old_crtc_state)
  {
        struct drm_device *dev = crtc->dev;
 +      struct drm_i915_private *dev_priv = to_i915(dev);
        struct intel_crtc *intel_crtc = to_intel_crtc(crtc);
        struct intel_crtc_state *old_intel_state =
                to_intel_crtc_state(old_crtc_state);
        bool modeset = needs_modeset(crtc->state);
 +      enum pipe pipe = intel_crtc->pipe;
  
        /* Perform vblank evasion around commit operation */
        intel_pipe_update_start(intel_crtc);
  
        if (to_intel_crtc_state(crtc->state)->update_pipe)
                intel_update_pipe_config(intel_crtc, old_intel_state);
 -      else if (INTEL_INFO(dev)->gen >= 9)
 +      else if (INTEL_GEN(dev_priv) >= 9) {
                skl_detach_scalers(intel_crtc);
 +
 +              I915_WRITE(PIPE_WM_LINETIME(pipe),
 +                         dev_priv->wm.skl_hw.wm_linetime[pipe]);
 +      }
  }
  
  static void intel_finish_crtc_commit(struct drm_crtc *crtc,
  void intel_create_rotation_property(struct drm_device *dev, struct intel_plane *plane)
  {
        if (!dev->mode_config.rotation_property) {
 -              unsigned long flags = BIT(DRM_ROTATE_0) |
 -                      BIT(DRM_ROTATE_180);
 +              unsigned long flags = DRM_ROTATE_0 |
 +                      DRM_ROTATE_180;
  
                if (INTEL_INFO(dev)->gen >= 9)
 -                      flags |= BIT(DRM_ROTATE_90) | BIT(DRM_ROTATE_270);
 +                      flags |= DRM_ROTATE_90 | DRM_ROTATE_270;
  
                dev->mode_config.rotation_property =
                        drm_mode_create_rotation_property(dev, flags);
@@@ -14988,17 -14394,19 +14990,17 @@@ intel_check_cursor_plane(struct drm_pla
                         struct intel_crtc_state *crtc_state,
                         struct intel_plane_state *state)
  {
 -      struct drm_crtc *crtc = crtc_state->base.crtc;
        struct drm_framebuffer *fb = state->base.fb;
        struct drm_i915_gem_object *obj = intel_fb_obj(fb);
        enum pipe pipe = to_intel_plane(plane)->pipe;
        unsigned stride;
        int ret;
  
 -      ret = drm_plane_helper_check_update(plane, crtc, fb, &state->src,
 -                                          &state->dst, &state->clip,
 -                                          state->base.rotation,
 -                                          DRM_PLANE_HELPER_NO_SCALING,
 -                                          DRM_PLANE_HELPER_NO_SCALING,
 -                                          true, true, &state->visible);
 +      ret = drm_plane_helper_check_state(&state->base,
 +                                         &state->clip,
 +                                         DRM_PLANE_HELPER_NO_SCALING,
 +                                         DRM_PLANE_HELPER_NO_SCALING,
 +                                         true, true);
        if (ret)
                return ret;
  
         * Refuse to put the cursor into that compromised position.
         */
        if (IS_CHERRYVIEW(plane->dev) && pipe == PIPE_C &&
 -          state->visible && state->base.crtc_x < 0) {
 +          state->base.visible && state->base.crtc_x < 0) {
                DRM_DEBUG_KMS("CHV cursor C not allowed to straddle the left screen edge\n");
                return -EINVAL;
        }
@@@ -15067,7 -14475,7 +15069,7 @@@ intel_update_cursor_plane(struct drm_pl
        if (!obj)
                addr = 0;
        else if (!INTEL_INFO(dev)->cursor_needs_physical)
 -              addr = i915_gem_obj_ggtt_offset(obj);
 +              addr = i915_gem_object_ggtt_offset(obj, NULL);
        else
                addr = obj->phys_handle->busaddr;
  
@@@ -15113,8 -14521,8 +15115,8 @@@ static struct drm_plane *intel_cursor_p
                if (!dev->mode_config.rotation_property)
                        dev->mode_config.rotation_property =
                                drm_mode_create_rotation_property(dev,
 -                                                      BIT(DRM_ROTATE_0) |
 -                                                      BIT(DRM_ROTATE_180));
 +                                                      DRM_ROTATE_0 |
 +                                                      DRM_ROTATE_180);
                if (dev->mode_config.rotation_property)
                        drm_object_attach_property(&cursor->base.base,
                                dev->mode_config.rotation_property,
@@@ -15320,50 -14728,12 +15322,50 @@@ static bool intel_crt_present(struct dr
        return true;
  }
  
 +void intel_pps_unlock_regs_wa(struct drm_i915_private *dev_priv)
 +{
 +      int pps_num;
 +      int pps_idx;
 +
 +      if (HAS_DDI(dev_priv))
 +              return;
 +      /*
 +       * This w/a is needed at least on CPT/PPT, but to be sure apply it
 +       * everywhere where registers can be write protected.
 +       */
 +      if (IS_VALLEYVIEW(dev_priv) || IS_CHERRYVIEW(dev_priv))
 +              pps_num = 2;
 +      else
 +              pps_num = 1;
 +
 +      for (pps_idx = 0; pps_idx < pps_num; pps_idx++) {
 +              u32 val = I915_READ(PP_CONTROL(pps_idx));
 +
 +              val = (val & ~PANEL_UNLOCK_MASK) | PANEL_UNLOCK_REGS;
 +              I915_WRITE(PP_CONTROL(pps_idx), val);
 +      }
 +}
 +
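 +/* Select the PPS register block for this platform and apply the register unlock workaround. */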
 +static void intel_pps_init(struct drm_i915_private *dev_priv)
 +{
 +      if (HAS_PCH_SPLIT(dev_priv) || IS_BROXTON(dev_priv))
 +              dev_priv->pps_mmio_base = PCH_PPS_BASE;
 +      else if (IS_VALLEYVIEW(dev_priv) || IS_CHERRYVIEW(dev_priv))
 +              dev_priv->pps_mmio_base = VLV_PPS_BASE;
 +      else
 +              dev_priv->pps_mmio_base = PPS_BASE;
 +
 +      intel_pps_unlock_regs_wa(dev_priv);
 +}
 +
  static void intel_setup_outputs(struct drm_device *dev)
  {
        struct drm_i915_private *dev_priv = to_i915(dev);
        struct intel_encoder *encoder;
        bool dpd_is_edp = false;
  
 +      intel_pps_init(dev_priv);
 +
        /*
         * intel_edp_init_connector() depends on this completing first, to
         * prevent the registration of both eDP and LVDS and the incorrect
@@@ -15551,7 -14921,7 +15553,7 @@@ static void intel_user_framebuffer_dest
        drm_framebuffer_cleanup(fb);
        mutex_lock(&dev->struct_mutex);
        WARN_ON(!intel_fb->obj->framebuffer_references--);
 -      drm_gem_object_unreference(&intel_fb->obj->base);
 +      i915_gem_object_put(intel_fb->obj);
        mutex_unlock(&dev->struct_mutex);
        kfree(intel_fb);
  }
@@@ -15631,27 -15001,24 +15633,27 @@@ static int intel_framebuffer_init(struc
                                  struct drm_i915_gem_object *obj)
  {
        struct drm_i915_private *dev_priv = to_i915(dev);
 -      unsigned int aligned_height;
 +      unsigned int tiling = i915_gem_object_get_tiling(obj);
        int ret;
        u32 pitch_limit, stride_alignment;
 +      char *format_name;
  
        WARN_ON(!mutex_is_locked(&dev->struct_mutex));
  
        if (mode_cmd->flags & DRM_MODE_FB_MODIFIERS) {
 -              /* Enforce that fb modifier and tiling mode match, but only for
 -               * X-tiled. This is needed for FBC. */
 -              if (!!(obj->tiling_mode == I915_TILING_X) !=
 -                  !!(mode_cmd->modifier[0] == I915_FORMAT_MOD_X_TILED)) {
 +              /*
 +               * If there's a fence, enforce that
 +               * the fb modifier and tiling mode match.
 +               */
 +              if (tiling != I915_TILING_NONE &&
 +                  tiling != intel_fb_modifier_to_tiling(mode_cmd->modifier[0])) {
                        DRM_DEBUG("tiling_mode doesn't match fb modifier\n");
                        return -EINVAL;
                }
        } else {
 -              if (obj->tiling_mode == I915_TILING_X)
 +              if (tiling == I915_TILING_X) {
                        mode_cmd->modifier[0] = I915_FORMAT_MOD_X_TILED;
 -              else if (obj->tiling_mode == I915_TILING_Y) {
 +              } else if (tiling == I915_TILING_Y) {
                        DRM_DEBUG("No Y tiling for legacy addfb\n");
                        return -EINVAL;
                }
                return -EINVAL;
        }
  
 +      /*
 +       * gen2/3 display engine uses the fence if present,
 +       * so the tiling mode must match the fb modifier exactly.
 +       */
 +      if (INTEL_INFO(dev_priv)->gen < 4 &&
 +          tiling != intel_fb_modifier_to_tiling(mode_cmd->modifier[0])) {
 +              DRM_DEBUG("tiling_mode must match fb modifier exactly on gen2/3\n");
 +              return -EINVAL;
 +      }
 +
        stride_alignment = intel_fb_stride_alignment(dev_priv,
                                                     mode_cmd->modifier[0],
                                                     mode_cmd->pixel_format);
                return -EINVAL;
        }
  
 -      if (mode_cmd->modifier[0] == I915_FORMAT_MOD_X_TILED &&
 -          mode_cmd->pitches[0] != obj->stride) {
 +      /*
 +       * If there's a fence, enforce that
 +       * the fb pitch and fence stride match.
 +       */
 +      if (tiling != I915_TILING_NONE &&
 +          mode_cmd->pitches[0] != i915_gem_object_get_stride(obj)) {
                DRM_DEBUG("pitch (%d) must match tiling stride (%d)\n",
 -                        mode_cmd->pitches[0], obj->stride);
 +                        mode_cmd->pitches[0],
 +                        i915_gem_object_get_stride(obj));
                return -EINVAL;
        }
  
                break;
        case DRM_FORMAT_XRGB1555:
                if (INTEL_INFO(dev)->gen > 3) {
 -                      DRM_DEBUG("unsupported pixel format: %s\n",
 -                                drm_get_format_name(mode_cmd->pixel_format));
 +                      format_name = drm_get_format_name(mode_cmd->pixel_format);
 +                      DRM_DEBUG("unsupported pixel format: %s\n", format_name);
 +                      kfree(format_name);
                        return -EINVAL;
                }
                break;
        case DRM_FORMAT_ABGR8888:
                if (!IS_VALLEYVIEW(dev) && !IS_CHERRYVIEW(dev) &&
                    INTEL_INFO(dev)->gen < 9) {
 -                      DRM_DEBUG("unsupported pixel format: %s\n",
 -                                drm_get_format_name(mode_cmd->pixel_format));
 +                      format_name = drm_get_format_name(mode_cmd->pixel_format);
 +                      DRM_DEBUG("unsupported pixel format: %s\n", format_name);
 +                      kfree(format_name);
                        return -EINVAL;
                }
                break;
        case DRM_FORMAT_XRGB2101010:
        case DRM_FORMAT_XBGR2101010:
                if (INTEL_INFO(dev)->gen < 4) {
 -                      DRM_DEBUG("unsupported pixel format: %s\n",
 -                                drm_get_format_name(mode_cmd->pixel_format));
 +                      format_name = drm_get_format_name(mode_cmd->pixel_format);
 +                      DRM_DEBUG("unsupported pixel format: %s\n", format_name);
 +                      kfree(format_name);
                        return -EINVAL;
                }
                break;
        case DRM_FORMAT_ABGR2101010:
                if (!IS_VALLEYVIEW(dev) && !IS_CHERRYVIEW(dev)) {
 -                      DRM_DEBUG("unsupported pixel format: %s\n",
 -                                drm_get_format_name(mode_cmd->pixel_format));
 +                      format_name = drm_get_format_name(mode_cmd->pixel_format);
 +                      DRM_DEBUG("unsupported pixel format: %s\n", format_name);
 +                      kfree(format_name);
                        return -EINVAL;
                }
                break;
        case DRM_FORMAT_YVYU:
        case DRM_FORMAT_VYUY:
                if (INTEL_INFO(dev)->gen < 5) {
 -                      DRM_DEBUG("unsupported pixel format: %s\n",
 -                                drm_get_format_name(mode_cmd->pixel_format));
 +                      format_name = drm_get_format_name(mode_cmd->pixel_format);
 +                      DRM_DEBUG("unsupported pixel format: %s\n", format_name);
 +                      kfree(format_name);
                        return -EINVAL;
                }
                break;
        default:
 -              DRM_DEBUG("unsupported pixel format: %s\n",
 -                        drm_get_format_name(mode_cmd->pixel_format));
 +              format_name = drm_get_format_name(mode_cmd->pixel_format);
 +              DRM_DEBUG("unsupported pixel format: %s\n", format_name);
 +              kfree(format_name);
                return -EINVAL;
        }
  
        if (mode_cmd->offsets[0] != 0)
                return -EINVAL;
  
 -      aligned_height = intel_fb_align_height(dev, mode_cmd->height,
 -                                             mode_cmd->pixel_format,
 -                                             mode_cmd->modifier[0]);
 -      /* FIXME drm helper for size checks (especially planar formats)? */
 -      if (obj->base.size < aligned_height * mode_cmd->pitches[0])
 -              return -EINVAL;
 -
        drm_helper_mode_fill_fb_struct(&intel_fb->base, mode_cmd);
        intel_fb->obj = obj;
  
 -      intel_fill_fb_info(dev_priv, &intel_fb->base);
 +      ret = intel_fill_fb_info(dev_priv, &intel_fb->base);
 +      if (ret)
 +              return ret;
  
        ret = drm_framebuffer_init(dev, &intel_fb->base, &intel_fb_funcs);
        if (ret) {
@@@ -15807,13 -15158,13 +15809,13 @@@ intel_user_framebuffer_create(struct dr
        struct drm_i915_gem_object *obj;
        struct drm_mode_fb_cmd2 mode_cmd = *user_mode_cmd;
  
 -      obj = to_intel_bo(drm_gem_object_lookup(filp, mode_cmd.handles[0]));
 -      if (&obj->base == NULL)
 +      obj = i915_gem_object_lookup(filp, mode_cmd.handles[0]);
 +      if (!obj)
                return ERR_PTR(-ENOENT);
  
        fb = intel_framebuffer_create(dev, &mode_cmd, obj);
        if (IS_ERR(fb))
 -              drm_gem_object_unreference_unlocked(&obj->base);
 +              i915_gem_object_put_unlocked(obj);
  
        return fb;
  }
@@@ -15996,11 -15347,6 +15998,11 @@@ void intel_init_display_hooks(struct dr
                        skl_modeset_calc_cdclk;
        }
  
 +      if (dev_priv->info.gen >= 9)
 +              dev_priv->display.update_crtcs = skl_update_crtcs;
 +      else
 +              dev_priv->display.update_crtcs = intel_update_crtcs;
 +
        switch (INTEL_INFO(dev_priv)->gen) {
        case 2:
                dev_priv->display.queue_flip = intel_gen2_queue_flip;
@@@ -16202,16 -15548,15 +16204,16 @@@ static void intel_init_quirks(struct dr
  static void i915_disable_vga(struct drm_device *dev)
  {
        struct drm_i915_private *dev_priv = to_i915(dev);
 +      struct pci_dev *pdev = dev_priv->drm.pdev;
        u8 sr1;
        i915_reg_t vga_reg = i915_vgacntrl_reg(dev);
  
        /* WaEnableVGAAccessThroughIOPort:ctg,elk,ilk,snb,ivb,vlv,hsw */
 -      vga_get_uninterruptible(dev->pdev, VGA_RSRC_LEGACY_IO);
 +      vga_get_uninterruptible(pdev, VGA_RSRC_LEGACY_IO);
        outb(SR01, VGA_SR_INDEX);
        sr1 = inb(VGA_SR_DATA);
        outb(sr1 | 1<<5, VGA_SR_DATA);
 -      vga_put(dev->pdev, VGA_RSRC_LEGACY_IO);
 +      vga_put(pdev, VGA_RSRC_LEGACY_IO);
        udelay(300);
  
        I915_WRITE(vga_reg, VGA_DISP_DISABLE);
@@@ -16227,6 -15572,7 +16229,6 @@@ void intel_modeset_init_hw(struct drm_d
        dev_priv->atomic_cdclk_freq = dev_priv->cdclk_freq;
  
        intel_init_clock_gating(dev);
 -      intel_enable_gt_powersave(dev_priv);
  }
  
  /*
@@@ -16493,22 -15839,15 +16495,22 @@@ static bool intel_crtc_has_encoders(str
        return false;
  }
  
 -static bool intel_encoder_has_connectors(struct intel_encoder *encoder)
 +static struct intel_connector *intel_encoder_find_connector(struct intel_encoder *encoder)
  {
        struct drm_device *dev = encoder->base.dev;
        struct intel_connector *connector;
  
        for_each_connector_on_encoder(dev, &encoder->base, connector)
 -              return true;
 +              return connector;
  
 -      return false;
 +      return NULL;
 +}
 +
 +static bool has_pch_trancoder(struct drm_i915_private *dev_priv,
 +                            enum transcoder pch_transcoder)
 +{
 +      return HAS_PCH_IBX(dev_priv) || HAS_PCH_CPT(dev_priv) ||
 +              (HAS_PCH_LPT_H(dev_priv) && pch_transcoder == TRANSCODER_A);
  }
  
  static void intel_sanitize_crtc(struct intel_crtc *crtc)
                 * Temporarily change the plane mapping and disable everything
                 * ...  */
                plane = crtc->plane;
 -              to_intel_plane_state(crtc->base.primary->state)->visible = true;
 +              to_intel_plane_state(crtc->base.primary->state)->base.visible = true;
                crtc->plane = !plane;
                intel_crtc_disable_noatomic(&crtc->base);
                crtc->plane = plane;
                 * worst a fifo underrun happens which also sets this to false.
                 */
                crtc->cpu_fifo_underrun_disabled = true;
 -              crtc->pch_fifo_underrun_disabled = true;
 +              /*
 +               * We track the PCH transcoder underrun reporting state
 +               * within the crtc. With crtc for pipe A housing the underrun
 +               * reporting state for PCH transcoder A, crtc for pipe B housing
 +               * it for PCH transcoder B, etc. LPT-H has only PCH transcoder A,
 +               * and marking underrun reporting as disabled for the non-existing
 +               * PCH transcoders B and C would prevent enabling the south
 +               * error interrupt (see cpt_can_enable_serr_int()).
 +               */
 +              if (has_pch_trancoder(dev_priv, (enum transcoder)crtc->pipe))
 +                      crtc->pch_fifo_underrun_disabled = true;
        }
  }
  
  static void intel_sanitize_encoder(struct intel_encoder *encoder)
  {
        struct intel_connector *connector;
 -      struct drm_device *dev = encoder->base.dev;
  
        /* We need to check both for a crtc link (meaning that the
         * encoder is active and trying to read from a pipe) and the
        bool has_active_crtc = encoder->base.crtc &&
                to_intel_crtc(encoder->base.crtc)->active;
  
 -      if (intel_encoder_has_connectors(encoder) && !has_active_crtc) {
 +      connector = intel_encoder_find_connector(encoder);
 +      if (connector && !has_active_crtc) {
                DRM_DEBUG_KMS("[ENCODER:%d:%s] has active connectors but no active pipe!\n",
                              encoder->base.base.id,
                              encoder->base.name);
                 * fallout from our resume register restoring. Disable
                 * the encoder manually again. */
                if (encoder->base.crtc) {
 +                      struct drm_crtc_state *crtc_state = encoder->base.crtc->state;
 +
                        DRM_DEBUG_KMS("[ENCODER:%d:%s] manually disabled\n",
                                      encoder->base.base.id,
                                      encoder->base.name);
 -                      encoder->disable(encoder);
 +                      encoder->disable(encoder, to_intel_crtc_state(crtc_state), connector->base.state);
                        if (encoder->post_disable)
 -                              encoder->post_disable(encoder);
 +                              encoder->post_disable(encoder, to_intel_crtc_state(crtc_state), connector->base.state);
                }
                encoder->base.crtc = NULL;
  
                 * a bug in one of the get_hw_state functions. Or someplace else
                 * in our code, like the register restore mess on resume. Clamp
                 * things to off as a safer default. */
 -              for_each_intel_connector(dev, connector) {
 -                      if (connector->encoder != encoder)
 -                              continue;
 -                      connector->base.dpms = DRM_MODE_DPMS_OFF;
 -                      connector->base.encoder = NULL;
 -              }
 +
 +              connector->base.dpms = DRM_MODE_DPMS_OFF;
 +              connector->base.encoder = NULL;
        }
        /* Enabled encoders without active connectors will be fixed in
         * the crtc fixup. */
@@@ -16690,10 -16020,10 +16692,10 @@@ static void readout_plane_state(struct 
        struct intel_plane_state *plane_state =
                to_intel_plane_state(primary->state);
  
 -      plane_state->visible = crtc->active &&
 +      plane_state->base.visible = crtc->active &&
                primary_get_hw_state(to_intel_plane(primary));
  
 -      if (plane_state->visible)
 +      if (plane_state->base.visible)
                crtc->base.state->plane_mask |= 1 << drm_plane_index(primary);
  }
  
@@@ -16952,6 -16282,7 +16954,6 @@@ void intel_modeset_gem_init(struct drm_
        struct drm_i915_private *dev_priv = to_i915(dev);
        struct drm_crtc *c;
        struct drm_i915_gem_object *obj;
 -      int ret;
  
        intel_init_gt_powersave(dev_priv);
  
         * for this.
         */
        for_each_crtc(dev, c) {
 +              struct i915_vma *vma;
 +
                obj = intel_fb_obj(c->primary->fb);
                if (obj == NULL)
                        continue;
  
                mutex_lock(&dev->struct_mutex);
 -              ret = intel_pin_and_fence_fb_obj(c->primary->fb,
 +              vma = intel_pin_and_fence_fb_obj(c->primary->fb,
                                                 c->primary->state->rotation);
                mutex_unlock(&dev->struct_mutex);
 -              if (ret) {
 +              if (IS_ERR(vma)) {
                        DRM_ERROR("failed to pin boot fb on pipe %d\n",
                                  to_intel_crtc(c)->pipe);
                        drm_framebuffer_unreference(c->primary->fb);
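
The repeated format_name changes above follow from drm_get_format_name() in this tree returning a kmalloc()'ed string that the caller must free. A hypothetical convenience wrapper, not part of the patch, shown only to summarize the new calling convention:

	/* Hypothetical helper, not in the patch: log an unsupported pixel format
	 * and free the name buffer that drm_get_format_name() now allocates. */
	static void log_unsupported_format(u32 pixel_format)
	{
		char *format_name = drm_get_format_name(pixel_format);

		DRM_DEBUG("unsupported pixel format: %s\n", format_name);
		kfree(format_name);
	}
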
index 9df29f1cb16af029a7bcbdd93a56562be464a2ef,462056e4b9e48b80cd5bf30ed618b12c27ae6c97..4e1ae3fc462dc65591d2fa5b3f6dffe3ee8a4ad4
@@@ -21,6 -21,7 +21,6 @@@
  #include <drm/drm_atomic.h>
  #include <drm/drm_atomic_helper.h>
  #include <drm/drm_crtc_helper.h>
 -#include <linux/fb.h>
  #include <linux/clk.h>
  #include <linux/errno.h>
  #include <drm/drm_gem_cma_helper.h>
@@@ -60,8 -61,7 +60,8 @@@ static void ipu_crtc_enable(struct drm_
        ipu_di_enable(ipu_crtc->di);
  }
  
 -static void ipu_crtc_disable(struct drm_crtc *crtc)
 +static void ipu_crtc_atomic_disable(struct drm_crtc *crtc,
 +                                  struct drm_crtc_state *old_crtc_state)
  {
        struct ipu_crtc *ipu_crtc = to_ipu_crtc(crtc);
        struct ipu_soc *ipu = dev_get_drvdata(ipu_crtc->dev->parent);
        }
        spin_unlock_irq(&crtc->dev->event_lock);
  
 +      /* always disable planes on the CRTC */
 +      drm_atomic_helper_disable_planes_on_crtc(old_crtc_state, true);
++
+       drm_crtc_vblank_off(crtc);
  }
  
  static void imx_drm_crtc_reset(struct drm_crtc *crtc)
@@@ -124,14 -123,9 +126,14 @@@ static void imx_drm_crtc_destroy_state(
        kfree(to_imx_crtc_state(state));
  }
  
 +static void imx_drm_crtc_destroy(struct drm_crtc *crtc)
 +{
 +      imx_drm_remove_crtc(to_ipu_crtc(crtc)->imx_crtc);
 +}
 +
  static const struct drm_crtc_funcs ipu_crtc_funcs = {
        .set_config = drm_atomic_helper_set_config,
 -      .destroy = drm_crtc_cleanup,
 +      .destroy = imx_drm_crtc_destroy,
        .page_flip = drm_atomic_helper_page_flip,
        .reset = imx_drm_crtc_reset,
        .atomic_duplicate_state = imx_drm_crtc_duplicate_state,
@@@ -142,7 -136,7 +144,7 @@@ static irqreturn_t ipu_irq_handler(int 
  {
        struct ipu_crtc *ipu_crtc = dev_id;
  
 -      imx_drm_handle_vblank(ipu_crtc->imx_crtc);
 +      drm_crtc_handle_vblank(&ipu_crtc->base);
  
        return IRQ_HANDLED;
  }
@@@ -183,6 -177,8 +185,8 @@@ static int ipu_crtc_atomic_check(struc
  static void ipu_crtc_atomic_begin(struct drm_crtc *crtc,
                                  struct drm_crtc_state *old_crtc_state)
  {
+       drm_crtc_vblank_on(crtc);
        spin_lock_irq(&crtc->dev->event_lock);
        if (crtc->state->event) {
                WARN_ON(drm_crtc_vblank_get(crtc));
@@@ -250,7 -246,7 +254,7 @@@ static const struct drm_crtc_helper_fun
        .mode_set_nofb = ipu_crtc_mode_set_nofb,
        .atomic_check = ipu_crtc_atomic_check,
        .atomic_begin = ipu_crtc_atomic_begin,
 -      .disable = ipu_crtc_disable,
 +      .atomic_disable = ipu_crtc_atomic_disable,
        .enable = ipu_crtc_enable,
  };
  
@@@ -418,6 -414,8 +422,6 @@@ static void ipu_drm_unbind(struct devic
  {
        struct ipu_crtc *ipu_crtc = dev_get_drvdata(dev);
  
 -      imx_drm_remove_crtc(ipu_crtc->imx_crtc);
 -
        ipu_put_resources(ipu_crtc);
        if (ipu_crtc->plane[1])
                ipu_plane_put_resources(ipu_crtc->plane[1]);
index 0a9b5580b2e92ce8322ad795152c8dd3b1b393f9,85f3047e05aee05aa42a654f9be48cb899964acf..b6ac27e3192964cbf2fa1f5a4c139ab282deaa71
@@@ -196,11 -196,20 +196,20 @@@ int msm_gem_fault(struct vm_area_struc
  {
        struct drm_gem_object *obj = vma->vm_private_data;
        struct drm_device *dev = obj->dev;
+       struct msm_drm_private *priv = dev->dev_private;
        struct page **pages;
        unsigned long pfn;
        pgoff_t pgoff;
        int ret;
  
+       /* This should only happen if userspace tries to pass a mmap'd
+        * but unfaulted gem bo vaddr into submit ioctl, triggering
+        * a page fault while struct_mutex is already held.  This is
+        * not a valid use-case so just bail.
+        */
+       if (priv->struct_mutex_task == current)
+               return VM_FAULT_SIGBUS;
        /* Make sure we don't parallel update on a fault, nor move or remove
         * something from beneath our feet
         */
@@@ -584,16 -593,18 +593,16 @@@ int msm_gem_cpu_prep(struct drm_gem_obj
  {
        struct msm_gem_object *msm_obj = to_msm_bo(obj);
        bool write = !!(op & MSM_PREP_WRITE);
 -
 -      if (op & MSM_PREP_NOSYNC) {
 -              if (!reservation_object_test_signaled_rcu(msm_obj->resv, write))
 -                      return -EBUSY;
 -      } else {
 -              int ret;
 -
 -              ret = reservation_object_wait_timeout_rcu(msm_obj->resv, write,
 -                              true, timeout_to_jiffies(timeout));
 -              if (ret <= 0)
 -                      return ret == 0 ? -ETIMEDOUT : ret;
 -      }
 +      unsigned long remain =
 +              op & MSM_PREP_NOSYNC ? 0 : timeout_to_jiffies(timeout);
 +      long ret;
 +
 +      ret = reservation_object_wait_timeout_rcu(msm_obj->resv, write,
 +                                                true,  remain);
 +      if (ret == 0)
 +              return remain == 0 ? -EBUSY : -ETIMEDOUT;
 +      else if (ret < 0)
 +              return ret;
  
        /* TODO cache maintenance */
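
The msm_gem_cpu_prep() rewrite above folds the MSM_PREP_NOSYNC case into a single reservation wait by passing a zero timeout for the non-blocking check. A hypothetical helper, not in the patch, spelling out the resulting return-value mapping:

	/* Hypothetical helper, not in the patch: the mapping implemented above.
	 * A zero "remain" means the caller asked for a non-blocking check. */
	static int prep_wait_to_errno(long ret, unsigned long remain)
	{
		if (ret == 0)
			return remain == 0 ? -EBUSY : -ETIMEDOUT;
		return ret < 0 ? ret : 0;
	}
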
  
index 3ac14cd1e5b9a023666a3b576a8ed73de312dc0d,880d6a9af7c8d28dae1beb87492e849702709fb0..b6a0f37a65f30cad9f85d5773fca802e9ea3bb6b
@@@ -15,8 -15,6 +15,8 @@@
   * this program.  If not, see <http://www.gnu.org/licenses/>.
   */
  
 +#include <linux/sync_file.h>
 +
  #include "msm_drv.h"
  #include "msm_gpu.h"
  #include "msm_gem.h"
@@@ -66,6 -64,14 +66,14 @@@ void msm_gem_submit_free(struct msm_gem
        kfree(submit);
  }
  
+ static inline unsigned long __must_check
+ copy_from_user_inatomic(void *to, const void __user *from, unsigned long n)
+ {
+       if (access_ok(VERIFY_READ, from, n))
+               return __copy_from_user_inatomic(to, from, n);
+       return -EFAULT;
+ }
  static int submit_lookup_objects(struct msm_gem_submit *submit,
                struct drm_msm_gem_submit *args, struct drm_file *file)
  {
@@@ -73,6 -79,7 +81,7 @@@
        int ret = 0;
  
        spin_lock(&file->table_lock);
+       pagefault_disable();
  
        for (i = 0; i < args->nr_bos; i++) {
                struct drm_msm_gem_submit_bo submit_bo;
                 */
                submit->bos[i].flags = 0;
  
-               ret = copy_from_user(&submit_bo, userptr, sizeof(submit_bo));
-               if (ret) {
-                       ret = -EFAULT;
-                       goto out_unlock;
+               ret = copy_from_user_inatomic(&submit_bo, userptr, sizeof(submit_bo));
+               if (unlikely(ret)) {
+                       pagefault_enable();
+                       spin_unlock(&file->table_lock);
+                       ret = copy_from_user(&submit_bo, userptr, sizeof(submit_bo));
+                       if (ret)
+                               goto out;
+                       spin_lock(&file->table_lock);
+                       pagefault_disable();
                }
  
                if (submit_bo.flags & ~MSM_SUBMIT_BO_FLAGS) {
        }
  
  out_unlock:
-       submit->nr_bos = i;
+       pagefault_enable();
        spin_unlock(&file->table_lock);
  
+ out:
+       submit->nr_bos = i;
        return ret;
  }
  
@@@ -363,9 -378,6 +380,9 @@@ int msm_ioctl_gem_submit(struct drm_dev
        struct msm_file_private *ctx = file->driver_priv;
        struct msm_gem_submit *submit;
        struct msm_gpu *gpu = priv->gpu;
 +      struct fence *in_fence = NULL;
 +      struct sync_file *sync_file = NULL;
 +      int out_fence_fd = -1;
        unsigned i;
        int ret;
  
        /* for now, we just have 3d pipe.. eventually this would need to
         * be more clever to dispatch to appropriate gpu module:
         */
 -      if (args->pipe != MSM_PIPE_3D0)
 +      if (MSM_PIPE_ID(args->flags) != MSM_PIPE_3D0)
 +              return -EINVAL;
 +
 +      if (MSM_PIPE_FLAGS(args->flags) & ~MSM_SUBMIT_FLAGS)
                return -EINVAL;
  
        ret = mutex_lock_interruptible(&dev->struct_mutex);
        if (ret)
                return ret;
  
 +      if (args->flags & MSM_SUBMIT_FENCE_FD_OUT) {
 +              out_fence_fd = get_unused_fd_flags(O_CLOEXEC);
 +              if (out_fence_fd < 0) {
 +                      ret = out_fence_fd;
 +                      goto out_unlock;
 +              }
 +      }
+       priv->struct_mutex_task = current;
  
        submit = submit_create(dev, gpu, args->nr_bos, args->nr_cmds);
        if (!submit) {
        if (ret)
                goto out;
  
 -      ret = submit_fence_sync(submit);
 -      if (ret)
 -              goto out;
 +      if (args->flags & MSM_SUBMIT_FENCE_FD_IN) {
 +              in_fence = sync_file_get_fence(args->fence_fd);
 +
 +              if (!in_fence) {
 +                      ret = -EINVAL;
 +                      goto out;
 +              }
 +
 +              /* TODO if we get an array-fence due to userspace merging multiple
 +               * fences, we need a way to determine if all the backing fences
 +               * are from our own context..
 +               */
 +
 +              if (in_fence->context != gpu->fctx->context) {
 +                      ret = fence_wait(in_fence, true);
 +                      if (ret)
 +                              goto out;
 +              }
 +
 +      }
 +
 +      if (!(args->flags & MSM_SUBMIT_NO_IMPLICIT)) {
 +              ret = submit_fence_sync(submit);
 +              if (ret)
 +                      goto out;
 +      }
  
        ret = submit_pin_objects(submit);
        if (ret)
  
        submit->nr_cmds = i;
  
 -      ret = msm_gpu_submit(gpu, submit, ctx);
 +      submit->fence = msm_fence_alloc(gpu->fctx);
 +      if (IS_ERR(submit->fence)) {
 +              ret = PTR_ERR(submit->fence);
 +              submit->fence = NULL;
 +              goto out;
 +      }
 +
 +      if (args->flags & MSM_SUBMIT_FENCE_FD_OUT) {
 +              sync_file = sync_file_create(submit->fence);
 +              if (!sync_file) {
 +                      ret = -ENOMEM;
 +                      goto out;
 +              }
 +      }
 +
 +      msm_gpu_submit(gpu, submit, ctx);
  
        args->fence = submit->fence->seqno;
  
 +      if (args->flags & MSM_SUBMIT_FENCE_FD_OUT) {
 +              fd_install(out_fence_fd, sync_file->file);
 +              args->fence_fd = out_fence_fd;
 +      }
 +
  out:
 +      if (in_fence)
 +              fence_put(in_fence);
        submit_cleanup(submit);
        if (ret)
                msm_gem_submit_free(submit);
  out_unlock:
 +      if (ret && (out_fence_fd >= 0))
 +              put_unused_fd(out_fence_fd);
+       priv->struct_mutex_task = NULL;
        mutex_unlock(&dev->struct_mutex);
        return ret;
  }
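
Two hunks in this file cooperate with the msm_gem_fault() change earlier in the diff to avoid a self-deadlock on struct_mutex. A condensed sketch of the pairing, fragments only, using the names as they appear in the hunks:

	/* In msm_ioctl_gem_submit(), with dev->struct_mutex held: */
	priv->struct_mutex_task = current;
	/* ... copy bos/cmds from userspace, pin buffers, submit ... */
	priv->struct_mutex_task = NULL;
	mutex_unlock(&dev->struct_mutex);

	/* In msm_gem_fault(): refuse to recurse into the submit path's mmap. */
	if (priv->struct_mutex_task == current)
		return VM_FAULT_SIGBUS;
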
index 4824f70b0258e856f1d613867de1627af70eda34,1dcf39084555b7edb15ab6a816ead9a0fe60374c..a4e9f35da3a22e87a2c35ad18b99f359999f4e9a
@@@ -627,7 -627,9 +627,9 @@@ static u32 atombios_adjust_pll(struct d
                        if (radeon_crtc->ss.refdiv) {
                                radeon_crtc->pll_flags |= RADEON_PLL_USE_REF_DIV;
                                radeon_crtc->pll_reference_div = radeon_crtc->ss.refdiv;
-                               if (rdev->family >= CHIP_RV770)
+                               if (ASIC_IS_AVIVO(rdev) &&
+                                   rdev->family != CHIP_RS780 &&
+                                   rdev->family != CHIP_RS880)
                                        radeon_crtc->pll_flags |= RADEON_PLL_USE_FRAC_FB_DIV;
                        }
                }
@@@ -1154,7 -1156,6 +1156,7 @@@ static int dce4_crtc_do_set_base(struc
        u32 tmp, viewport_w, viewport_h;
        int r;
        bool bypass_lut = false;
 +      char *format_name;
  
        /* no fb bound */
        if (!atomic && !crtc->primary->fb) {
                bypass_lut = true;
                break;
        default:
 -              DRM_ERROR("Unsupported screen format %s\n",
 -                        drm_get_format_name(target_fb->pixel_format));
 +              format_name = drm_get_format_name(target_fb->pixel_format);
 +              DRM_ERROR("Unsupported screen format %s\n", format_name);
 +              kfree(format_name);
                return -EINVAL;
        }
  
        WREG32(EVERGREEN_VIEWPORT_SIZE + radeon_crtc->crtc_offset,
               (viewport_w << 16) | viewport_h);
  
 -      /* set pageflip to happen only at start of vblank interval (front porch) */
 -      WREG32(EVERGREEN_MASTER_UPDATE_MODE + radeon_crtc->crtc_offset, 3);
 +      /* set pageflip to happen anywhere in vblank interval */
 +      WREG32(EVERGREEN_MASTER_UPDATE_MODE + radeon_crtc->crtc_offset, 0);
  
        if (!atomic && fb && fb != crtc->primary->fb) {
                radeon_fb = to_radeon_framebuffer(fb);
@@@ -1471,7 -1471,6 +1473,7 @@@ static int avivo_crtc_do_set_base(struc
        u32 viewport_w, viewport_h;
        int r;
        bool bypass_lut = false;
 +      char *format_name;
  
        /* no fb bound */
        if (!atomic && !crtc->primary->fb) {
                bypass_lut = true;
                break;
        default:
 -              DRM_ERROR("Unsupported screen format %s\n",
 -                        drm_get_format_name(target_fb->pixel_format));
 +              format_name = drm_get_format_name(target_fb->pixel_format);
 +              DRM_ERROR("Unsupported screen format %s\n", format_name);
 +              kfree(format_name);
                return -EINVAL;
        }
  
        WREG32(AVIVO_D1MODE_VIEWPORT_SIZE + radeon_crtc->crtc_offset,
               (viewport_w << 16) | viewport_h);
  
 -      /* set pageflip to happen only at start of vblank interval (front porch) */
 -      WREG32(AVIVO_D1MODE_MASTER_UPDATE_MODE + radeon_crtc->crtc_offset, 3);
 +      /* set pageflip to happen anywhere in vblank interval */
 +      WREG32(AVIVO_D1MODE_MASTER_UPDATE_MODE + radeon_crtc->crtc_offset, 0);
  
        if (!atomic && fb && fb != crtc->primary->fb) {
                radeon_fb = to_radeon_framebuffer(fb);
index a1321b2fa454dc1a6ab8dd87bdbb9ac13a5c2ece,ddef0d4940843105de67327a7c5222d4ee6a78ed..2fdcd04bc93f7b9c6abf5d84752836e154d566b0
@@@ -29,7 -29,6 +29,7 @@@ struct radeon_atpx 
        acpi_handle handle;
        struct radeon_atpx_functions functions;
        bool is_hybrid;
 +      bool dgpu_req_power_for_displays;
  };
  
  static struct radeon_atpx_priv {
@@@ -73,10 -72,6 +73,10 @@@ bool radeon_is_atpx_hybrid(void) 
        return radeon_atpx_priv.atpx.is_hybrid;
  }
  
 +bool radeon_atpx_dgpu_req_power_for_displays(void) {
 +      return radeon_atpx_priv.atpx.dgpu_req_power_for_displays;
 +}
 +
  /**
   * radeon_atpx_call - call an ATPX method
   *
@@@ -203,16 -198,7 +203,7 @@@ static int radeon_atpx_validate(struct 
        atpx->is_hybrid = false;
        if (valid_bits & ATPX_MS_HYBRID_GFX_SUPPORTED) {
                printk("ATPX Hybrid Graphics\n");
- #if 1
-               /* This is a temporary hack until the D3 cold support
-                * makes it upstream.  The ATPX power_control method seems
-                * to still work on even if the system should be using
-                * the new standardized hybrid D3 cold ACPI interface.
-                */
-               atpx->functions.power_cntl = true;
- #else
                atpx->functions.power_cntl = false;
- #endif
                atpx->is_hybrid = true;
        }
  
index 27ee0ab0e1a75629c21ebe87761f490cdd3bbcea,c2e0a1ccdfbce8b0db402d1091363e070e465c9d..455268214b893eac36e8bbd65d5e2b18d2735483
@@@ -237,8 -237,7 +237,8 @@@ static int radeon_verify_access(struct 
  
        if (radeon_ttm_tt_has_userptr(bo->ttm))
                return -EPERM;
 -      return drm_vma_node_verify_access(&rbo->gem_base.vma_node, filp);
 +      return drm_vma_node_verify_access(&rbo->gem_base.vma_node,
 +                                        filp->private_data);
  }
  
  static void radeon_move_null(struct ttm_buffer_object *bo,
@@@ -264,8 -263,8 +264,8 @@@ static int radeon_move_blit(struct ttm_
  
        rdev = radeon_get_rdev(bo->bdev);
        ridx = radeon_copy_ring_index(rdev);
-       old_start = old_mem->start << PAGE_SHIFT;
-       new_start = new_mem->start << PAGE_SHIFT;
+       old_start = (u64)old_mem->start << PAGE_SHIFT;
+       new_start = (u64)new_mem->start << PAGE_SHIFT;
  
        switch (old_mem->mem_type) {
        case TTM_PL_VRAM:
@@@ -347,7 -346,7 +347,7 @@@ static int radeon_move_vram_ram(struct 
        if (unlikely(r)) {
                goto out_cleanup;
        }
 -      r = ttm_bo_move_ttm(bo, true, interruptible, no_wait_gpu, new_mem);
 +      r = ttm_bo_move_ttm(bo, interruptible, no_wait_gpu, new_mem);
  out_cleanup:
        ttm_bo_mem_put(bo, &tmp_mem);
        return r;
@@@ -380,7 -379,7 +380,7 @@@ static int radeon_move_ram_vram(struct 
        if (unlikely(r)) {
                return r;
        }
 -      r = ttm_bo_move_ttm(bo, true, interruptible, no_wait_gpu, &tmp_mem);
 +      r = ttm_bo_move_ttm(bo, interruptible, no_wait_gpu, &tmp_mem);
        if (unlikely(r)) {
                goto out_cleanup;
        }
@@@ -445,7 -444,8 +445,7 @@@ static int radeon_bo_move(struct ttm_bu
  
        if (r) {
  memcpy:
 -              r = ttm_bo_move_memcpy(bo, evict, interruptible,
 -                                     no_wait_gpu, new_mem);
 +              r = ttm_bo_move_memcpy(bo, interruptible, no_wait_gpu, new_mem);
                if (r) {
                        return r;
                }
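
The (u64) casts added above make the page-offset shift happen in 64-bit arithmetic; with a 32-bit unsigned long, offsets at or above 4 GiB would wrap before being widened. A standalone userspace illustration of the failure mode, assuming a 32-bit index type (not kernel code):

	#include <stdint.h>
	#include <stdio.h>

	#define PAGE_SHIFT 12

	int main(void)
	{
		uint32_t start = 0x00200000;	/* page index of an 8 GiB offset */
		uint64_t wraps = start << PAGE_SHIFT;		/* shift done in 32 bits: wraps to 0 */
		uint64_t fixed = (uint64_t)start << PAGE_SHIFT;	/* 0x200000000 */

		printf("wraps=%#llx fixed=%#llx\n",
		       (unsigned long long)wraps, (unsigned long long)fixed);
		return 0;
	}
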
index 3c9e7f64b9261efff0faef744f5131a4899cda16,9ecef93854914579ee74b4d96432d15dfaa1fd6b..8703f56b794774ac4130a7b487472e293ebe7cdf
@@@ -16,7 -16,6 +16,7 @@@
  #include <linux/platform_device.h>
  #include <linux/pm_runtime.h>
  #include "drm_fb_cma_helper.h"
 +#include <drm/drm_fb_helper.h>
  
  #include "uapi/drm/vc4_drm.h"
  #include "vc4_drv.h"
@@@ -58,21 -57,21 +58,21 @@@ static int vc4_get_param_ioctl(struct d
        switch (args->param) {
        case DRM_VC4_PARAM_V3D_IDENT0:
                ret = pm_runtime_get_sync(&vc4->v3d->pdev->dev);
-               if (ret)
+               if (ret < 0)
                        return ret;
                args->value = V3D_READ(V3D_IDENT0);
                pm_runtime_put(&vc4->v3d->pdev->dev);
                break;
        case DRM_VC4_PARAM_V3D_IDENT1:
                ret = pm_runtime_get_sync(&vc4->v3d->pdev->dev);
-               if (ret)
+               if (ret < 0)
                        return ret;
                args->value = V3D_READ(V3D_IDENT1);
                pm_runtime_put(&vc4->v3d->pdev->dev);
                break;
        case DRM_VC4_PARAM_V3D_IDENT2:
                ret = pm_runtime_get_sync(&vc4->v3d->pdev->dev);
-               if (ret)
+               if (ret < 0)
                        return ret;
                args->value = V3D_READ(V3D_IDENT2);
                pm_runtime_put(&vc4->v3d->pdev->dev);
@@@ -215,7 -214,7 +215,7 @@@ static void vc4_kick_out_firmware_fb(vo
        ap->ranges[0].base = 0;
        ap->ranges[0].size = ~0;
  
 -      remove_conflicting_framebuffers(ap, "vc4drmfb", false);
 +      drm_fb_helper_remove_conflicting_framebuffers(ap, "vc4drmfb", false);
        kfree(ap);
  }
  
@@@ -233,8 -232,8 +233,8 @@@ static int vc4_drm_bind(struct device *
                return -ENOMEM;
  
        drm = drm_dev_alloc(&vc4_drm_driver, dev);
 -      if (!drm)
 -              return -ENOMEM;
 +      if (IS_ERR(drm))
 +              return PTR_ERR(drm);
        platform_set_drvdata(pdev, drm);
        vc4->dev = drm;
        drm->dev_private = vc4;
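
The ret < 0 checks above account for pm_runtime_get_sync() returning 1 when the device is already active, which the old if (ret) test misread as failure. A minimal sketch of the convention, with dev standing in for &vc4->v3d->pdev->dev:

	ret = pm_runtime_get_sync(dev);	/* 0: resumed now, 1: already active, <0: error */
	if (ret < 0)
		return ret;
	/* device is powered: read the V3D_IDENT register, then pm_runtime_put(dev) */
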
index 27c52ec351939eca2131ff2bfd5f198cf097ebaa,b262c5c26f109702572ed613fa0965f06a993ef9..77daea6cb8668df85d70d4b760f4e5a5be41800a
@@@ -419,6 -419,10 +419,6 @@@ again
  
        vc4_flush_caches(dev);
  
 -      /* Disable the binner's pre-loaded overflow memory address */
 -      V3D_WRITE(V3D_BPOA, 0);
 -      V3D_WRITE(V3D_BPOS, 0);
 -
        /* Either put the job in the binner if it uses the binner, or
         * immediately move it to the to-be-rendered queue.
         */
@@@ -530,8 -534,8 +530,8 @@@ vc4_cl_lookup_bos(struct drm_device *de
                return -EINVAL;
        }
  
-       exec->bo = kcalloc(exec->bo_count, sizeof(struct drm_gem_cma_object *),
-                          GFP_KERNEL);
+       exec->bo = drm_calloc_large(exec->bo_count,
+                                   sizeof(struct drm_gem_cma_object *));
        if (!exec->bo) {
                DRM_ERROR("Failed to allocate validated BO pointers\n");
                return -ENOMEM;
        spin_unlock(&file_priv->table_lock);
  
  fail:
-       kfree(handles);
-       return 0;
+       drm_free_large(handles);
+       return ret;
  }
  
  static int
@@@ -604,7 -608,7 +604,7 @@@ vc4_get_bcl(struct drm_device *dev, str
         * read the contents back for validation, and I think the
         * bo->vaddr is uncached access.
         */
-       temp = kmalloc(temp_size, GFP_KERNEL);
+       temp = drm_malloc_ab(temp_size, 1);
        if (!temp) {
                DRM_ERROR("Failed to allocate storage for copying "
                          "in bin/render CLs.\n");
        ret = vc4_validate_shader_recs(dev, exec);
  
  fail:
-       kfree(temp);
+       drm_free_large(temp);
        return ret;
  }
  
@@@ -684,7 -688,7 +684,7 @@@ vc4_complete_exec(struct drm_device *de
        if (exec->bo) {
                for (i = 0; i < exec->bo_count; i++)
                        drm_gem_object_unreference_unlocked(&exec->bo[i]->base);
-               kfree(exec->bo);
+               drm_free_large(exec->bo);
        }
  
        while (!list_empty(&exec->unref_list)) {
@@@ -938,8 -942,8 +938,8 @@@ vc4_gem_destroy(struct drm_device *dev
                vc4->overflow_mem = NULL;
        }
  
-       vc4_bo_cache_destroy(dev);
        if (vc4->hang_state)
                vc4_free_hang_state(dev, vc4->hang_state);
+       vc4_bo_cache_destroy(dev);
  }