Merge tag 'v4.8-rc8' into drm-next
author    Dave Airlie <[email protected]>
          Wed, 28 Sep 2016 02:08:49 +0000 (12:08 +1000)
committer Dave Airlie <[email protected]>
          Wed, 28 Sep 2016 02:08:49 +0000 (12:08 +1000)
Linux 4.8-rc8

There was a lot of fallout in the imx/amdgpu/i915 drivers, so backmerge
it now to avoid troubles.
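
A minimal sketch of the backmerge operation described above (an assumed,
typical workflow shown only for illustration; the exact commands are not
recorded in this commit):

    git fetch origin tag v4.8-rc8   # fetch the upstream release tag
    git checkout drm-next           # switch to the DRM integration branch
    git merge v4.8-rc8              # backmerge the tag and resolve any conflicts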

* tag 'v4.8-rc8': (1442 commits)
  Linux 4.8-rc8
  fault_in_multipages_readable() throws set-but-unused error
  mm: check VMA flags to avoid invalid PROT_NONE NUMA balancing
  radix tree: fix sibling entry handling in radix_tree_descend()
  radix tree test suite: Test radix_tree_replace_slot() for multiorder entries
  fix memory leaks in tracing_buffers_splice_read()
  tracing: Move mutex to protect against resetting of seq data
  MIPS: Fix delay slot emulation count in debugfs
  MIPS: SMP: Fix possibility of deadlock when bringing CPUs online
  mm: delete unnecessary and unsafe init_tlb_ubc()
  huge tmpfs: fix Committed_AS leak
  shmem: fix tmpfs to handle the huge= option properly
  blk-mq: skip unmapped queues in blk_mq_alloc_request_hctx
  MIPS: Fix pre-r6 emulation FPU initialisation
  arm64: kgdb: handle read-only text / modules
  arm64: Call numa_store_cpu_info() earlier.
  locking/hung_task: Fix typo in CONFIG_DETECT_HUNG_TASK help text
  nvme-rdma: only clear queue flags after successful connect
  i2c: qup: skip qup_i2c_suspend if the device is already runtime suspended
  perf/core: Limit matching exclusive events to one PMU
  ...

25 files changed:
MAINTAINERS
drivers/gpu/drm/amd/amdgpu/amdgpu.h
drivers/gpu/drm/amd/amdgpu/amdgpu_atombios.c
drivers/gpu/drm/amd/amdgpu/amdgpu_atpx_handler.c
drivers/gpu/drm/amd/amdgpu/amdgpu_ib.c
drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c
drivers/gpu/drm/amd/amdgpu/amdgpu_uvd.c
drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c
drivers/gpu/drm/amd/amdgpu/cik_sdma.c
drivers/gpu/drm/amd/amdgpu/gfx_v7_0.c
drivers/gpu/drm/amd/amdgpu/sdma_v2_4.c
drivers/gpu/drm/amd/amdkfd/kfd_doorbell.c
drivers/gpu/drm/atmel-hlcdc/atmel_hlcdc_plane.c
drivers/gpu/drm/drm_atomic.c
drivers/gpu/drm/drm_fb_helper.c
drivers/gpu/drm/i915/i915_gem.c
drivers/gpu/drm/i915/intel_display.c
drivers/gpu/drm/imx/ipuv3-crtc.c
drivers/gpu/drm/msm/msm_gem.c
drivers/gpu/drm/msm/msm_gem_submit.c
drivers/gpu/drm/radeon/atombios_crtc.c
drivers/gpu/drm/radeon/radeon_atpx_handler.c
drivers/gpu/drm/radeon/radeon_ttm.c
drivers/gpu/drm/vc4/vc4_drv.c
drivers/gpu/drm/vc4/vc4_gem.c

diff --combined MAINTAINERS
index ad310ec42fe7f4c197385b4e838c72f77bc88219,01bff8ea28d88bf8a667100fa37dd376e049634e..703fcb51b7826ceef41f6d8fc8e9a46b8046a419
@@@ -798,6 -798,7 +798,7 @@@ M: Laura Abbott <[email protected]
  M:    Sumit Semwal <[email protected]>
  L:    [email protected]
  S:    Supported
+ F:    Documentation/devicetree/bindings/staging/ion/
  F:    drivers/staging/android/ion
  F:    drivers/staging/android/uapi/ion.h
  F:    drivers/staging/android/uapi/ion_test.h
@@@ -881,6 -882,15 +882,15 @@@ S:       Supporte
  F:    drivers/gpu/drm/arc/
  F:    Documentation/devicetree/bindings/display/snps,arcpgu.txt
  
+ ARM ARCHITECTED TIMER DRIVER
+ M:    Mark Rutland <[email protected]>
+ M:    Marc Zyngier <[email protected]>
+ L:    [email protected] (moderated for non-subscribers)
+ S:    Maintained
+ F:    arch/arm/include/asm/arch_timer.h
+ F:    arch/arm64/include/asm/arch_timer.h
+ F:    drivers/clocksource/arm_arch_timer.c
+
  ARM HDLCD DRM DRIVER
  M:    Liviu Dudau <[email protected]>
  S:    Supported
@@@ -1614,7 -1624,8 +1624,8 @@@ N:      rockchi
  
  ARM/SAMSUNG EXYNOS ARM ARCHITECTURES
  M:    Kukjin Kim <[email protected]>
- M:    Krzysztof Kozlowski <[email protected]>
+ M:    Krzysztof Kozlowski <[email protected]>
+ R:    Javier Martinez Canillas <[email protected]>
  L:    [email protected] (moderated for non-subscribers)
  L:    [email protected] (moderated for non-subscribers)
  S:    Maintained
@@@ -1634,7 -1645,6 +1645,6 @@@ F:      drivers/*/*s3c64xx
  F:    drivers/*/*s5pv210*
  F:    drivers/memory/samsung/*
  F:    drivers/soc/samsung/*
- F:    drivers/spi/spi-s3c*
  F:    Documentation/arm/Samsung/
  F:    Documentation/devicetree/bindings/arm/samsung/
  F:    Documentation/devicetree/bindings/sram/samsung-sram.txt
@@@ -1822,6 -1832,7 +1832,7 @@@ T:      git git://git.kernel.org/pub/scm/lin
  ARM/UNIPHIER ARCHITECTURE
  M:    Masahiro Yamada <[email protected]>
  L:    [email protected] (moderated for non-subscribers)
+ T:    git git://git.kernel.org/pub/scm/linux/kernel/git/masahiroy/linux-uniphier.git
  S:    Maintained
  F:    arch/arm/boot/dts/uniphier*
  F:    arch/arm/include/asm/hardware/cache-uniphier.h
@@@ -2475,7 -2486,7 +2486,7 @@@ F:      include/net/bluetooth
  BONDING DRIVER
  M:    Jay Vosburgh <[email protected]>
  M:    Veaceslav Falico <[email protected]>
- M:    Andy Gospodarek <[email protected]>
+ M:    Andy Gospodarek <[email protected]>
  L:    [email protected]
  W:    http://sourceforge.net/projects/bonding/
  S:    Supported
@@@ -2490,7 -2501,7 +2501,7 @@@ S:      Supporte
  F:    kernel/bpf/
  
  BROADCOM B44 10/100 ETHERNET DRIVER
- M:    Gary Zambrano <zambrano@broadcom.com>
+ M:    Michael Chan <michael.chan@broadcom.com>
  L:    [email protected]
  S:    Supported
  F:    drivers/net/ethernet/broadcom/b44.*
@@@ -3238,7 -3249,7 +3249,7 @@@ F:      kernel/cpuset.
  CONTROL GROUP - MEMORY RESOURCE CONTROLLER (MEMCG)
  M:    Johannes Weiner <[email protected]>
  M:    Michal Hocko <[email protected]>
- M:    Vladimir Davydov <vdavydov@virtuozzo.com>
+ M:    Vladimir Davydov <vdavydov.dev@gmail.com>
  L:    [email protected]
  L:    [email protected]
  S:    Maintained
@@@ -3259,7 -3270,7 +3270,7 @@@ S:      Maintaine
  F:    drivers/net/wan/cosa*
  
  CPMAC ETHERNET DRIVER
- M:    Florian Fainelli <f[email protected]>
+ M:    Florian Fainelli <f[email protected]>
  L:    [email protected]
  S:    Maintained
  F:    drivers/net/ethernet/ti/cpmac.c
@@@ -4064,14 -4075,6 +4075,14 @@@ S:    Orphan / Obsolet
  F:    drivers/gpu/drm/i810/
  F:    include/uapi/drm/i810_drm.h
  
 +DRM DRIVERS FOR MEDIATEK
 +M:    CK Hu <[email protected]>
 +M:    Philipp Zabel <[email protected]>
 +L:    [email protected]
 +S:    Supported
 +F:    drivers/gpu/drm/mediatek/
 +F:    Documentation/devicetree/bindings/display/mediatek/
 +
  DRM DRIVER FOR MSM ADRENO GPU
  M:    Rob Clark <[email protected]>
  L:    [email protected]
  S:    Maintained
  F:    drivers/edac/sb_edac.c
  
+ EDAC-SKYLAKE
+ M:    Tony Luck <[email protected]>
+ L:    [email protected]
+ S:    Maintained
+ F:    drivers/edac/skx_edac.c
+
  EDAC-XGENE
  APPLIED MICRO (APM) X-GENE SOC EDAC
  M:     Loc Ho <[email protected]>
@@@ -6094,7 -6103,7 +6111,7 @@@ S:      Supporte
  F:    drivers/cpufreq/intel_pstate.c
  
  INTEL FRAMEBUFFER DRIVER (excluding 810 and 815)
- M:    Maik Broemme <mbroemme@plusserver.de>
+ M:    Maik Broemme <mbroemme@libmpq.org>
  L:    [email protected]
  S:    Maintained
  F:    Documentation/fb/intelfb.txt
@@@ -7457,7 -7466,8 +7474,8 @@@ F:      Documentation/devicetree/bindings/so
  F:    sound/soc/codecs/max9860.*
  
  MAXIM MUIC CHARGER DRIVERS FOR EXYNOS BASED BOARDS
- M:    Krzysztof Kozlowski <[email protected]>
+ M:    Krzysztof Kozlowski <[email protected]>
+ M:    Bartlomiej Zolnierkiewicz <[email protected]>
  L:    [email protected]
  S:    Supported
  F:    drivers/power/max14577_charger.c
@@@ -7473,7 -7483,8 +7491,8 @@@ F:      include/dt-bindings/*/*max77802.
  
  MAXIM PMIC AND MUIC DRIVERS FOR EXYNOS BASED BOARDS
  M:    Chanwoo Choi <[email protected]>
- M:    Krzysztof Kozlowski <[email protected]>
+ M:    Krzysztof Kozlowski <[email protected]>
+ M:    Bartlomiej Zolnierkiewicz <[email protected]>
  L:    [email protected]
  S:    Supported
  F:    drivers/*/max14577*.c
@@@ -7663,7 -7674,7 +7682,7 @@@ L:      [email protected]
  S:    Supported
  W:    https://github.com/SoftRoCE/rxe-dev/wiki/rxe-dev:-Home
  Q:    http://patchwork.kernel.org/project/linux-rdma/list/
- F:    drivers/infiniband/hw/rxe/
+ F:    drivers/infiniband/sw/rxe/
  F:    include/uapi/rdma/rdma_user_rxe.h
  
  MEMBARRIER SUPPORT
@@@ -8150,6 -8161,15 +8169,15 @@@ S:    Maintaine
  W:    https://fedorahosted.org/dropwatch/
  F:    net/core/drop_monitor.c
  
+ NETWORKING [DSA]
+ M:    Andrew Lunn <[email protected]>
+ M:    Vivien Didelot <[email protected]>
+ M:    Florian Fainelli <[email protected]>
+ S:    Maintained
+ F:    net/dsa/
+ F:    include/net/dsa.h
+ F:    drivers/net/dsa/
+
  NETWORKING [GENERAL]
  M:    "David S. Miller" <[email protected]>
  L:    [email protected]
@@@ -9239,7 -9259,7 +9267,7 @@@ F:      drivers/pinctrl/sh-pfc
  
  PIN CONTROLLER - SAMSUNG
  M:    Tomasz Figa <[email protected]>
- M:    Krzysztof Kozlowski <k[email protected]>
+ M:    Krzysztof Kozlowski <k[email protected]>
  M:    Sylwester Nawrocki <[email protected]>
  L:    [email protected] (moderated for non-subscribers)
  L:    [email protected] (moderated for non-subscribers)
@@@ -10172,7 -10192,7 +10200,7 @@@ S:   Maintaine
  F:    drivers/platform/x86/samsung-laptop.c
  
  SAMSUNG AUDIO (ASoC) DRIVERS
- M:    Krzysztof Kozlowski <k[email protected]>
+ M:    Krzysztof Kozlowski <k[email protected]>
  M:    Sangbeom Kim <[email protected]>
  M:    Sylwester Nawrocki <[email protected]>
  L:    [email protected] (moderated for non-subscribers)
@@@ -10187,7 -10207,8 +10215,8 @@@ F:   drivers/video/fbdev/s3c-fb.
  
  SAMSUNG MULTIFUNCTION PMIC DEVICE DRIVERS
  M:    Sangbeom Kim <[email protected]>
- M:    Krzysztof Kozlowski <[email protected]>
+ M:    Krzysztof Kozlowski <[email protected]>
+ M:    Bartlomiej Zolnierkiewicz <[email protected]>
  L:    [email protected]
  L:    [email protected]
  S:    Supported
@@@ -10246,6 -10267,17 +10275,17 @@@ S: Supporte
  L:    [email protected] (moderated for non-subscribers)
  F:    drivers/clk/samsung/
  
+ SAMSUNG SPI DRIVERS
+ M:    Kukjin Kim <[email protected]>
+ M:    Krzysztof Kozlowski <[email protected]>
+ M:    Andi Shyti <[email protected]>
+ L:    [email protected]
+ L:    [email protected] (moderated for non-subscribers)
+ S:    Maintained
+ F:    Documentation/devicetree/bindings/spi/spi-samsung.txt
+ F:    drivers/spi/spi-s3c*
+ F:    include/linux/platform_data/spi-s3c64xx.h
+
  SAMSUNG SXGBE DRIVERS
  M:    Byungho An <[email protected]>
  M:    Girish K S <[email protected]>
@@@ -11225,12 -11257,8 +11265,8 @@@ S:  Odd Fixe
  F:    drivers/staging/vt665?/
  
  STAGING - WILC1000 WIFI DRIVER
- M:    Johnny Kim <[email protected]>
- M:    Austin Shin <[email protected]>
- M:    Chris Park <[email protected]>
- M:    Tony Cho <[email protected]>
- M:    Glen Lee <[email protected]>
- M:    Leo Kim <[email protected]>
+ M:    Aditya Shankar <[email protected]>
+ M:    Ganesh Krishna <[email protected]>
  L:    [email protected]
  S:    Supported
  F:    drivers/staging/wilc1000/
@@@ -12550,7 -12578,7 +12586,7 @@@ F:   include/linux/if_*vlan.
  F:    net/8021q/
  
  VLYNQ BUS
- M:    Florian Fainelli <f[email protected]>
+ M:    Florian Fainelli <f[email protected]>
  L:    [email protected] (subscribers-only)
  S:    Maintained
  F:    drivers/vlynq/vlynq.c
diff --combined drivers/gpu/drm/amd/amdgpu/amdgpu.h
index 9d79e4ba0213be8c85a60d4fb6ebb58a60a055b1,700c56baf2de7110fdab3dacf1bee91d1a82dbb7..72c68dbb982136b73f84ad881ca30a90ca2f8866
  #include "amdgpu_ih.h"
  #include "amdgpu_irq.h"
  #include "amdgpu_ucode.h"
 +#include "amdgpu_ttm.h"
  #include "amdgpu_gds.h"
  #include "amd_powerplay.h"
  #include "amdgpu_acp.h"
  
  #include "gpu_scheduler.h"
 +#include "amdgpu_virt.h"
  
  /*
   * Modules parameters.
@@@ -65,7 -63,6 +65,7 @@@
  extern int amdgpu_modeset;
  extern int amdgpu_vram_limit;
  extern int amdgpu_gart_size;
 +extern int amdgpu_moverate;
  extern int amdgpu_benchmarking;
  extern int amdgpu_testing;
  extern int amdgpu_audio;
@@@ -94,9 -91,6 +94,9 @@@ extern unsigned amdgpu_pcie_lane_cap
  extern unsigned amdgpu_cg_mask;
  extern unsigned amdgpu_pg_mask;
  extern char *amdgpu_disable_cu;
 +extern int amdgpu_sclk_deep_sleep_en;
 +extern char *amdgpu_virtual_display;
 +extern unsigned amdgpu_pp_feature_mask;
  
  #define AMDGPU_WAIT_IDLE_TIMEOUT_IN_MS                3000
  #define AMDGPU_MAX_USEC_TIMEOUT                       100000  /* 100 ms */
  #define AMDGPU_MAX_RINGS                      16
  #define AMDGPU_MAX_GFX_RINGS                  1
  #define AMDGPU_MAX_COMPUTE_RINGS              8
 -#define AMDGPU_MAX_VCE_RINGS                  2
 +#define AMDGPU_MAX_VCE_RINGS                  3
  
  /* max number of IP instances */
  #define AMDGPU_MAX_SDMA_INSTANCES             2
@@@ -254,9 -248,10 +254,9 @@@ struct amdgpu_vm_pte_funcs 
                         uint64_t pe, uint64_t src,
                         unsigned count);
        /* write pte one entry at a time with addr mapping */
 -      void (*write_pte)(struct amdgpu_ib *ib,
 -                        const dma_addr_t *pages_addr, uint64_t pe,
 -                        uint64_t addr, unsigned count,
 -                        uint32_t incr, uint32_t flags);
 +      void (*write_pte)(struct amdgpu_ib *ib, uint64_t pe,
 +                        uint64_t value, unsigned count,
 +                        uint32_t incr);
        /* for linear pte/pde updates without addr mapping */
        void (*set_pte_pde)(struct amdgpu_ib *ib,
                            uint64_t pe,
@@@ -321,10 -316,6 +321,10 @@@ struct amdgpu_ring_funcs 
        /* note usage for clock and power gating */
        void (*begin_use)(struct amdgpu_ring *ring);
        void (*end_use)(struct amdgpu_ring *ring);
 +      void (*emit_switch_buffer) (struct amdgpu_ring *ring);
 +      void (*emit_cntxcntl) (struct amdgpu_ring *ring, uint32_t flags);
 +      unsigned (*get_emit_ib_size) (struct amdgpu_ring *ring);
 +      unsigned (*get_dma_frame_size) (struct amdgpu_ring *ring);
  };
  
  /*
@@@ -405,9 -396,48 +405,8 @@@ int amdgpu_fence_wait_empty(struct amdg
  unsigned amdgpu_fence_count_emitted(struct amdgpu_ring *ring);
  
  /*
 - * TTM.
 + * BO.
   */
 -
 -#define AMDGPU_TTM_LRU_SIZE   20
 -
 -struct amdgpu_mman_lru {
 -      struct list_head                *lru[TTM_NUM_MEM_TYPES];
 -      struct list_head                *swap_lru;
 -};
 -
 -struct amdgpu_mman {
 -      struct ttm_bo_global_ref        bo_global_ref;
 -      struct drm_global_reference     mem_global_ref;
 -      struct ttm_bo_device            bdev;
 -      bool                            mem_global_referenced;
 -      bool                            initialized;
 -
 -#if defined(CONFIG_DEBUG_FS)
 -      struct dentry                   *vram;
 -      struct dentry                   *gtt;
 -#endif
 -
 -      /* buffer handling */
 -      const struct amdgpu_buffer_funcs        *buffer_funcs;
 -      struct amdgpu_ring                      *buffer_funcs_ring;
 -      /* Scheduler entity for buffer moves */
 -      struct amd_sched_entity                 entity;
 -
 -      /* custom LRU management */
 -      struct amdgpu_mman_lru                  log2_size[AMDGPU_TTM_LRU_SIZE];
 -      /* guard for log2_size array, don't add anything in between */
 -      struct amdgpu_mman_lru                  guard;
 -};
 -
 -int amdgpu_copy_buffer(struct amdgpu_ring *ring,
 -                     uint64_t src_offset,
 -                     uint64_t dst_offset,
 -                     uint32_t byte_count,
 -                     struct reservation_object *resv,
 -                     struct fence **fence);
 -int amdgpu_mmap(struct file *filp, struct vm_area_struct *vma);
--
  struct amdgpu_bo_list_entry {
        struct amdgpu_bo                *robj;
        struct ttm_validate_buffer      tv;
@@@ -470,12 -500,10 +469,12 @@@ struct amdgpu_bo 
        struct amdgpu_device            *adev;
        struct drm_gem_object           gem_base;
        struct amdgpu_bo                *parent;
 +      struct amdgpu_bo                *shadow;
  
        struct ttm_bo_kmap_obj          dma_buf_vmap;
        struct amdgpu_mn                *mn;
        struct list_head                mn_list;
 +      struct list_head                shadow_list;
  };
  #define gem_to_amdgpu_bo(gobj) container_of((gobj), struct amdgpu_bo, gem_base)
  
@@@ -620,12 -648,11 +619,12 @@@ int amdgpu_gart_table_vram_pin(struct a
  void amdgpu_gart_table_vram_unpin(struct amdgpu_device *adev);
  int amdgpu_gart_init(struct amdgpu_device *adev);
  void amdgpu_gart_fini(struct amdgpu_device *adev);
- void amdgpu_gart_unbind(struct amdgpu_device *adev, unsigned offset,
+ void amdgpu_gart_unbind(struct amdgpu_device *adev, uint64_t offset,
                        int pages);
- int amdgpu_gart_bind(struct amdgpu_device *adev, unsigned offset,
+ int amdgpu_gart_bind(struct amdgpu_device *adev, uint64_t offset,
                     int pages, struct page **pagelist,
                     dma_addr_t *dma_addr, uint32_t flags);
 +int amdgpu_ttm_recover_gart(struct amdgpu_device *adev);
  
  /*
   * GPU MC structures, functions & helpers
@@@ -652,8 -679,6 +651,8 @@@ struct amdgpu_mc 
        uint32_t                fw_version;
        struct amdgpu_irq_src   vm_fault;
        uint32_t                vram_type;
 +      uint32_t                srbm_soft_reset;
 +      struct amdgpu_mode_mc_save save;
  };
  
  /*
@@@ -698,11 -723,10 +697,11 @@@ void amdgpu_doorbell_get_kfd_info(struc
   */
  
  struct amdgpu_flip_work {
 -      struct work_struct              flip_work;
 +      struct delayed_work             flip_work;
        struct work_struct              unpin_work;
        struct amdgpu_device            *adev;
        int                             crtc_id;
 +      u32                             target_vblank;
        uint64_t                        base;
        struct drm_pending_vblank_event *event;
        struct amdgpu_bo                *old_rbo;
@@@ -793,17 -817,13 +792,17 @@@ struct amdgpu_ring 
  /* maximum number of VMIDs */
  #define AMDGPU_NUM_VM 16
  
 +/* Maximum number of PTEs the hardware can write with one command */
 +#define AMDGPU_VM_MAX_UPDATE_SIZE     0x3FFFF
 +
  /* number of entries in page table */
  #define AMDGPU_VM_PTE_COUNT (1 << amdgpu_vm_block_size)
  
  /* PTBs (Page Table Blocks) need to be aligned to 32K */
  #define AMDGPU_VM_PTB_ALIGN_SIZE   32768
 -#define AMDGPU_VM_PTB_ALIGN_MASK (AMDGPU_VM_PTB_ALIGN_SIZE - 1)
 -#define AMDGPU_VM_PTB_ALIGN(a) (((a) + AMDGPU_VM_PTB_ALIGN_MASK) & ~AMDGPU_VM_PTB_ALIGN_MASK)
 +
 +/* LOG2 number of continuous pages for the fragment field */
 +#define AMDGPU_LOG2_PAGES_PER_FRAG 4
  
  #define AMDGPU_PTE_VALID      (1 << 0)
  #define AMDGPU_PTE_SYSTEM     (1 << 1)
  #define AMDGPU_PTE_READABLE   (1 << 5)
  #define AMDGPU_PTE_WRITEABLE  (1 << 6)
  
 -/* PTE (Page Table Entry) fragment field for different page sizes */
 -#define AMDGPU_PTE_FRAG_4KB   (0 << 7)
 -#define AMDGPU_PTE_FRAG_64KB  (4 << 7)
 -#define AMDGPU_LOG2_PAGES_PER_FRAG 4
 +#define AMDGPU_PTE_FRAG(x)    ((x & 0x1f) << 7)
  
  /* How to program VM fault handling */
  #define AMDGPU_VM_FAULT_STOP_NEVER    0
  struct amdgpu_vm_pt {
        struct amdgpu_bo_list_entry     entry;
        uint64_t                        addr;
 +      uint64_t                        shadow_addr;
  };
  
  struct amdgpu_vm {
@@@ -928,6 -950,7 +927,6 @@@ int amdgpu_vm_grab_id(struct amdgpu_vm 
                      struct amdgpu_job *job);
  int amdgpu_vm_flush(struct amdgpu_ring *ring, struct amdgpu_job *job);
  void amdgpu_vm_reset_id(struct amdgpu_device *adev, unsigned vm_id);
 -uint64_t amdgpu_vm_map_gart(const dma_addr_t *pages_addr, uint64_t addr);
  int amdgpu_vm_update_page_directory(struct amdgpu_device *adev,
                                    struct amdgpu_vm *vm);
  int amdgpu_vm_clear_freed(struct amdgpu_device *adev,
@@@ -936,7 -959,7 +935,7 @@@ int amdgpu_vm_clear_invalids(struct amd
                             struct amdgpu_sync *sync);
  int amdgpu_vm_bo_update(struct amdgpu_device *adev,
                        struct amdgpu_bo_va *bo_va,
 -                      struct ttm_mem_reg *mem);
 +                      bool clear);
  void amdgpu_vm_bo_invalidate(struct amdgpu_device *adev,
                             struct amdgpu_bo *bo);
  struct amdgpu_bo_va *amdgpu_vm_bo_find(struct amdgpu_vm *vm,
@@@ -971,7 -994,6 +970,7 @@@ struct amdgpu_ctx 
        spinlock_t              ring_lock;
        struct fence            **fences;
        struct amdgpu_ctx_ring  rings[AMDGPU_MAX_RINGS];
 +      bool preamble_presented;
  };
  
  struct amdgpu_ctx_mgr {
@@@ -1175,10 -1197,6 +1174,10 @@@ struct amdgpu_gfx 
        unsigned                        ce_ram_size;
        struct amdgpu_cu_info           cu_info;
        const struct amdgpu_gfx_funcs   *funcs;
 +
 +      /* reset mask */
 +      uint32_t                        grbm_soft_reset;
 +      uint32_t                        srbm_soft_reset;
  };
  
  int amdgpu_ib_get(struct amdgpu_device *adev, struct amdgpu_vm *vm,
@@@ -1231,16 -1249,11 +1230,16 @@@ struct amdgpu_cs_parser 
        struct fence                    *fence;
        uint64_t                        bytes_moved_threshold;
        uint64_t                        bytes_moved;
 +      struct amdgpu_bo_list_entry     *evictable;
  
        /* user fence */
        struct amdgpu_bo_list_entry     uf_entry;
  };
  
 +#define AMDGPU_PREAMBLE_IB_PRESENT          (1 << 0) /* bit set means command submit involves a preamble IB */
 +#define AMDGPU_PREAMBLE_IB_PRESENT_FIRST    (1 << 1) /* bit set means preamble IB is first presented in belonging context */
 +#define AMDGPU_HAVE_CTX_SWITCH              (1 << 2) /* bit set means context switch occurred */
 +
  struct amdgpu_job {
        struct amd_sched_job    base;
        struct amdgpu_device    *adev;
        struct amdgpu_sync      sync;
        struct amdgpu_ib        *ibs;
        struct fence            *fence; /* the hw fence */
 +      uint32_t                preamble_status;
        uint32_t                num_ibs;
        void                    *owner;
 -      uint64_t                ctx;
 +      uint64_t                fence_ctx; /* the fence_context this job uses */
        bool                    vm_needs_flush;
        unsigned                vm_id;
        uint64_t                vm_pd_addr;
@@@ -1673,7 -1685,6 +1672,7 @@@ struct amdgpu_uvd 
        bool                    address_64_bit;
        bool                    use_ctx_buf;
        struct amd_sched_entity entity;
 +      uint32_t                srbm_soft_reset;
  };
  
  /*
@@@ -1700,8 -1711,6 +1699,8 @@@ struct amdgpu_vce 
        struct amdgpu_irq_src   irq;
        unsigned                harvest_config;
        struct amd_sched_entity entity;
 +      uint32_t                srbm_soft_reset;
 +      unsigned                num_rings;
  };
  
  /*
@@@ -1719,14 -1728,9 +1718,14 @@@ struct amdgpu_sdma_instance 
  
  struct amdgpu_sdma {
        struct amdgpu_sdma_instance instance[AMDGPU_MAX_SDMA_INSTANCES];
 +#ifdef CONFIG_DRM_AMDGPU_SI
 +      //SI DMA has a different trap irq number for the second engine
 +      struct amdgpu_irq_src   trap_irq_1;
 +#endif
        struct amdgpu_irq_src   trap_irq;
        struct amdgpu_irq_src   illegal_inst_irq;
        int                     num_instances;
 +      uint32_t                    srbm_soft_reset;
  };
  
  /*
@@@ -1828,7 -1832,6 +1827,7 @@@ struct amdgpu_asic_funcs 
        bool (*read_disabled_bios)(struct amdgpu_device *adev);
        bool (*read_bios_from_rom)(struct amdgpu_device *adev,
                                   u8 *bios, u32 length_bytes);
 +      void (*detect_hw_virtualization) (struct amdgpu_device *adev);
        int (*read_register)(struct amdgpu_device *adev, u32 se_num,
                             u32 sh_num, u32 reg_offset, u32 *value);
        void (*set_vga_state)(struct amdgpu_device *adev, bool state);
        /* MM block clocks */
        int (*set_uvd_clocks)(struct amdgpu_device *adev, u32 vclk, u32 dclk);
        int (*set_vce_clocks)(struct amdgpu_device *adev, u32 evclk, u32 ecclk);
 -      /* query virtual capabilities */
 -      u32 (*get_virtual_caps)(struct amdgpu_device *adev);
 +      /* static power management */
 +      int (*get_pcie_lanes)(struct amdgpu_device *adev);
 +      void (*set_pcie_lanes)(struct amdgpu_device *adev, int lanes);
  };
  
  /*
@@@ -1933,6 -1935,16 +1932,6 @@@ struct amdgpu_atcs 
  struct cgs_device *amdgpu_cgs_create_device(struct amdgpu_device *adev);
  void amdgpu_cgs_destroy_device(struct cgs_device *cgs_device);
  
 -
 -/* GPU virtualization */
 -#define AMDGPU_VIRT_CAPS_SRIOV_EN       (1 << 0)
 -#define AMDGPU_VIRT_CAPS_IS_VF          (1 << 1)
 -struct amdgpu_virtualization {
 -      bool supports_sr_iov;
 -      bool is_virtual;
 -      u32 caps;
 -};
 -
  /*
   * Core structure, functions and helpers.
   */
@@@ -1946,7 -1958,6 +1945,7 @@@ struct amdgpu_ip_block_status 
        bool valid;
        bool sw;
        bool hw;
 +      bool hang;
  };
  
  struct amdgpu_device {
        spinlock_t pcie_idx_lock;
        amdgpu_rreg_t                   pcie_rreg;
        amdgpu_wreg_t                   pcie_wreg;
 +      amdgpu_rreg_t                   pciep_rreg;
 +      amdgpu_wreg_t                   pciep_wreg;
        /* protects concurrent UVD register access */
        spinlock_t uvd_ctx_idx_lock;
        amdgpu_rreg_t                   uvd_ctx_rreg;
        atomic64_t                      num_evictions;
        atomic_t                        gpu_reset_counter;
  
 +      /* data for buffer migration throttling */
 +      struct {
 +              spinlock_t              lock;
 +              s64                     last_update_us;
 +              s64                     accum_us; /* accumulated microseconds */
 +              u32                     log2_max_MBps;
 +      } mm_stats;
 +
        /* display */
 +      bool                            enable_virtual_display;
        struct amdgpu_mode_info         mode_info;
        struct work_struct              hotplug_work;
        struct amdgpu_irq_src           crtc_irq;
        struct kfd_dev          *kfd;
  
        struct amdgpu_virtualization virtualization;
 +
 +      /* link all shadow bo */
 +      struct list_head                shadow_list;
 +      struct mutex                    shadow_list_lock;
 +      /* link all gtt */
 +      spinlock_t                      gtt_list_lock;
 +      struct list_head                gtt_list;
 +
  };
  
  bool amdgpu_device_is_px(struct drm_device *dev);
@@@ -2159,8 -2151,6 +2158,8 @@@ void amdgpu_mm_wdoorbell(struct amdgpu_
  #define REG_GET(FIELD, v) (((v) << FIELD##_SHIFT) & FIELD##_MASK)
  #define RREG32_PCIE(reg) adev->pcie_rreg(adev, (reg))
  #define WREG32_PCIE(reg, v) adev->pcie_wreg(adev, (reg), (v))
 +#define RREG32_PCIE_PORT(reg) adev->pciep_rreg(adev, (reg))
 +#define WREG32_PCIE_PORT(reg, v) adev->pciep_wreg(adev, (reg), (v))
  #define RREG32_SMC(reg) adev->smc_rreg(adev, (reg))
  #define WREG32_SMC(reg, v) adev->smc_wreg(adev, (reg), (v))
  #define RREG32_UVD_CTX(reg) adev->uvd_ctx_rreg(adev, (reg))
  #define REG_GET_FIELD(value, reg, field)                              \
        (((value) & REG_FIELD_MASK(reg, field)) >> REG_FIELD_SHIFT(reg, field))
  
 +#define WREG32_FIELD(reg, field, val) \
 +      WREG32(mm##reg, (RREG32(mm##reg) & ~REG_FIELD_MASK(reg, field)) | (val) << REG_FIELD_SHIFT(reg, field))
 +
  /*
   * BIOS helpers.
   */
@@@ -2250,17 -2237,14 +2249,17 @@@ amdgpu_get_sdma_instance(struct amdgpu_
  #define amdgpu_asic_get_xclk(adev) (adev)->asic_funcs->get_xclk((adev))
  #define amdgpu_asic_set_uvd_clocks(adev, v, d) (adev)->asic_funcs->set_uvd_clocks((adev), (v), (d))
  #define amdgpu_asic_set_vce_clocks(adev, ev, ec) (adev)->asic_funcs->set_vce_clocks((adev), (ev), (ec))
 -#define amdgpu_asic_get_virtual_caps(adev) ((adev)->asic_funcs->get_virtual_caps((adev)))
 +#define amdgpu_get_pcie_lanes(adev) (adev)->asic_funcs->get_pcie_lanes((adev))
 +#define amdgpu_set_pcie_lanes(adev, l) (adev)->asic_funcs->set_pcie_lanes((adev), (l))
 +#define amdgpu_asic_get_gpu_clock_counter(adev) (adev)->asic_funcs->get_gpu_clock_counter((adev))
  #define amdgpu_asic_read_disabled_bios(adev) (adev)->asic_funcs->read_disabled_bios((adev))
  #define amdgpu_asic_read_bios_from_rom(adev, b, l) (adev)->asic_funcs->read_bios_from_rom((adev), (b), (l))
 +#define amdgpu_asic_detect_hw_virtualization(adev) (adev)->asic_funcs->detect_hw_virtualization((adev))
  #define amdgpu_asic_read_register(adev, se, sh, offset, v)((adev)->asic_funcs->read_register((adev), (se), (sh), (offset), (v)))
  #define amdgpu_gart_flush_gpu_tlb(adev, vmid) (adev)->gart.gart_funcs->flush_gpu_tlb((adev), (vmid))
  #define amdgpu_gart_set_pte_pde(adev, pt, idx, addr, flags) (adev)->gart.gart_funcs->set_pte_pde((adev), (pt), (idx), (addr), (flags))
  #define amdgpu_vm_copy_pte(adev, ib, pe, src, count) ((adev)->vm_manager.vm_pte_funcs->copy_pte((ib), (pe), (src), (count)))
 -#define amdgpu_vm_write_pte(adev, ib, pa, pe, addr, count, incr, flags) ((adev)->vm_manager.vm_pte_funcs->write_pte((ib), (pa), (pe), (addr), (count), (incr), (flags)))
 +#define amdgpu_vm_write_pte(adev, ib, pe, value, count, incr) ((adev)->vm_manager.vm_pte_funcs->write_pte((ib), (pe), (value), (count), (incr)))
  #define amdgpu_vm_set_pte_pde(adev, ib, pe, addr, count, incr, flags) ((adev)->vm_manager.vm_pte_funcs->set_pte_pde((ib), (pe), (addr), (count), (incr), (flags)))
  #define amdgpu_ring_parse_cs(r, p, ib) ((r)->funcs->parse_cs((p), (ib)))
  #define amdgpu_ring_test_ring(r) (r)->funcs->test_ring((r))
  #define amdgpu_ring_emit_gds_switch(r, v, db, ds, wb, ws, ab, as) (r)->funcs->emit_gds_switch((r), (v), (db), (ds), (wb), (ws), (ab), (as))
  #define amdgpu_ring_emit_hdp_flush(r) (r)->funcs->emit_hdp_flush((r))
  #define amdgpu_ring_emit_hdp_invalidate(r) (r)->funcs->emit_hdp_invalidate((r))
 +#define amdgpu_ring_emit_switch_buffer(r) (r)->funcs->emit_switch_buffer((r))
 +#define amdgpu_ring_emit_cntxcntl(r, d) (r)->funcs->emit_cntxcntl((r), (d))
  #define amdgpu_ring_pad_ib(r, ib) ((r)->funcs->pad_ib((r), (ib)))
  #define amdgpu_ring_init_cond_exec(r) (r)->funcs->init_cond_exec((r))
  #define amdgpu_ring_patch_cond_exec(r,o) (r)->funcs->patch_cond_exec((r),(o))
 +#define amdgpu_ring_get_emit_ib_size(r) (r)->funcs->get_emit_ib_size((r))
 +#define amdgpu_ring_get_dma_frame_size(r) (r)->funcs->get_dma_frame_size((r))
  #define amdgpu_ih_get_wptr(adev) (adev)->irq.ih_funcs->get_wptr((adev))
  #define amdgpu_ih_decode_iv(adev, iv) (adev)->irq.ih_funcs->decode_iv((adev), (iv))
  #define amdgpu_ih_set_rptr(adev) (adev)->irq.ih_funcs->set_rptr((adev))
  #define amdgpu_gfx_get_gpu_clock_counter(adev) (adev)->gfx.funcs->get_gpu_clock_counter((adev))
  #define amdgpu_gfx_select_se_sh(adev, se, sh, instance) (adev)->gfx.funcs->select_se_sh((adev), (se), (sh), (instance))
  
 +#define amdgpu_dpm_read_sensor(adev, idx, value) \
 +      ((adev)->pp_enabled ? \
 +              (adev)->powerplay.pp_funcs->read_sensor(adev->powerplay.pp_handle, (idx), (value)) : \
 +              -EINVAL)
 +
  #define amdgpu_dpm_get_temperature(adev) \
        ((adev)->pp_enabled ?                                           \
              (adev)->powerplay.pp_funcs->get_temperature((adev)->powerplay.pp_handle) : \
              (adev)->powerplay.pp_funcs->powergate_vce((adev)->powerplay.pp_handle, (g)) : \
              (adev)->pm.funcs->powergate_vce((adev), (g)))
  
 -#define amdgpu_dpm_debugfs_print_current_performance_level(adev, m) \
 -      ((adev)->pp_enabled ?                                           \
 -            (adev)->powerplay.pp_funcs->print_current_performance_level((adev)->powerplay.pp_handle, (m)) : \
 -            (adev)->pm.funcs->debugfs_print_current_performance_level((adev), (m)))
 -
  #define amdgpu_dpm_get_current_power_state(adev) \
        (adev)->powerplay.pp_funcs->get_current_power_state((adev)->powerplay.pp_handle)
  
  
  /* Common functions */
  int amdgpu_gpu_reset(struct amdgpu_device *adev);
 +bool amdgpu_need_backup(struct amdgpu_device *adev);
  void amdgpu_pci_config_reset(struct amdgpu_device *adev);
  bool amdgpu_card_posted(struct amdgpu_device *adev);
  void amdgpu_update_display_priority(struct amdgpu_device *adev);
@@@ -2435,10 -2414,6 +2434,10 @@@ uint32_t amdgpu_ttm_tt_pte_flags(struc
  void amdgpu_vram_location(struct amdgpu_device *adev, struct amdgpu_mc *mc, u64 base);
  void amdgpu_gtt_location(struct amdgpu_device *adev, struct amdgpu_mc *mc);
  void amdgpu_ttm_set_active_vram_size(struct amdgpu_device *adev, u64 size);
 +u64 amdgpu_ttm_get_gtt_mem_size(struct amdgpu_device *adev);
 +int amdgpu_ttm_global_init(struct amdgpu_device *adev);
 +int amdgpu_ttm_init(struct amdgpu_device *adev);
 +void amdgpu_ttm_fini(struct amdgpu_device *adev);
  void amdgpu_program_register_sequence(struct amdgpu_device *adev,
                                             const u32 *registers,
                                             const u32 array_size);
@@@ -2450,13 -2425,11 +2449,13 @@@ void amdgpu_register_atpx_handler(void)
  void amdgpu_unregister_atpx_handler(void);
  bool amdgpu_has_atpx_dgpu_power_cntl(void);
  bool amdgpu_is_atpx_hybrid(void);
 +bool amdgpu_atpx_dgpu_req_power_for_displays(void);
  #else
  static inline void amdgpu_register_atpx_handler(void) {}
  static inline void amdgpu_unregister_atpx_handler(void) {}
  static inline bool amdgpu_has_atpx_dgpu_power_cntl(void) { return false; }
  static inline bool amdgpu_is_atpx_hybrid(void) { return false; }
 +static inline bool amdgpu_atpx_dgpu_req_power_for_displays(void) { return false; }
  #endif
  
  /*
@@@ -2473,8 -2446,8 +2472,8 @@@ void amdgpu_driver_postclose_kms(struc
                                 struct drm_file *file_priv);
  void amdgpu_driver_preclose_kms(struct drm_device *dev,
                                struct drm_file *file_priv);
 -int amdgpu_suspend_kms(struct drm_device *dev, bool suspend, bool fbcon);
 -int amdgpu_resume_kms(struct drm_device *dev, bool resume, bool fbcon);
 +int amdgpu_device_suspend(struct drm_device *dev, bool suspend, bool fbcon);
 +int amdgpu_device_resume(struct drm_device *dev, bool resume, bool fbcon);
  u32 amdgpu_get_vblank_counter_kms(struct drm_device *dev, unsigned int pipe);
  int amdgpu_enable_vblank_kms(struct drm_device *dev, unsigned int pipe);
  void amdgpu_disable_vblank_kms(struct drm_device *dev, unsigned int pipe);
@@@ -2520,7 -2493,6 +2519,7 @@@ static inline void amdgpu_acpi_fini(str
  struct amdgpu_bo_va_mapping *
  amdgpu_cs_find_mapping(struct amdgpu_cs_parser *parser,
                       uint64_t addr, struct amdgpu_bo **bo);
 +int amdgpu_cs_sysvm_access_required(struct amdgpu_cs_parser *parser);
  
  #include "amdgpu_object.h"
  #endif
diff --combined drivers/gpu/drm/amd/amdgpu/amdgpu_atombios.c
index 59961db9c390e5aea9725289ce4b2900fbab388c,fe872b82e6191046b526a250e3e11b05af1cdb3f..8e6bf548d68907f871952f0109186a03373cdd5f
@@@ -259,33 -259,6 +259,33 @@@ static const int object_connector_conve
        DRM_MODE_CONNECTOR_Unknown
  };
  
 +bool amdgpu_atombios_has_dce_engine_info(struct amdgpu_device *adev)
 +{
 +      struct amdgpu_mode_info *mode_info = &adev->mode_info;
 +      struct atom_context *ctx = mode_info->atom_context;
 +      int index = GetIndexIntoMasterTable(DATA, Object_Header);
 +      u16 size, data_offset;
 +      u8 frev, crev;
 +      ATOM_DISPLAY_OBJECT_PATH_TABLE *path_obj;
 +      ATOM_OBJECT_HEADER *obj_header;
 +
 +      if (!amdgpu_atom_parse_data_header(ctx, index, &size, &frev, &crev, &data_offset))
 +              return false;
 +
 +      if (crev < 2)
 +              return false;
 +
 +      obj_header = (ATOM_OBJECT_HEADER *) (ctx->bios + data_offset);
 +      path_obj = (ATOM_DISPLAY_OBJECT_PATH_TABLE *)
 +          (ctx->bios + data_offset +
 +           le16_to_cpu(obj_header->usDisplayPathTableOffset));
 +
 +      if (path_obj->ucNumOfDispPath)
 +              return true;
 +      else
 +              return false;
 +}
 +
  bool amdgpu_atombios_get_connector_info_from_object_table(struct amdgpu_device *adev)
  {
        struct amdgpu_mode_info *mode_info = &adev->mode_info;
                            (le16_to_cpu(path->usConnObjectId) &
                             OBJECT_TYPE_MASK) >> OBJECT_TYPE_SHIFT;
  
+                       /* Skip TV/CV support */
+                       if ((le16_to_cpu(path->usDeviceTag) ==
+                            ATOM_DEVICE_TV1_SUPPORT) ||
+                           (le16_to_cpu(path->usDeviceTag) ==
+                            ATOM_DEVICE_CV_SUPPORT))
+                               continue;
+                       if (con_obj_id >= ARRAY_SIZE(object_connector_convert)) {
+                               DRM_ERROR("invalid con_obj_id %d for device tag 0x%04x\n",
+                                         con_obj_id, le16_to_cpu(path->usDeviceTag));
+                               continue;
+                       }
                        connector_type =
                                object_connector_convert[con_obj_id];
                        connector_object_id = con_obj_id;
@@@ -978,48 -964,6 +991,48 @@@ int amdgpu_atombios_get_clock_dividers(
                return -EINVAL;
  
        switch (crev) {
 +      case 2:
 +      case 3:
 +      case 5:
 +              /* r6xx, r7xx, evergreen, ni, si.
 +               * TODO: add support for asic_type <= CHIP_RV770*/
 +              if (clock_type == COMPUTE_ENGINE_PLL_PARAM) {
 +                      args.v3.ulClockParams = cpu_to_le32((clock_type << 24) | clock);
 +
 +                      amdgpu_atom_execute_table(adev->mode_info.atom_context, index, (uint32_t *)&args);
 +
 +                      dividers->post_div = args.v3.ucPostDiv;
 +                      dividers->enable_post_div = (args.v3.ucCntlFlag &
 +                                                   ATOM_PLL_CNTL_FLAG_PLL_POST_DIV_EN) ? true : false;
 +                      dividers->enable_dithen = (args.v3.ucCntlFlag &
 +                                                 ATOM_PLL_CNTL_FLAG_FRACTION_DISABLE) ? false : true;
 +                      dividers->whole_fb_div = le16_to_cpu(args.v3.ulFbDiv.usFbDiv);
 +                      dividers->frac_fb_div = le16_to_cpu(args.v3.ulFbDiv.usFbDivFrac);
 +                      dividers->ref_div = args.v3.ucRefDiv;
 +                      dividers->vco_mode = (args.v3.ucCntlFlag &
 +                                            ATOM_PLL_CNTL_FLAG_MPLL_VCO_MODE) ? 1 : 0;
 +              } else {
 +                      /* for SI we use ComputeMemoryClockParam for memory plls */
 +                      if (adev->asic_type >= CHIP_TAHITI)
 +                              return -EINVAL;
 +                      args.v5.ulClockParams = cpu_to_le32((clock_type << 24) | clock);
 +                      if (strobe_mode)
 +                              args.v5.ucInputFlag = ATOM_PLL_INPUT_FLAG_PLL_STROBE_MODE_EN;
 +
 +                      amdgpu_atom_execute_table(adev->mode_info.atom_context, index, (uint32_t *)&args);
 +
 +                      dividers->post_div = args.v5.ucPostDiv;
 +                      dividers->enable_post_div = (args.v5.ucCntlFlag &
 +                                                   ATOM_PLL_CNTL_FLAG_PLL_POST_DIV_EN) ? true : false;
 +                      dividers->enable_dithen = (args.v5.ucCntlFlag &
 +                                                 ATOM_PLL_CNTL_FLAG_FRACTION_DISABLE) ? false : true;
 +                      dividers->whole_fb_div = le16_to_cpu(args.v5.ulFbDiv.usFbDiv);
 +                      dividers->frac_fb_div = le16_to_cpu(args.v5.ulFbDiv.usFbDivFrac);
 +                      dividers->ref_div = args.v5.ucRefDiv;
 +                      dividers->vco_mode = (args.v5.ucCntlFlag &
 +                                            ATOM_PLL_CNTL_FLAG_MPLL_VCO_MODE) ? 1 : 0;
 +              }
 +              break;
        case 4:
                /* fusion */
                args.v4.ulClock = cpu_to_le32(clock);   /* 10 khz */
@@@ -1164,32 -1108,6 +1177,32 @@@ void amdgpu_atombios_set_engine_dram_ti
        amdgpu_atom_execute_table(adev->mode_info.atom_context, index, (uint32_t *)&args);
  }
  
 +void amdgpu_atombios_get_default_voltages(struct amdgpu_device *adev,
 +                                        u16 *vddc, u16 *vddci, u16 *mvdd)
 +{
 +      struct amdgpu_mode_info *mode_info = &adev->mode_info;
 +      int index = GetIndexIntoMasterTable(DATA, FirmwareInfo);
 +      u8 frev, crev;
 +      u16 data_offset;
 +      union firmware_info *firmware_info;
 +
 +      *vddc = 0;
 +      *vddci = 0;
 +      *mvdd = 0;
 +
 +      if (amdgpu_atom_parse_data_header(mode_info->atom_context, index, NULL,
 +                                 &frev, &crev, &data_offset)) {
 +              firmware_info =
 +                      (union firmware_info *)(mode_info->atom_context->bios +
 +                                              data_offset);
 +              *vddc = le16_to_cpu(firmware_info->info_14.usBootUpVDDCVoltage);
 +              if ((frev == 2) && (crev >= 2)) {
 +                      *vddci = le16_to_cpu(firmware_info->info_22.usBootUpVDDCIVoltage);
 +                      *mvdd = le16_to_cpu(firmware_info->info_22.usBootUpMVDDCVoltage);
 +              }
 +      }
 +}
 +
  union set_voltage {
        struct _SET_VOLTAGE_PS_ALLOCATION alloc;
        struct _SET_VOLTAGE_PARAMETERS v1;
        struct _SET_VOLTAGE_PARAMETERS_V1_3 v3;
  };
  
 +int amdgpu_atombios_get_max_vddc(struct amdgpu_device *adev, u8 voltage_type,
 +                           u16 voltage_id, u16 *voltage)
 +{
 +      union set_voltage args;
 +      int index = GetIndexIntoMasterTable(COMMAND, SetVoltage);
 +      u8 frev, crev;
 +
 +      if (!amdgpu_atom_parse_cmd_header(adev->mode_info.atom_context, index, &frev, &crev))
 +              return -EINVAL;
 +
 +      switch (crev) {
 +      case 1:
 +              return -EINVAL;
 +      case 2:
 +              args.v2.ucVoltageType = SET_VOLTAGE_GET_MAX_VOLTAGE;
 +              args.v2.ucVoltageMode = 0;
 +              args.v2.usVoltageLevel = 0;
 +
 +              amdgpu_atom_execute_table(adev->mode_info.atom_context, index, (uint32_t *)&args);
 +
 +              *voltage = le16_to_cpu(args.v2.usVoltageLevel);
 +              break;
 +      case 3:
 +              args.v3.ucVoltageType = voltage_type;
 +              args.v3.ucVoltageMode = ATOM_GET_VOLTAGE_LEVEL;
 +              args.v3.usVoltageLevel = cpu_to_le16(voltage_id);
 +
 +              amdgpu_atom_execute_table(adev->mode_info.atom_context, index, (uint32_t *)&args);
 +
 +              *voltage = le16_to_cpu(args.v3.usVoltageLevel);
 +              break;
 +      default:
 +              DRM_ERROR("Unknown table version %d, %d\n", frev, crev);
 +              return -EINVAL;
 +      }
 +
 +      return 0;
 +}
 +
 +int amdgpu_atombios_get_leakage_vddc_based_on_leakage_idx(struct amdgpu_device *adev,
 +                                                    u16 *voltage,
 +                                                    u16 leakage_idx)
 +{
 +      return amdgpu_atombios_get_max_vddc(adev, VOLTAGE_TYPE_VDDC, leakage_idx, voltage);
 +}
 +
  void amdgpu_atombios_set_voltage(struct amdgpu_device *adev,
                                 u16 voltage_level,
                                 u8 voltage_type)
@@@ -1463,50 -1335,6 +1476,50 @@@ static ATOM_VOLTAGE_OBJECT_V3 *amdgpu_a
        return NULL;
  }
  
 +int amdgpu_atombios_get_svi2_info(struct amdgpu_device *adev,
 +                            u8 voltage_type,
 +                            u8 *svd_gpio_id, u8 *svc_gpio_id)
 +{
 +      int index = GetIndexIntoMasterTable(DATA, VoltageObjectInfo);
 +      u8 frev, crev;
 +      u16 data_offset, size;
 +      union voltage_object_info *voltage_info;
 +      union voltage_object *voltage_object = NULL;
 +
 +      if (amdgpu_atom_parse_data_header(adev->mode_info.atom_context, index, &size,
 +                                 &frev, &crev, &data_offset)) {
 +              voltage_info = (union voltage_object_info *)
 +                      (adev->mode_info.atom_context->bios + data_offset);
 +
 +              switch (frev) {
 +              case 3:
 +                      switch (crev) {
 +                      case 1:
 +                              voltage_object = (union voltage_object *)
 +                                      amdgpu_atombios_lookup_voltage_object_v3(&voltage_info->v3,
 +                                                                    voltage_type,
 +                                                                    VOLTAGE_OBJ_SVID2);
 +                              if (voltage_object) {
 +                                      *svd_gpio_id = voltage_object->v3.asSVID2Obj.ucSVDGpioId;
 +                                      *svc_gpio_id = voltage_object->v3.asSVID2Obj.ucSVCGpioId;
 +                              } else {
 +                                      return -EINVAL;
 +                              }
 +                              break;
 +                      default:
 +                              DRM_ERROR("unknown voltage object table\n");
 +                              return -EINVAL;
 +                      }
 +                      break;
 +              default:
 +                      DRM_ERROR("unknown voltage object table\n");
 +                      return -EINVAL;
 +              }
 +
 +      }
 +      return 0;
 +}
 +
  bool
  amdgpu_atombios_is_voltage_gpio(struct amdgpu_device *adev,
                                u8 voltage_type, u8 voltage_mode)
diff --combined drivers/gpu/drm/amd/amdgpu/amdgpu_atpx_handler.c
index 550c5ee704ec4af5af6a0f376f93b890c052c52c,10b5ddf2c5887c36fd7f5d03f4ff6038c8e3cb8a..dae35a96a694d0b6ffc3de5aae94e7344a69e565
@@@ -29,7 -29,6 +29,7 @@@ struct amdgpu_atpx 
        acpi_handle handle;
        struct amdgpu_atpx_functions functions;
        bool is_hybrid;
 +      bool dgpu_req_power_for_displays;
  };
  
  static struct amdgpu_atpx_priv {
@@@ -74,10 -73,6 +74,10 @@@ bool amdgpu_is_atpx_hybrid(void) 
        return amdgpu_atpx_priv.atpx.is_hybrid;
  }
  
 +bool amdgpu_atpx_dgpu_req_power_for_displays(void) {
 +      return amdgpu_atpx_priv.atpx.dgpu_req_power_for_displays;
 +}
 +
  /**
   * amdgpu_atpx_call - call an ATPX method
   *
@@@ -205,23 -200,10 +205,14 @@@ static int amdgpu_atpx_validate(struct 
        atpx->is_hybrid = false;
        if (valid_bits & ATPX_MS_HYBRID_GFX_SUPPORTED) {
                printk("ATPX Hybrid Graphics\n");
- #if 1
-               /* This is a temporary hack until the D3 cold support
-                * makes it upstream.  The ATPX power_control method seems
-                * to still work on even if the system should be using
-                * the new standardized hybrid D3 cold ACPI interface.
-                */
-               atpx->functions.power_cntl = true;
- #else
                atpx->functions.power_cntl = false;
- #endif
                atpx->is_hybrid = true;
        }
  
 +      atpx->dgpu_req_power_for_displays = false;
 +      if (valid_bits & ATPX_DGPU_REQ_POWER_FOR_DISPLAYS)
 +              atpx->dgpu_req_power_for_displays = true;
 +
        return 0;
  }
  
diff --combined drivers/gpu/drm/amd/amdgpu/amdgpu_ib.c
index 4127e7ceace0584e4f6d9052dcd2a8fa077e7e14,ec1282af2479594b9ce4cbf44ebdbfa452e06894..6a6c86c9c1694eb475b9671c47cfaa4738473ed8
@@@ -124,8 -124,7 +124,8 @@@ int amdgpu_ib_schedule(struct amdgpu_ri
        bool skip_preamble, need_ctx_switch;
        unsigned patch_offset = ~0;
        struct amdgpu_vm *vm;
 -      uint64_t ctx;
 +      uint64_t fence_ctx;
 +      uint32_t status = 0, alloc_size;
  
        unsigned i;
        int r = 0;
        /* ring tests don't use a job */
        if (job) {
                vm = job->vm;
 -              ctx = job->ctx;
 +              fence_ctx = job->fence_ctx;
        } else {
                vm = NULL;
 -              ctx = 0;
 +              fence_ctx = 0;
        }
  
        if (!ring->ready) {
 -              dev_err(adev->dev, "couldn't schedule ib\n");
 +              dev_err(adev->dev, "couldn't schedule ib on ring <%s>\n", ring->name);
                return -EINVAL;
        }
  
                return -EINVAL;
        }
  
 -      r = amdgpu_ring_alloc(ring, 256 * num_ibs);
 +      alloc_size = amdgpu_ring_get_dma_frame_size(ring) +
 +              num_ibs * amdgpu_ring_get_emit_ib_size(ring);
 +
 +      r = amdgpu_ring_alloc(ring, alloc_size);
        if (r) {
                dev_err(adev->dev, "scheduling IB failed (%d).\n", r);
                return r;
        /* always set cond_exec_polling to CONTINUE */
        *ring->cond_exe_cpu_addr = 1;
  
 -      skip_preamble = ring->current_ctx == ctx;
 -      need_ctx_switch = ring->current_ctx != ctx;
 +      skip_preamble = ring->current_ctx == fence_ctx;
 +      need_ctx_switch = ring->current_ctx != fence_ctx;
 +      if (job && ring->funcs->emit_cntxcntl) {
 +              if (need_ctx_switch)
 +                      status |= AMDGPU_HAVE_CTX_SWITCH;
 +              status |= job->preamble_status;
 +              amdgpu_ring_emit_cntxcntl(ring, status);
 +      }
 +
        for (i = 0; i < num_ibs; ++i) {
                ib = &ibs[i];
  
                /* drop preamble IBs if we don't have a context switch */
 -              if ((ib->flags & AMDGPU_IB_FLAG_PREAMBLE) && skip_preamble)
 +              if ((ib->flags & AMDGPU_IB_FLAG_PREAMBLE) &&
 +                      skip_preamble &&
 +                      !(status & AMDGPU_PREAMBLE_IB_PRESENT_FIRST))
                        continue;
  
                amdgpu_ring_emit_ib(ring, ib, job ? job->vm_id : 0,
        if (patch_offset != ~0 && ring->funcs->patch_cond_exec)
                amdgpu_ring_patch_cond_exec(ring, patch_offset);
  
 -      ring->current_ctx = ctx;
 +      ring->current_ctx = fence_ctx;
 +      if (ring->funcs->emit_switch_buffer)
 +              amdgpu_ring_emit_switch_buffer(ring);
        amdgpu_ring_commit(ring);
        return 0;
  }
@@@ -295,7 -280,7 +295,7 @@@ void amdgpu_ib_pool_fini(struct amdgpu_
  int amdgpu_ib_ring_tests(struct amdgpu_device *adev)
  {
        unsigned i;
-       int r;
+       int r, ret = 0;
  
        for (i = 0; i < AMDGPU_MAX_RINGS; ++i) {
                struct amdgpu_ring *ring = adev->rings[i];
                        } else {
                                /* still not good, but we can live with it */
                                DRM_ERROR("amdgpu: failed testing IB on ring %d (%d).\n", i, r);
+                               ret = r;
                        }
                }
        }
-       return 0;
+       return ret;
  }
  
  /*
diff --combined drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c
index b63969d7887caf4d575a35a94b1c9f05ff3c126e,716f2afeb6a9a4a403b56a5c28ddf9de489f8746..160a094e1a934e9269a32ccdadf71fdcae796d32
@@@ -34,7 -34,6 +34,7 @@@
  #include <ttm/ttm_placement.h>
  #include <ttm/ttm_module.h>
  #include <ttm/ttm_page_alloc.h>
 +#include <ttm/ttm_memory.h>
  #include <drm/drmP.h>
  #include <drm/amdgpu_drm.h>
  #include <linux/seq_file.h>
@@@ -75,7 -74,7 +75,7 @@@ static void amdgpu_ttm_mem_global_relea
        ttm_mem_global_release(ref->object);
  }
  
 -static int amdgpu_ttm_global_init(struct amdgpu_device *adev)
 +int amdgpu_ttm_global_init(struct amdgpu_device *adev)
  {
        struct drm_global_reference *global_ref;
        struct amdgpu_ring *ring;
        global_ref->init = &amdgpu_ttm_mem_global_init;
        global_ref->release = &amdgpu_ttm_mem_global_release;
        r = drm_global_item_ref(global_ref);
 -      if (r != 0) {
 +      if (r) {
                DRM_ERROR("Failed setting up TTM memory accounting "
                          "subsystem.\n");
 -              return r;
 +              goto error_mem;
        }
  
        adev->mman.bo_global_ref.mem_glob =
        global_ref->init = &ttm_bo_global_init;
        global_ref->release = &ttm_bo_global_release;
        r = drm_global_item_ref(global_ref);
 -      if (r != 0) {
 +      if (r) {
                DRM_ERROR("Failed setting up TTM BO subsystem.\n");
 -              drm_global_item_unref(&adev->mman.mem_global_ref);
 -              return r;
 +              goto error_bo;
        }
  
        ring = adev->mman.buffer_funcs_ring;
        rq = &ring->sched.sched_rq[AMD_SCHED_PRIORITY_KERNEL];
        r = amd_sched_entity_init(&ring->sched, &adev->mman.entity,
                                  rq, amdgpu_sched_jobs);
 -      if (r != 0) {
 +      if (r) {
                DRM_ERROR("Failed setting up TTM BO move run queue.\n");
 -              drm_global_item_unref(&adev->mman.mem_global_ref);
 -              drm_global_item_unref(&adev->mman.bo_global_ref.ref);
 -              return r;
 +              goto error_entity;
        }
  
        adev->mman.mem_global_referenced = true;
  
        return 0;
 +
 +error_entity:
 +      drm_global_item_unref(&adev->mman.bo_global_ref.ref);
 +error_bo:
 +      drm_global_item_unref(&adev->mman.mem_global_ref);
 +error_mem:
 +      return r;
  }
  
  static void amdgpu_ttm_global_fini(struct amdgpu_device *adev)
@@@ -201,7 -196,6 +201,7 @@@ static void amdgpu_evict_flags(struct t
                .lpfn = 0,
                .flags = TTM_PL_MASK_CACHING | TTM_PL_FLAG_SYSTEM
        };
 +      unsigned i;
  
        if (!amdgpu_ttm_bo_is_amdgpu_bo(bo)) {
                placement->placement = &placements;
        rbo = container_of(bo, struct amdgpu_bo, tbo);
        switch (bo->mem.mem_type) {
        case TTM_PL_VRAM:
 -              if (rbo->adev->mman.buffer_funcs_ring->ready == false)
 +              if (rbo->adev->mman.buffer_funcs_ring->ready == false) {
                        amdgpu_ttm_placement_from_domain(rbo, AMDGPU_GEM_DOMAIN_CPU);
 -              else
 +              } else {
                        amdgpu_ttm_placement_from_domain(rbo, AMDGPU_GEM_DOMAIN_GTT);
 +                      for (i = 0; i < rbo->placement.num_placement; ++i) {
 +                              if (!(rbo->placements[i].flags &
 +                                    TTM_PL_FLAG_TT))
 +                                      continue;
 +
 +                              if (rbo->placements[i].lpfn)
 +                                      continue;
 +
 +                              /* set an upper limit to force directly
 +                               * allocating address space for the BO.
 +                               */
 +                              rbo->placements[i].lpfn =
 +                                      rbo->adev->mc.gtt_size >> PAGE_SHIFT;
 +                      }
 +              }
                break;
        case TTM_PL_TT:
        default:
@@@ -246,8 -225,7 +246,8 @@@ static int amdgpu_verify_access(struct 
  
        if (amdgpu_ttm_tt_get_usermm(bo->ttm))
                return -EPERM;
 -      return drm_vma_node_verify_access(&rbo->gem_base.vma_node, filp);
 +      return drm_vma_node_verify_access(&rbo->gem_base.vma_node,
 +                                        filp->private_data);
  }
  
  static void amdgpu_move_null(struct ttm_buffer_object *bo,
@@@ -273,30 -251,26 +273,30 @@@ static int amdgpu_move_blit(struct ttm_
  
        adev = amdgpu_get_adev(bo->bdev);
        ring = adev->mman.buffer_funcs_ring;
-       old_start = old_mem->start << PAGE_SHIFT;
-       new_start = new_mem->start << PAGE_SHIFT;
+       old_start = (u64)old_mem->start << PAGE_SHIFT;
+       new_start = (u64)new_mem->start << PAGE_SHIFT;
  
        switch (old_mem->mem_type) {
 -      case TTM_PL_VRAM:
 -              old_start += adev->mc.vram_start;
 -              break;
        case TTM_PL_TT:
 -              old_start += adev->mc.gtt_start;
 +              r = amdgpu_ttm_bind(bo->ttm, old_mem);
 +              if (r)
 +                      return r;
 +
 +      case TTM_PL_VRAM:
 +              old_start += bo->bdev->man[old_mem->mem_type].gpu_offset;
                break;
        default:
                DRM_ERROR("Unknown placement %d\n", old_mem->mem_type);
                return -EINVAL;
        }
        switch (new_mem->mem_type) {
 -      case TTM_PL_VRAM:
 -              new_start += adev->mc.vram_start;
 -              break;
        case TTM_PL_TT:
 -              new_start += adev->mc.gtt_start;
 +              r = amdgpu_ttm_bind(bo->ttm, new_mem);
 +              if (r)
 +                      return r;
 +
 +      case TTM_PL_VRAM:
 +              new_start += bo->bdev->man[new_mem->mem_type].gpu_offset;
                break;
        default:
                DRM_ERROR("Unknown placement %d\n", old_mem->mem_type);
  
        r = amdgpu_copy_buffer(ring, old_start, new_start,
                               new_mem->num_pages * PAGE_SIZE, /* bytes */
 -                             bo->resv, &fence);
 +                             bo->resv, &fence, false);
        if (r)
                return r;
  
@@@ -340,7 -314,7 +340,7 @@@ static int amdgpu_move_vram_ram(struct 
        placement.num_busy_placement = 1;
        placement.busy_placement = &placements;
        placements.fpfn = 0;
 -      placements.lpfn = 0;
 +      placements.lpfn = adev->mc.gtt_size >> PAGE_SHIFT;
        placements.flags = TTM_PL_MASK_CACHING | TTM_PL_FLAG_TT;
        r = ttm_bo_mem_space(bo, &placement, &tmp_mem,
                             interruptible, no_wait_gpu);
        if (unlikely(r)) {
                goto out_cleanup;
        }
 -      r = ttm_bo_move_ttm(bo, true, interruptible, no_wait_gpu, new_mem);
 +      r = ttm_bo_move_ttm(bo, interruptible, no_wait_gpu, new_mem);
  out_cleanup:
        ttm_bo_mem_put(bo, &tmp_mem);
        return r;
@@@ -387,14 -361,14 +387,14 @@@ static int amdgpu_move_ram_vram(struct 
        placement.num_busy_placement = 1;
        placement.busy_placement = &placements;
        placements.fpfn = 0;
 -      placements.lpfn = 0;
 +      placements.lpfn = adev->mc.gtt_size >> PAGE_SHIFT;
        placements.flags = TTM_PL_MASK_CACHING | TTM_PL_FLAG_TT;
        r = ttm_bo_mem_space(bo, &placement, &tmp_mem,
                             interruptible, no_wait_gpu);
        if (unlikely(r)) {
                return r;
        }
 -      r = ttm_bo_move_ttm(bo, true, interruptible, no_wait_gpu, &tmp_mem);
 +      r = ttm_bo_move_ttm(bo, interruptible, no_wait_gpu, &tmp_mem);
        if (unlikely(r)) {
                goto out_cleanup;
        }
@@@ -461,7 -435,8 +461,7 @@@ static int amdgpu_bo_move(struct ttm_bu
  
        if (r) {
  memcpy:
 -              r = ttm_bo_move_memcpy(bo, evict, interruptible,
 -                                     no_wait_gpu, new_mem);
 +              r = ttm_bo_move_memcpy(bo, interruptible, no_wait_gpu, new_mem);
                if (r) {
                        return r;
                }
@@@ -549,7 -524,6 +549,7 @@@ struct amdgpu_ttm_tt 
        spinlock_t              guptasklock;
        struct list_head        guptasks;
        atomic_t                mmu_invalidations;
 +      struct list_head        list;
  };
  
  int amdgpu_ttm_tt_get_user_pages(struct ttm_tt *ttm, struct page **pages)
@@@ -667,6 -641,7 +667,6 @@@ static int amdgpu_ttm_backend_bind(stru
                                   struct ttm_mem_reg *bo_mem)
  {
        struct amdgpu_ttm_tt *gtt = (void*)ttm;
 -      uint32_t flags = amdgpu_ttm_tt_pte_flags(gtt->adev, ttm, bo_mem);
        int r;
  
        if (gtt->userptr) {
                        return r;
                }
        }
 -      gtt->offset = (unsigned long)(bo_mem->start << PAGE_SHIFT);
 +      gtt->offset = (u64)bo_mem->start << PAGE_SHIFT;
        if (!ttm->num_pages) {
                WARN(1, "nothing to bind %lu pages for mreg %p back %p!\n",
                     ttm->num_pages, bo_mem, ttm);
            bo_mem->mem_type == AMDGPU_PL_OA)
                return -EINVAL;
  
 +      return 0;
 +}
 +
 +bool amdgpu_ttm_is_bound(struct ttm_tt *ttm)
 +{
 +      struct amdgpu_ttm_tt *gtt = (void *)ttm;
 +
 +      return gtt && !list_empty(&gtt->list);
 +}
 +
 +int amdgpu_ttm_bind(struct ttm_tt *ttm, struct ttm_mem_reg *bo_mem)
 +{
 +      struct amdgpu_ttm_tt *gtt = (void *)ttm;
 +      uint32_t flags;
 +      int r;
 +
 +      if (!ttm || amdgpu_ttm_is_bound(ttm))
 +              return 0;
 +
 +      flags = amdgpu_ttm_tt_pte_flags(gtt->adev, ttm, bo_mem);
        r = amdgpu_gart_bind(gtt->adev, gtt->offset, ttm->num_pages,
                ttm->pages, gtt->ttm.dma_address, flags);
  
        if (r) {
 -              DRM_ERROR("failed to bind %lu pages at 0x%08X\n",
 -                        ttm->num_pages, (unsigned)gtt->offset);
 +              DRM_ERROR("failed to bind %lu pages at 0x%08llX\n",
 +                        ttm->num_pages, gtt->offset);
                return r;
        }
 +      spin_lock(&gtt->adev->gtt_list_lock);
 +      list_add_tail(&gtt->list, &gtt->adev->gtt_list);
 +      spin_unlock(&gtt->adev->gtt_list_lock);
 +      return 0;
 +}
 +
 +int amdgpu_ttm_recover_gart(struct amdgpu_device *adev)
 +{
 +      struct amdgpu_ttm_tt *gtt, *tmp;
 +      struct ttm_mem_reg bo_mem;
 +      uint32_t flags;
 +      int r;
 +
 +      bo_mem.mem_type = TTM_PL_TT;
 +      spin_lock(&adev->gtt_list_lock);
 +      list_for_each_entry_safe(gtt, tmp, &adev->gtt_list, list) {
 +              flags = amdgpu_ttm_tt_pte_flags(gtt->adev, &gtt->ttm.ttm, &bo_mem);
 +              r = amdgpu_gart_bind(adev, gtt->offset, gtt->ttm.ttm.num_pages,
 +                                   gtt->ttm.ttm.pages, gtt->ttm.dma_address,
 +                                   flags);
 +              if (r) {
 +                      spin_unlock(&adev->gtt_list_lock);
 +                      DRM_ERROR("failed to bind %lu pages at 0x%08llX\n",
 +                                gtt->ttm.ttm.num_pages, gtt->offset);
 +                      return r;
 +              }
 +      }
 +      spin_unlock(&adev->gtt_list_lock);
        return 0;
  }
  
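
amdgpu_ttm_bind() above threads every bound TT onto adev->gtt_list under gtt_list_lock, list_empty() doubles as the is-bound check, and amdgpu_ttm_recover_gart() walks the list to rebind everything after a reset. A standalone sketch of that intrusive-list pattern, with minimal stand-ins for the kernel's list helpers:

    #include <stdbool.h>
    #include <stdio.h>

    struct node { struct node *prev, *next; };      /* stand-in for struct list_head */

    static void node_init(struct node *n) { n->prev = n->next = n; }
    static bool node_empty(const struct node *n) { return n->next == n; }

    static void node_add_tail(struct node *n, struct node *head)
    {
            n->prev = head->prev;
            n->next = head;
            head->prev->next = n;
            head->prev = n;
    }

    static void node_del_init(struct node *n)
    {
            n->prev->next = n->next;
            n->next->prev = n->prev;
            node_init(n);
    }

    struct tt {                     /* stand-in for amdgpu_ttm_tt */
            struct node list;       /* linked into the per-device bound list */
    };

    int main(void)
    {
            struct node bound_list; /* stand-in for adev->gtt_list */
            struct tt a;

            node_init(&bound_list);
            node_init(&a.list);

            printf("bound? %d\n", !node_empty(&a.list));    /* 0: not bound yet */
            node_add_tail(&a.list, &bound_list);            /* bind */
            printf("bound? %d\n", !node_empty(&a.list));    /* 1: bound, recoverable */
            node_del_init(&a.list);                         /* unbind */
            printf("bound? %d\n", !node_empty(&a.list));    /* 0 again */
            return 0;
    }
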
@@@ -750,9 -677,6 +750,9 @@@ static int amdgpu_ttm_backend_unbind(st
  {
        struct amdgpu_ttm_tt *gtt = (void *)ttm;
  
 +      if (!amdgpu_ttm_is_bound(ttm))
 +              return 0;
 +
        /* unbind shouldn't be done for GDS/GWS/OA in ttm_bo_clean_mm */
        if (gtt->adev->gart.ready)
                amdgpu_gart_unbind(gtt->adev, gtt->offset, ttm->num_pages);
        if (gtt->userptr)
                amdgpu_ttm_tt_unpin_userptr(ttm);
  
 +      spin_lock(&gtt->adev->gtt_list_lock);
 +      list_del_init(&gtt->list);
 +      spin_unlock(&gtt->adev->gtt_list_lock);
 +
        return 0;
  }
  
@@@ -800,7 -720,6 +800,7 @@@ static struct ttm_tt *amdgpu_ttm_tt_cre
                kfree(gtt);
                return NULL;
        }
 +      INIT_LIST_HEAD(&gtt->list);
        return &gtt->ttm.ttm;
  }
  
@@@ -1072,6 -991,10 +1072,6 @@@ int amdgpu_ttm_init(struct amdgpu_devic
        unsigned i, j;
        int r;
  
 -      r = amdgpu_ttm_global_init(adev);
 -      if (r) {
 -              return r;
 -      }
        /* No other users of the address space, so set it to 0 */
        r = ttm_bo_device_init(&adev->mman.bdev,
                               adev->mman.bo_global_ref.ref.object,
@@@ -1236,7 -1159,7 +1236,7 @@@ int amdgpu_copy_buffer(struct amdgpu_ri
                       uint64_t dst_offset,
                       uint32_t byte_count,
                       struct reservation_object *resv,
 -                     struct fence **fence)
 +                     struct fence **fence, bool direct_submit)
  {
        struct amdgpu_device *adev = ring->adev;
        struct amdgpu_job *job;
                byte_count -= cur_size_in_bytes;
        }
  
 +      amdgpu_ring_pad_ib(ring, &job->ibs[0]);
 +      WARN_ON(job->ibs[0].length_dw > num_dw);
 +      if (direct_submit) {
 +              r = amdgpu_ib_schedule(ring, job->num_ibs, job->ibs,
 +                                     NULL, NULL, fence);
 +              job->fence = fence_get(*fence);
 +              if (r)
 +                      DRM_ERROR("Error scheduling IBs (%d)\n", r);
 +              amdgpu_job_free(job);
 +      } else {
 +              r = amdgpu_job_submit(job, ring, &adev->mman.entity,
 +                                    AMDGPU_FENCE_OWNER_UNDEFINED, fence);
 +              if (r)
 +                      goto error_free;
 +      }
 +
 +      return r;
 +
 +error_free:
 +      amdgpu_job_free(job);
 +      return r;
 +}
 +
 +int amdgpu_fill_buffer(struct amdgpu_bo *bo,
 +              uint32_t src_data,
 +              struct reservation_object *resv,
 +              struct fence **fence)
 +{
 +      struct amdgpu_device *adev = bo->adev;
 +      struct amdgpu_job *job;
 +      struct amdgpu_ring *ring = adev->mman.buffer_funcs_ring;
 +
 +      uint32_t max_bytes, byte_count;
 +      uint64_t dst_offset;
 +      unsigned int num_loops, num_dw;
 +      unsigned int i;
 +      int r;
 +
 +      byte_count = bo->tbo.num_pages << PAGE_SHIFT;
 +      max_bytes = adev->mman.buffer_funcs->fill_max_bytes;
 +      num_loops = DIV_ROUND_UP(byte_count, max_bytes);
 +      num_dw = num_loops * adev->mman.buffer_funcs->fill_num_dw;
 +
 +      /* for IB padding */
 +      while (num_dw & 0x7)
 +              num_dw++;
 +
 +      r = amdgpu_job_alloc_with_ib(adev, num_dw * 4, &job);
 +      if (r)
 +              return r;
 +
 +      if (resv) {
 +              r = amdgpu_sync_resv(adev, &job->sync, resv,
 +                              AMDGPU_FENCE_OWNER_UNDEFINED);
 +              if (r) {
 +                      DRM_ERROR("sync failed (%d).\n", r);
 +                      goto error_free;
 +              }
 +      }
 +
 +      dst_offset = bo->tbo.mem.start << PAGE_SHIFT;
 +      for (i = 0; i < num_loops; i++) {
 +              uint32_t cur_size_in_bytes = min(byte_count, max_bytes);
 +
 +              amdgpu_emit_fill_buffer(adev, &job->ibs[0], src_data,
 +                              dst_offset, cur_size_in_bytes);
 +
 +              dst_offset += cur_size_in_bytes;
 +              byte_count -= cur_size_in_bytes;
 +      }
 +
        amdgpu_ring_pad_ib(ring, &job->ibs[0]);
        WARN_ON(job->ibs[0].length_dw > num_dw);
        r = amdgpu_job_submit(job, ring, &adev->mman.entity,
 -                            AMDGPU_FENCE_OWNER_UNDEFINED, fence);
 +                      AMDGPU_FENCE_OWNER_UNDEFINED, fence);
        if (r)
                goto error_free;
  
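
amdgpu_fill_buffer() above sizes the job from DIV_ROUND_UP(byte_count, fill_max_bytes) and then emits one packet per chunk of at most max_bytes, while amdgpu_copy_buffer() gains a direct_submit path around the scheduler. The chunking arithmetic as a standalone sketch; the 0x1fffff limit is only an assumed value for illustration:

    #include <stdint.h>
    #include <stdio.h>

    #define DIV_ROUND_UP(n, d)      (((n) + (d) - 1) / (d))

    static uint32_t min_u32(uint32_t a, uint32_t b) { return a < b ? a : b; }

    int main(void)
    {
            uint32_t byte_count = 10 * 1024 * 1024 + 123;   /* odd-sized buffer */
            uint32_t max_bytes  = 0x1fffff;                 /* assumed per-packet limit */
            uint32_t num_loops  = DIV_ROUND_UP(byte_count, max_bytes);
            uint64_t dst = 0;
            uint32_t i;

            printf("emitting %u fill packets\n", num_loops);
            for (i = 0; i < num_loops; i++) {
                    uint32_t cur = min_u32(byte_count, max_bytes);

                    /* the real code emits one fill/copy packet covering cur bytes here */
                    dst += cur;
                    byte_count -= cur;
            }
            printf("covered %llu bytes, %u left\n",
                   (unsigned long long)dst, byte_count);    /* all covered, 0 left */
            return 0;
    }
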
@@@ -1543,8 -1395,3 +1543,8 @@@ static void amdgpu_ttm_debugfs_fini(str
  
  #endif
  }
 +
 +u64 amdgpu_ttm_get_gtt_mem_size(struct amdgpu_device *adev)
 +{
 +      return ttm_get_kernel_zone_memory_size(adev->mman.mem_global_ref.object);
 +}
index 25dd58a65905d8e4a2e5680842ba27fe36709f0e,4aa993d190189aff82ecc44d18898df8d5164708..cee7bc9a2314dce0c315f768c14dd498e67810dc
@@@ -201,14 -201,39 +201,14 @@@ int amdgpu_uvd_sw_init(struct amdgpu_de
        bo_size = AMDGPU_GPU_PAGE_ALIGN(le32_to_cpu(hdr->ucode_size_bytes) + 8)
                  +  AMDGPU_UVD_STACK_SIZE + AMDGPU_UVD_HEAP_SIZE
                  +  AMDGPU_UVD_SESSION_SIZE * adev->uvd.max_handles;
 -      r = amdgpu_bo_create(adev, bo_size, PAGE_SIZE, true,
 -                           AMDGPU_GEM_DOMAIN_VRAM,
 -                           AMDGPU_GEM_CREATE_CPU_ACCESS_REQUIRED,
 -                           NULL, NULL, &adev->uvd.vcpu_bo);
 +      r = amdgpu_bo_create_kernel(adev, bo_size, PAGE_SIZE,
 +                                  AMDGPU_GEM_DOMAIN_VRAM, &adev->uvd.vcpu_bo,
 +                                  &adev->uvd.gpu_addr, &adev->uvd.cpu_addr);
        if (r) {
                dev_err(adev->dev, "(%d) failed to allocate UVD bo\n", r);
                return r;
        }
  
 -      r = amdgpu_bo_reserve(adev->uvd.vcpu_bo, false);
 -      if (r) {
 -              amdgpu_bo_unref(&adev->uvd.vcpu_bo);
 -              dev_err(adev->dev, "(%d) failed to reserve UVD bo\n", r);
 -              return r;
 -      }
 -
 -      r = amdgpu_bo_pin(adev->uvd.vcpu_bo, AMDGPU_GEM_DOMAIN_VRAM,
 -                        &adev->uvd.gpu_addr);
 -      if (r) {
 -              amdgpu_bo_unreserve(adev->uvd.vcpu_bo);
 -              amdgpu_bo_unref(&adev->uvd.vcpu_bo);
 -              dev_err(adev->dev, "(%d) UVD bo pin failed\n", r);
 -              return r;
 -      }
 -
 -      r = amdgpu_bo_kmap(adev->uvd.vcpu_bo, &adev->uvd.cpu_addr);
 -      if (r) {
 -              dev_err(adev->dev, "(%d) UVD map failed\n", r);
 -              return r;
 -      }
 -
 -      amdgpu_bo_unreserve(adev->uvd.vcpu_bo);
 -
        ring = &adev->uvd.ring;
        rq = &ring->sched.sched_rq[AMD_SCHED_PRIORITY_NORMAL];
        r = amd_sched_entity_init(&ring->sched, &adev->uvd.entity,
  
  int amdgpu_uvd_sw_fini(struct amdgpu_device *adev)
  {
 -      int r;
 -
        kfree(adev->uvd.saved_bo);
  
        amd_sched_entity_fini(&adev->uvd.ring.sched, &adev->uvd.entity);
  
 -      if (adev->uvd.vcpu_bo) {
 -              r = amdgpu_bo_reserve(adev->uvd.vcpu_bo, false);
 -              if (!r) {
 -                      amdgpu_bo_kunmap(adev->uvd.vcpu_bo);
 -                      amdgpu_bo_unpin(adev->uvd.vcpu_bo);
 -                      amdgpu_bo_unreserve(adev->uvd.vcpu_bo);
 -              }
 -
 -              amdgpu_bo_unref(&adev->uvd.vcpu_bo);
 -      }
 +      amdgpu_bo_free_kernel(&adev->uvd.vcpu_bo,
 +                            &adev->uvd.gpu_addr,
 +                            (void **)&adev->uvd.cpu_addr);
  
        amdgpu_ring_fini(&adev->uvd.ring);
  
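
The UVD setup above replaces the open-coded create/reserve/pin/kmap sequence and its error unwinding with a single amdgpu_bo_create_kernel() call. A rough standalone sketch of what such a helper centralizes, using illustrative stand-in functions rather than the real driver calls:

    #include <stdio.h>

    struct bo { int created, pinned, mapped; };

    static int bo_create(struct bo *b)   { b->created = 1; return 0; }
    static int bo_pin(struct bo *b)      { b->pinned = 1;  return 0; }
    static int bo_kmap(struct bo *b)     { b->mapped = 1;  return 0; }
    static void bo_unpin(struct bo *b)   { b->pinned = 0; }
    static void bo_destroy(struct bo *b) { b->created = 0; }

    /* one place owns the sequence and the reverse-order unwinding */
    static int bo_create_kernel(struct bo *b)
    {
            int r;

            r = bo_create(b);
            if (r)
                    return r;
            r = bo_pin(b);
            if (r)
                    goto err_destroy;
            r = bo_kmap(b);
            if (r)
                    goto err_unpin;
            return 0;

    err_unpin:
            bo_unpin(b);
    err_destroy:
            bo_destroy(b);
            return r;
    }

    int main(void)
    {
            struct bo vcpu_bo = { 0 };

            printf("create_kernel -> %d (created=%d pinned=%d mapped=%d)\n",
                   bo_create_kernel(&vcpu_bo), vcpu_bo.created,
                   vcpu_bo.pinned, vcpu_bo.mapped);
            return 0;
    }
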
@@@ -289,7 -323,7 +289,7 @@@ int amdgpu_uvd_suspend(struct amdgpu_de
        if (!adev->uvd.saved_bo)
                return -ENOMEM;
  
 -      memcpy(adev->uvd.saved_bo, ptr, size);
 +      memcpy_fromio(adev->uvd.saved_bo, ptr, size);
  
        return 0;
  }
@@@ -306,7 -340,7 +306,7 @@@ int amdgpu_uvd_resume(struct amdgpu_dev
        ptr = adev->uvd.cpu_addr;
  
        if (adev->uvd.saved_bo != NULL) {
 -              memcpy(ptr, adev->uvd.saved_bo, size);
 +              memcpy_toio(ptr, adev->uvd.saved_bo, size);
                kfree(adev->uvd.saved_bo);
                adev->uvd.saved_bo = NULL;
        } else {
  
                hdr = (const struct common_firmware_header *)adev->uvd.fw->data;
                offset = le32_to_cpu(hdr->ucode_array_offset_bytes);
 -              memcpy(adev->uvd.cpu_addr, (adev->uvd.fw->data) + offset,
 -                      (adev->uvd.fw->size) - offset);
 +              memcpy_toio(adev->uvd.cpu_addr, adev->uvd.fw->data + offset,
 +                          le32_to_cpu(hdr->ucode_size_bytes));
                size -= le32_to_cpu(hdr->ucode_size_bytes);
                ptr += le32_to_cpu(hdr->ucode_size_bytes);
 -              memset(ptr, 0, size);
 +              memset_io(ptr, 0, size);
        }
  
        return 0;
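
The suspend/resume paths switch to memcpy_fromio()/memcpy_toio()/memset_io() because the UVD buffer sits in VRAM and is mapped as I/O memory, where plain memcpy() is not appropriate. The loop below is only a conceptual illustration of copying through explicit, non-elidable accesses; the real kernel helpers additionally go through the architecture's I/O accessors.

    #include <stddef.h>
    #include <stdio.h>

    /* copy byte by byte through a volatile pointer so every read is performed */
    static void copy_from_mmio(void *dst, const volatile void *src, size_t n)
    {
            unsigned char *d = dst;
            const volatile unsigned char *s = src;

            while (n--)
                    *d++ = *s++;
    }

    int main(void)
    {
            unsigned char fake_vram[16] = "uvd-firmware!!!";  /* pretend I/O-mapped VRAM */
            unsigned char saved[16];

            copy_from_mmio(saved, fake_vram, sizeof(saved));
            printf("%s\n", saved);
            return 0;
    }
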
@@@ -809,7 -843,6 +809,7 @@@ static int amdgpu_uvd_cs_reg(struct amd
                                return r;
                        break;
                case mmUVD_ENGINE_CNTL:
 +              case mmUVD_NO_OP:
                        break;
                default:
                        DRM_ERROR("Invalid reg 0x%X!\n", reg);
@@@ -882,10 -915,6 +882,10 @@@ int amdgpu_uvd_ring_parse_cs(struct amd
                return -EINVAL;
        }
  
 +      r = amdgpu_cs_sysvm_access_required(parser);
 +      if (r)
 +              return r;
 +
        ctx.parser = parser;
        ctx.buf_sizes = buf_sizes;
        ctx.ib_idx = ib_idx;
@@@ -952,10 -981,8 +952,10 @@@ static int amdgpu_uvd_send_msg(struct a
        ib->ptr[3] = addr >> 32;
        ib->ptr[4] = PACKET0(mmUVD_GPCOM_VCPU_CMD, 0);
        ib->ptr[5] = 0;
 -      for (i = 6; i < 16; ++i)
 -              ib->ptr[i] = PACKET2(0);
 +      for (i = 6; i < 16; i += 2) {
 +              ib->ptr[i] = PACKET0(mmUVD_NO_OP, 0);
 +              ib->ptr[i+1] = 0;
 +      }
        ib->length_dw = 16;
  
        if (direct) {
@@@ -1087,9 -1114,15 +1087,9 @@@ static void amdgpu_uvd_idle_work_handle
  {
        struct amdgpu_device *adev =
                container_of(work, struct amdgpu_device, uvd.idle_work.work);
 -      unsigned i, fences, handles = 0;
 -
 -      fences = amdgpu_fence_count_emitted(&adev->uvd.ring);
 -
 -      for (i = 0; i < adev->uvd.max_handles; ++i)
 -              if (atomic_read(&adev->uvd.handles[i]))
 -                      ++handles;
 +      unsigned fences = amdgpu_fence_count_emitted(&adev->uvd.ring);
  
 -      if (fences == 0 && handles == 0) {
 +      if (fences == 0) {
                if (adev->pm.dpm_enabled) {
                        amdgpu_dpm_enable_uvd(adev, false);
                } else {
@@@ -1154,7 -1187,8 +1154,8 @@@ int amdgpu_uvd_ring_test_ib(struct amdg
                r = 0;
        }
  
- error:
        fence_put(fence);
+ error:
        return r;
  }
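
Moving fence_put() above the error label means the failure paths that jump there no longer drop a reference they never took. A standalone sketch of that cleanup-ordering pattern, with stand-in types:

    #include <stdio.h>
    #include <stdlib.h>

    struct fence { int refcount; };

    static struct fence *get_fence(int fail)
    {
            struct fence *f;

            if (fail)
                    return NULL;
            f = malloc(sizeof(*f));
            f->refcount = 1;
            return f;
    }

    static void fence_put(struct fence *f)
    {
            if (f && --f->refcount == 0)
                    free(f);
    }

    static int run_test(int fail)
    {
            struct fence *fence = get_fence(fail);
            int r = 0;

            if (!fence) {
                    r = -1;
                    goto error;     /* no reference was taken, so skip the put */
            }

            /* ... wait on the fence and check the result here ... */

            fence_put(fence);       /* released only on the path that holds it */
    error:
            return r;
    }

    int main(void)
    {
            printf("%d %d\n", run_test(0), run_test(1));    /* 0 -1 */
            return 0;
    }
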
index bd5af328154f92525cea46ac8912f808d6200555,80120fa4092c76164f460ba5cef4de1bce1d6d83..a6a48ed9562e89cf2d229ba0153afc7907ac8dfa
   * SI supports 16.
   */
  
 -/* Special value that no flush is necessary */
 -#define AMDGPU_VM_NO_FLUSH (~0ll)
 -
  /* Local structure. Encapsulate some VM table update parameters to reduce
   * the number of function parameters
   */
 -struct amdgpu_vm_update_params {
 +struct amdgpu_pte_update_params {
 +      /* amdgpu device we do this update for */
 +      struct amdgpu_device *adev;
        /* address where to copy page table entries from */
        uint64_t src;
 -      /* DMA addresses to use for mapping */
 -      dma_addr_t *pages_addr;
        /* indirect buffer to fill with commands */
        struct amdgpu_ib *ib;
 +      /* Function which actually does the update */
 +      void (*func)(struct amdgpu_pte_update_params *params, uint64_t pe,
 +                   uint64_t addr, unsigned count, uint32_t incr,
 +                   uint32_t flags);
 +      /* indicate update pt or its shadow */
 +      bool shadow;
  };
  
  /**
@@@ -470,9 -467,10 +470,9 @@@ struct amdgpu_bo_va *amdgpu_vm_bo_find(
  }
  
  /**
 - * amdgpu_vm_update_pages - helper to call the right asic function
 + * amdgpu_vm_do_set_ptes - helper to call the right asic function
   *
 - * @adev: amdgpu_device pointer
 - * @vm_update_params: see amdgpu_vm_update_params definition
 + * @params: see amdgpu_pte_update_params definition
   * @pe: addr of the page entry
   * @addr: dst addr to write into pe
   * @count: number of page entries to update
   * Traces the parameters and calls the right asic functions
   * to setup the page table using the DMA.
   */
 -static void amdgpu_vm_update_pages(struct amdgpu_device *adev,
 -                                 struct amdgpu_vm_update_params
 -                                      *vm_update_params,
 -                                 uint64_t pe, uint64_t addr,
 -                                 unsigned count, uint32_t incr,
 -                                 uint32_t flags)
 +static void amdgpu_vm_do_set_ptes(struct amdgpu_pte_update_params *params,
 +                                uint64_t pe, uint64_t addr,
 +                                unsigned count, uint32_t incr,
 +                                uint32_t flags)
  {
        trace_amdgpu_vm_set_page(pe, addr, count, incr, flags);
  
 -      if (vm_update_params->src) {
 -              amdgpu_vm_copy_pte(adev, vm_update_params->ib,
 -                      pe, (vm_update_params->src + (addr >> 12) * 8), count);
 -
 -      } else if (vm_update_params->pages_addr) {
 -              amdgpu_vm_write_pte(adev, vm_update_params->ib,
 -                      vm_update_params->pages_addr,
 -                      pe, addr, count, incr, flags);
 -
 -      } else if (count < 3) {
 -              amdgpu_vm_write_pte(adev, vm_update_params->ib, NULL, pe, addr,
 -                                  count, incr, flags);
 +      if (count < 3) {
 +              amdgpu_vm_write_pte(params->adev, params->ib, pe,
 +                                  addr | flags, count, incr);
  
        } else {
 -              amdgpu_vm_set_pte_pde(adev, vm_update_params->ib, pe, addr,
 +              amdgpu_vm_set_pte_pde(params->adev, params->ib, pe, addr,
                                      count, incr, flags);
        }
  }
  
 +/**
 + * amdgpu_vm_do_copy_ptes - copy the PTEs from the GART
 + *
 + * @params: see amdgpu_pte_update_params definition
 + * @pe: addr of the page entry
 + * @addr: dst addr to write into pe
 + * @count: number of page entries to update
 + * @incr: increase next addr by incr bytes
 + * @flags: hw access flags
 + *
 + * Traces the parameters and calls the DMA function to copy the PTEs.
 + */
 +static void amdgpu_vm_do_copy_ptes(struct amdgpu_pte_update_params *params,
 +                                 uint64_t pe, uint64_t addr,
 +                                 unsigned count, uint32_t incr,
 +                                 uint32_t flags)
 +{
 +      trace_amdgpu_vm_set_page(pe, addr, count, incr, flags);
 +
 +      amdgpu_vm_copy_pte(params->adev, params->ib, pe,
 +                         (params->src + (addr >> 12) * 8), count);
 +}
 +
  /**
   * amdgpu_vm_clear_bo - initially clear the page dir/table
   *
@@@ -537,11 -523,12 +537,11 @@@ static int amdgpu_vm_clear_bo(struct am
        struct amdgpu_ring *ring;
        struct fence *fence = NULL;
        struct amdgpu_job *job;
 -      struct amdgpu_vm_update_params vm_update_params;
 +      struct amdgpu_pte_update_params params;
        unsigned entries;
        uint64_t addr;
        int r;
  
 -      memset(&vm_update_params, 0, sizeof(vm_update_params));
        ring = container_of(vm->entity.sched, struct amdgpu_ring, sched);
  
        r = reservation_object_reserve_shared(bo->tbo.resv);
        if (r)
                goto error;
  
 -      vm_update_params.ib = &job->ibs[0];
 -      amdgpu_vm_update_pages(adev, &vm_update_params, addr, 0, entries,
 -                             0, 0);
 +      memset(&params, 0, sizeof(params));
 +      params.adev = adev;
 +      params.ib = &job->ibs[0];
 +      amdgpu_vm_do_set_ptes(&params, addr, 0, entries, 0, 0);
        amdgpu_ring_pad_ib(ring, &job->ibs[0]);
  
        WARN_ON(job->ibs[0].length_dw > 64);
@@@ -591,41 -577,55 +591,41 @@@ error
   * Look up the physical address of the page that the pte resolves
   * to and return the pointer for the page table entry.
   */
 -uint64_t amdgpu_vm_map_gart(const dma_addr_t *pages_addr, uint64_t addr)
 +static uint64_t amdgpu_vm_map_gart(const dma_addr_t *pages_addr, uint64_t addr)
  {
        uint64_t result;
  
 -      if (pages_addr) {
 -              /* page table offset */
 -              result = pages_addr[addr >> PAGE_SHIFT];
 -
 -              /* in case cpu page size != gpu page size*/
 -              result |= addr & (~PAGE_MASK);
 +      /* page table offset */
 +      result = pages_addr[addr >> PAGE_SHIFT];
  
 -      } else {
 -              /* No mapping required */
 -              result = addr;
 -      }
 +      /* in case cpu page size != gpu page size */
 +      result |= addr & (~PAGE_MASK);
  
        result &= 0xFFFFFFFFFFFFF000ULL;
  
        return result;
  }
  
 -/**
 - * amdgpu_vm_update_pdes - make sure that page directory is valid
 - *
 - * @adev: amdgpu_device pointer
 - * @vm: requested vm
 - * @start: start of GPU address range
 - * @end: end of GPU address range
 - *
 - * Allocates new page tables if necessary
 - * and updates the page directory.
 - * Returns 0 for success, error for failure.
 - */
 -int amdgpu_vm_update_page_directory(struct amdgpu_device *adev,
 -                                  struct amdgpu_vm *vm)
 +static int amdgpu_vm_update_pd_or_shadow(struct amdgpu_device *adev,
 +                                       struct amdgpu_vm *vm,
 +                                       bool shadow)
  {
        struct amdgpu_ring *ring;
 -      struct amdgpu_bo *pd = vm->page_directory;
 -      uint64_t pd_addr = amdgpu_bo_gpu_offset(pd);
 +      struct amdgpu_bo *pd = shadow ? vm->page_directory->shadow :
 +              vm->page_directory;
 +      uint64_t pd_addr;
        uint32_t incr = AMDGPU_VM_PTE_COUNT * 8;
        uint64_t last_pde = ~0, last_pt = ~0;
        unsigned count = 0, pt_idx, ndw;
        struct amdgpu_job *job;
 -      struct amdgpu_vm_update_params vm_update_params;
 +      struct amdgpu_pte_update_params params;
        struct fence *fence = NULL;
  
        int r;
  
 -      memset(&vm_update_params, 0, sizeof(vm_update_params));
 +      if (!pd)
 +              return 0;
 +      pd_addr = amdgpu_bo_gpu_offset(pd);
        ring = container_of(vm->entity.sched, struct amdgpu_ring, sched);
  
        /* padding, etc. */
        if (r)
                return r;
  
 -      vm_update_params.ib = &job->ibs[0];
 +      memset(&params, 0, sizeof(params));
 +      params.adev = adev;
 +      params.ib = &job->ibs[0];
  
        /* walk over the address space and update the page directory */
        for (pt_idx = 0; pt_idx <= vm->max_pde_used; ++pt_idx) {
                        continue;
  
                pt = amdgpu_bo_gpu_offset(bo);
 -              if (vm->page_tables[pt_idx].addr == pt)
 -                      continue;
 -              vm->page_tables[pt_idx].addr = pt;
 +              if (!shadow) {
 +                      if (vm->page_tables[pt_idx].addr == pt)
 +                              continue;
 +                      vm->page_tables[pt_idx].addr = pt;
 +              } else {
 +                      if (vm->page_tables[pt_idx].shadow_addr == pt)
 +                              continue;
 +                      vm->page_tables[pt_idx].shadow_addr = pt;
 +              }
  
                pde = pd_addr + pt_idx * 8;
                if (((last_pde + 8 * count) != pde) ||
 -                  ((last_pt + incr * count) != pt)) {
 +                  ((last_pt + incr * count) != pt) ||
 +                  (count == AMDGPU_VM_MAX_UPDATE_SIZE)) {
  
                        if (count) {
 -                              amdgpu_vm_update_pages(adev, &vm_update_params,
 -                                                     last_pde, last_pt,
 -                                                     count, incr,
 -                                                     AMDGPU_PTE_VALID);
 +                              amdgpu_vm_do_set_ptes(&params, last_pde,
 +                                                    last_pt, count, incr,
 +                                                    AMDGPU_PTE_VALID);
                        }
  
                        count = 1;
        }
  
        if (count)
 -              amdgpu_vm_update_pages(adev, &vm_update_params,
 -                                      last_pde, last_pt,
 -                                      count, incr, AMDGPU_PTE_VALID);
 +              amdgpu_vm_do_set_ptes(&params, last_pde, last_pt,
 +                                    count, incr, AMDGPU_PTE_VALID);
  
 -      if (vm_update_params.ib->length_dw != 0) {
 -              amdgpu_ring_pad_ib(ring, vm_update_params.ib);
 +      if (params.ib->length_dw != 0) {
 +              amdgpu_ring_pad_ib(ring, params.ib);
                amdgpu_sync_resv(adev, &job->sync, pd->tbo.resv,
                                 AMDGPU_FENCE_OWNER_VM);
 -              WARN_ON(vm_update_params.ib->length_dw > ndw);
 +              WARN_ON(params.ib->length_dw > ndw);
                r = amdgpu_job_submit(job, ring, &vm->entity,
                                      AMDGPU_FENCE_OWNER_VM, &fence);
                if (r)
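
The new amdgpu_pte_update_params carries a function pointer so the shared walking code no longer branches per chunk between writing PTEs directly and copying them from a staging buffer. A standalone sketch of that pattern; the structures and callbacks below are illustrative stand-ins, not the kernel's:

    #include <stdint.h>
    #include <stdio.h>

    struct pte_update_params {
            uint64_t src;   /* staging buffer GPU address, 0 if unused */
            void (*func)(struct pte_update_params *p, uint64_t pe,
                         uint64_t addr, unsigned count);
    };

    static void do_set_ptes(struct pte_update_params *p, uint64_t pe,
                            uint64_t addr, unsigned count)
    {
            (void)p;
            printf("SET  %u PTEs at 0x%llx -> 0x%llx\n", count,
                   (unsigned long long)pe, (unsigned long long)addr);
    }

    static void do_copy_ptes(struct pte_update_params *p, uint64_t pe,
                             uint64_t addr, unsigned count)
    {
            printf("COPY %u PTEs at 0x%llx from staging 0x%llx\n", count,
                   (unsigned long long)pe,
                   (unsigned long long)(p->src + (addr >> 12) * 8));
    }

    /* the walker only ever calls params->func() */
    static void update_range(struct pte_update_params *p, uint64_t pe,
                             uint64_t addr, unsigned count)
    {
            p->func(p, pe, addr, count);
    }

    int main(void)
    {
            struct pte_update_params params = { .src = 0, .func = do_set_ptes };

            update_range(&params, 0x1000, 0x200000, 64);

            params.src = 0xdead0000;        /* pretend the PTEs were staged in an IB */
            params.func = do_copy_ptes;
            update_range(&params, 0x1000, 0x200000, 64);
            return 0;
    }
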
@@@ -710,33 -703,92 +710,33 @@@ error_free
        return r;
  }
  
 -/**
 - * amdgpu_vm_frag_ptes - add fragment information to PTEs
 +/*
 + * amdgpu_vm_update_pdes - make sure that page directory is valid
   *
   * @adev: amdgpu_device pointer
 - * @vm_update_params: see amdgpu_vm_update_params definition
 - * @pe_start: first PTE to handle
 - * @pe_end: last PTE to handle
 - * @addr: addr those PTEs should point to
 - * @flags: hw mapping flags
 + * @vm: requested vm
 + * @start: start of GPU address range
 + * @end: end of GPU address range
 + *
 + * Allocates new page tables if necessary
 + * and updates the page directory.
 + * Returns 0 for success, error for failure.
   */
 -static void amdgpu_vm_frag_ptes(struct amdgpu_device *adev,
 -                              struct amdgpu_vm_update_params
 -                                      *vm_update_params,
 -                              uint64_t pe_start, uint64_t pe_end,
 -                              uint64_t addr, uint32_t flags)
 +int amdgpu_vm_update_page_directory(struct amdgpu_device *adev,
 +                                   struct amdgpu_vm *vm)
  {
 -      /**
 -       * The MC L1 TLB supports variable sized pages, based on a fragment
 -       * field in the PTE. When this field is set to a non-zero value, page
 -       * granularity is increased from 4KB to (1 << (12 + frag)). The PTE
 -       * flags are considered valid for all PTEs within the fragment range
 -       * and corresponding mappings are assumed to be physically contiguous.
 -       *
 -       * The L1 TLB can store a single PTE for the whole fragment,
 -       * significantly increasing the space available for translation
 -       * caching. This leads to large improvements in throughput when the
 -       * TLB is under pressure.
 -       *
 -       * The L2 TLB distributes small and large fragments into two
 -       * asymmetric partitions. The large fragment cache is significantly
 -       * larger. Thus, we try to use large fragments wherever possible.
 -       * Userspace can support this by aligning virtual base address and
 -       * allocation size to the fragment size.
 -       */
 -
 -      /* SI and newer are optimized for 64KB */
 -      uint64_t frag_flags = AMDGPU_PTE_FRAG_64KB;
 -      uint64_t frag_align = 0x80;
 -
 -      uint64_t frag_start = ALIGN(pe_start, frag_align);
 -      uint64_t frag_end = pe_end & ~(frag_align - 1);
 -
 -      unsigned count;
 -
 -      /* Abort early if there isn't anything to do */
 -      if (pe_start == pe_end)
 -              return;
 -
 -      /* system pages are non continuously */
 -      if (vm_update_params->src || vm_update_params->pages_addr ||
 -              !(flags & AMDGPU_PTE_VALID) || (frag_start >= frag_end)) {
 -
 -              count = (pe_end - pe_start) / 8;
 -              amdgpu_vm_update_pages(adev, vm_update_params, pe_start,
 -                                     addr, count, AMDGPU_GPU_PAGE_SIZE,
 -                                     flags);
 -              return;
 -      }
 -
 -      /* handle the 4K area at the beginning */
 -      if (pe_start != frag_start) {
 -              count = (frag_start - pe_start) / 8;
 -              amdgpu_vm_update_pages(adev, vm_update_params, pe_start, addr,
 -                                     count, AMDGPU_GPU_PAGE_SIZE, flags);
 -              addr += AMDGPU_GPU_PAGE_SIZE * count;
 -      }
 -
 -      /* handle the area in the middle */
 -      count = (frag_end - frag_start) / 8;
 -      amdgpu_vm_update_pages(adev, vm_update_params, frag_start, addr, count,
 -                             AMDGPU_GPU_PAGE_SIZE, flags | frag_flags);
 +      int r;
  
 -      /* handle the 4K area at the end */
 -      if (frag_end != pe_end) {
 -              addr += AMDGPU_GPU_PAGE_SIZE * count;
 -              count = (pe_end - frag_end) / 8;
 -              amdgpu_vm_update_pages(adev, vm_update_params, frag_end, addr,
 -                                     count, AMDGPU_GPU_PAGE_SIZE, flags);
 -      }
 +      r = amdgpu_vm_update_pd_or_shadow(adev, vm, true);
 +      if (r)
 +              return r;
 +      return amdgpu_vm_update_pd_or_shadow(adev, vm, false);
  }
  
  /**
   * amdgpu_vm_update_ptes - make sure that page tables are valid
   *
 - * @adev: amdgpu_device pointer
 - * @vm_update_params: see amdgpu_vm_update_params definition
 + * @params: see amdgpu_pte_update_params definition
   * @vm: requested vm
   * @start: start of GPU address range
   * @end: end of GPU address range
   *
   * Update the page tables in the range @start - @end.
   */
 -static void amdgpu_vm_update_ptes(struct amdgpu_device *adev,
 -                                struct amdgpu_vm_update_params
 -                                      *vm_update_params,
 +static void amdgpu_vm_update_ptes(struct amdgpu_pte_update_params *params,
                                  struct amdgpu_vm *vm,
                                  uint64_t start, uint64_t end,
                                  uint64_t dst, uint32_t flags)
  {
        const uint64_t mask = AMDGPU_VM_PTE_COUNT - 1;
  
 -      uint64_t cur_pe_start, cur_pe_end, cur_dst;
 +      uint64_t cur_pe_start, cur_nptes, cur_dst;
        uint64_t addr; /* next GPU address to be updated */
        uint64_t pt_idx;
        struct amdgpu_bo *pt;
        addr = start;
        pt_idx = addr >> amdgpu_vm_block_size;
        pt = vm->page_tables[pt_idx].entry.robj;
 -
 +      if (params->shadow) {
 +              if (!pt->shadow)
 +                      return;
 +              pt = vm->page_tables[pt_idx].entry.robj->shadow;
 +      }
        if ((addr & ~mask) == (end & ~mask))
                nptes = end - addr;
        else
  
        cur_pe_start = amdgpu_bo_gpu_offset(pt);
        cur_pe_start += (addr & mask) * 8;
 -      cur_pe_end = cur_pe_start + 8 * nptes;
 +      cur_nptes = nptes;
        cur_dst = dst;
  
        /* for next ptb*/
        while (addr < end) {
                pt_idx = addr >> amdgpu_vm_block_size;
                pt = vm->page_tables[pt_idx].entry.robj;
 +              if (params->shadow) {
 +                      if (!pt->shadow)
 +                              return;
 +                      pt = vm->page_tables[pt_idx].entry.robj->shadow;
 +              }
  
                if ((addr & ~mask) == (end & ~mask))
                        nptes = end - addr;
                next_pe_start = amdgpu_bo_gpu_offset(pt);
                next_pe_start += (addr & mask) * 8;
  
 -              if (cur_pe_end == next_pe_start) {
 +              if ((cur_pe_start + 8 * cur_nptes) == next_pe_start &&
 +                  ((cur_nptes + nptes) <= AMDGPU_VM_MAX_UPDATE_SIZE)) {
                        /* The next ptb is consecutive to current ptb.
 -                       * Don't call amdgpu_vm_frag_ptes now.
 +                       * Don't call the update function now.
                         * Will update two ptbs together in future.
                        */
 -                      cur_pe_end += 8 * nptes;
 +                      cur_nptes += nptes;
                } else {
 -                      amdgpu_vm_frag_ptes(adev, vm_update_params,
 -                                          cur_pe_start, cur_pe_end,
 -                                          cur_dst, flags);
 +                      params->func(params, cur_pe_start, cur_dst, cur_nptes,
 +                                   AMDGPU_GPU_PAGE_SIZE, flags);
  
                        cur_pe_start = next_pe_start;
 -                      cur_pe_end = next_pe_start + 8 * nptes;
 +                      cur_nptes = nptes;
                        cur_dst = dst;
                }
  
                dst += nptes * AMDGPU_GPU_PAGE_SIZE;
        }
  
 -      amdgpu_vm_frag_ptes(adev, vm_update_params, cur_pe_start,
 -                          cur_pe_end, cur_dst, flags);
 +      params->func(params, cur_pe_start, cur_dst, cur_nptes,
 +                   AMDGPU_GPU_PAGE_SIZE, flags);
 +}
 +
 +/*
 + * amdgpu_vm_frag_ptes - add fragment information to PTEs
 + *
 + * @params: see amdgpu_pte_update_params definition
 + * @vm: requested vm
 + * @start: first PTE to handle
 + * @end: last PTE to handle
 + * @dst: addr those PTEs should point to
 + * @flags: hw mapping flags
 + */
 +static void amdgpu_vm_frag_ptes(struct amdgpu_pte_update_params *params,
 +                              struct amdgpu_vm *vm,
 +                              uint64_t start, uint64_t end,
 +                              uint64_t dst, uint32_t flags)
 +{
 +      /**
 +       * The MC L1 TLB supports variable sized pages, based on a fragment
 +       * field in the PTE. When this field is set to a non-zero value, page
 +       * granularity is increased from 4KB to (1 << (12 + frag)). The PTE
 +       * flags are considered valid for all PTEs within the fragment range
 +       * and corresponding mappings are assumed to be physically contiguous.
 +       *
 +       * The L1 TLB can store a single PTE for the whole fragment,
 +       * significantly increasing the space available for translation
 +       * caching. This leads to large improvements in throughput when the
 +       * TLB is under pressure.
 +       *
 +       * The L2 TLB distributes small and large fragments into two
 +       * asymmetric partitions. The large fragment cache is significantly
 +       * larger. Thus, we try to use large fragments wherever possible.
 +       * Userspace can support this by aligning virtual base address and
 +       * allocation size to the fragment size.
 +       */
 +
 +      const uint64_t frag_align = 1 << AMDGPU_LOG2_PAGES_PER_FRAG;
 +
 +      uint64_t frag_start = ALIGN(start, frag_align);
 +      uint64_t frag_end = end & ~(frag_align - 1);
 +
 +      uint32_t frag;
 +
 +      /* system pages are not contiguous */
 +      if (params->src || !(flags & AMDGPU_PTE_VALID) ||
 +          (frag_start >= frag_end)) {
 +
 +              amdgpu_vm_update_ptes(params, vm, start, end, dst, flags);
 +              return;
 +      }
 +
 +      /* use more than 64KB fragment size if possible */
 +      frag = lower_32_bits(frag_start | frag_end);
 +      frag = likely(frag) ? __ffs(frag) : 31;
 +
 +      /* handle the 4K area at the beginning */
 +      if (start != frag_start) {
 +              amdgpu_vm_update_ptes(params, vm, start, frag_start,
 +                                    dst, flags);
 +              dst += (frag_start - start) * AMDGPU_GPU_PAGE_SIZE;
 +      }
 +
 +      /* handle the area in the middle */
 +      amdgpu_vm_update_ptes(params, vm, frag_start, frag_end, dst,
 +                            flags | AMDGPU_PTE_FRAG(frag));
 +
 +      /* handle the 4K area at the end */
 +      if (frag_end != end) {
 +              dst += (frag_end - frag_start) * AMDGPU_GPU_PAGE_SIZE;
 +              amdgpu_vm_update_ptes(params, vm, frag_end, end, dst, flags);
 +      }
  }
  
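
The reworked amdgpu_vm_frag_ptes() derives the fragment size from the alignment of both ends of the aligned middle region, taking the lowest set bit of (frag_start | frag_end). A standalone sketch of that selection, assuming AMDGPU_LOG2_PAGES_PER_FRAG is 4 (64KB fragments as the minimum); the PTE indices are made up:

    #include <stdint.h>
    #include <stdio.h>

    #define LOG2_PAGES_PER_FRAG 4   /* assumed: 16 pages, i.e. 64KB */

    static unsigned lowest_set_bit(uint32_t v)
    {
            return v ? (unsigned)__builtin_ctz(v) : 31;  /* mirrors likely(frag) ? __ffs(frag) : 31 */
    }

    int main(void)
    {
            const uint64_t frag_align = 1ULL << LOG2_PAGES_PER_FRAG;

            uint64_t start = 19, end = 1037;        /* PTE index range of a mapping */
            uint64_t frag_start = (start + frag_align - 1) & ~(frag_align - 1);
            uint64_t frag_end = end & ~(frag_align - 1);
            unsigned frag = lowest_set_bit((uint32_t)(frag_start | frag_end));

            printf("head  : [%llu, %llu) in 4K pages\n",
                   (unsigned long long)start, (unsigned long long)frag_start);
            printf("middle: [%llu, %llu) with 2^%u-page fragments\n",
                   (unsigned long long)frag_start, (unsigned long long)frag_end, frag);
            printf("tail  : [%llu, %llu) in 4K pages\n",
                   (unsigned long long)frag_end, (unsigned long long)end);
            return 0;
    }
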
  /**
@@@ -926,19 -900,14 +926,19 @@@ static int amdgpu_vm_bo_update_mapping(
        void *owner = AMDGPU_FENCE_OWNER_VM;
        unsigned nptes, ncmds, ndw;
        struct amdgpu_job *job;
 -      struct amdgpu_vm_update_params vm_update_params;
 +      struct amdgpu_pte_update_params params;
        struct fence *f = NULL;
        int r;
  
 +      memset(&params, 0, sizeof(params));
 +      params.adev = adev;
 +      params.src = src;
 +
        ring = container_of(vm->entity.sched, struct amdgpu_ring, sched);
 -      memset(&vm_update_params, 0, sizeof(vm_update_params));
 -      vm_update_params.src = src;
 -      vm_update_params.pages_addr = pages_addr;
 +
 +      memset(&params, 0, sizeof(params));
 +      params.adev = adev;
 +      params.src = src;
  
        /* sync to everything on unmapping */
        if (!(flags & AMDGPU_PTE_VALID))
        /* padding, etc. */
        ndw = 64;
  
 -      if (vm_update_params.src) {
 +      if (src) {
                /* only copy commands needed */
                ndw += ncmds * 7;
  
 -      } else if (vm_update_params.pages_addr) {
 -              /* header for write data commands */
 -              ndw += ncmds * 4;
 +              params.func = amdgpu_vm_do_copy_ptes;
 +
 +      } else if (pages_addr) {
 +              /* copy commands needed */
 +              ndw += ncmds * 7;
  
 -              /* body of write data command */
 +              /* and also PTEs */
                ndw += nptes * 2;
  
 +              params.func = amdgpu_vm_do_copy_ptes;
 +
        } else {
                /* set page commands needed */
                ndw += ncmds * 10;
  
                /* two extra commands for begin/end of fragment */
                ndw += 2 * 10;
 +
 +              params.func = amdgpu_vm_do_set_ptes;
        }
  
        r = amdgpu_job_alloc_with_ib(adev, ndw * 4, &job);
        if (r)
                return r;
  
 -      vm_update_params.ib = &job->ibs[0];
 +      params.ib = &job->ibs[0];
 +
 +      if (!src && pages_addr) {
 +              uint64_t *pte;
 +              unsigned i;
 +
 +              /* Put the PTEs at the end of the IB. */
 +              i = ndw - nptes * 2;
 +              pte = (uint64_t *)&(job->ibs->ptr[i]);
 +              params.src = job->ibs->gpu_addr + i * 4;
 +
 +              for (i = 0; i < nptes; ++i) {
 +                      pte[i] = amdgpu_vm_map_gart(pages_addr, addr + i *
 +                                                  AMDGPU_GPU_PAGE_SIZE);
 +                      pte[i] |= flags;
 +              }
 +      }
  
        r = amdgpu_sync_fence(adev, &job->sync, exclusive);
        if (r)
        if (r)
                goto error_free;
  
 -      amdgpu_vm_update_ptes(adev, &vm_update_params, vm, start,
 -                            last + 1, addr, flags);
 +      params.shadow = true;
 +      amdgpu_vm_frag_ptes(&params, vm, start, last + 1, addr, flags);
 +      params.shadow = false;
 +      amdgpu_vm_frag_ptes(&params, vm, start, last + 1, addr, flags);
  
 -      amdgpu_ring_pad_ib(ring, vm_update_params.ib);
 -      WARN_ON(vm_update_params.ib->length_dw > ndw);
 +      amdgpu_ring_pad_ib(ring, params.ib);
 +      WARN_ON(params.ib->length_dw > ndw);
        r = amdgpu_job_submit(job, ring, &vm->entity,
                              AMDGPU_FENCE_OWNER_VM, &f);
        if (r)
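
When pages_addr is set and there is no external source, the PTE values are now staged by the CPU at the tail of the IB itself and params.src points at that GPU address, so the copy path can be reused. A standalone sketch of the trick; the buffer size and addresses are made up:

    #include <stdint.h>
    #include <stdio.h>
    #include <string.h>

    int main(void)
    {
            uint32_t ib[64] = { 0 };         /* stand-in for the IB's dword array */
            uint64_t ib_gpu_addr = 0x100000; /* assumed GPU address of the IB */
            unsigned ndw = 64, nptes = 8, i;

            /* reserve nptes * 2 dwords at the tail for the 64-bit PTE values */
            unsigned tail = ndw - nptes * 2;
            uint64_t src = ib_gpu_addr + tail * 4;  /* GPU address of that staging area */

            for (i = 0; i < nptes; i++) {
                    uint64_t pte = ((uint64_t)i << 12) | 1;        /* fake address | valid bit */
                    memcpy(&ib[tail + i * 2], &pte, sizeof(pte));  /* staged by the CPU */
            }

            printf("copy command reads %u PTEs from 0x%llx\n",
                   nptes, (unsigned long long)src);
            return 0;
    }
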
@@@ -1117,32 -1062,28 +1117,32 @@@ static int amdgpu_vm_bo_split_mapping(s
   *
   * @adev: amdgpu_device pointer
   * @bo_va: requested BO and VM object
 - * @mem: ttm mem
 + * @clear: if true clear the entries
   *
   * Fill in the page table entries for @bo_va.
   * Returns 0 for success, -EINVAL for failure.
 - *
 - * Object have to be reserved and mutex must be locked!
   */
  int amdgpu_vm_bo_update(struct amdgpu_device *adev,
                        struct amdgpu_bo_va *bo_va,
 -                      struct ttm_mem_reg *mem)
 +                      bool clear)
  {
        struct amdgpu_vm *vm = bo_va->vm;
        struct amdgpu_bo_va_mapping *mapping;
        dma_addr_t *pages_addr = NULL;
        uint32_t gtt_flags, flags;
 +      struct ttm_mem_reg *mem;
        struct fence *exclusive;
        uint64_t addr;
        int r;
  
 -      if (mem) {
 +      if (clear) {
 +              mem = NULL;
 +              addr = 0;
 +              exclusive = NULL;
 +      } else {
                struct ttm_dma_tt *ttm;
  
 +              mem = &bo_va->bo->tbo.mem;
                addr = (u64)mem->start << PAGE_SHIFT;
                switch (mem->mem_type) {
                case TTM_PL_TT:
                }
  
                exclusive = reservation_object_get_excl(bo_va->bo->tbo.resv);
 -      } else {
 -              addr = 0;
 -              exclusive = NULL;
        }
  
        flags = amdgpu_ttm_tt_pte_flags(adev, bo_va->bo->tbo.ttm, mem);
 -      gtt_flags = (adev == bo_va->bo->adev) ? flags : 0;
 +      gtt_flags = (amdgpu_ttm_is_bound(bo_va->bo->tbo.ttm) &&
 +              adev == bo_va->bo->adev) ? flags : 0;
  
        spin_lock(&vm->status_lock);
        if (!list_empty(&bo_va->vm_status))
        spin_lock(&vm->status_lock);
        list_splice_init(&bo_va->invalids, &bo_va->valids);
        list_del_init(&bo_va->vm_status);
 -      if (!mem)
 +      if (clear)
                list_add(&bo_va->vm_status, &vm->cleared);
        spin_unlock(&vm->status_lock);
  
@@@ -1254,7 -1197,7 +1254,7 @@@ int amdgpu_vm_clear_invalids(struct amd
                        struct amdgpu_bo_va, vm_status);
                spin_unlock(&vm->status_lock);
  
 -              r = amdgpu_vm_bo_update(adev, bo_va, NULL);
 +              r = amdgpu_vm_bo_update(adev, bo_va, true);
                if (r)
                        return r;
  
@@@ -1399,8 -1342,7 +1399,8 @@@ int amdgpu_vm_bo_map(struct amdgpu_devi
                r = amdgpu_bo_create(adev, AMDGPU_VM_PTE_COUNT * 8,
                                     AMDGPU_GPU_PAGE_SIZE, true,
                                     AMDGPU_GEM_DOMAIN_VRAM,
 -                                   AMDGPU_GEM_CREATE_NO_CPU_ACCESS,
 +                                   AMDGPU_GEM_CREATE_NO_CPU_ACCESS |
 +                                   AMDGPU_GEM_CREATE_SHADOW,
                                     NULL, resv, &pt);
                if (r)
                        goto error_free;
@@@ -1593,14 -1535,13 +1593,14 @@@ int amdgpu_vm_init(struct amdgpu_devic
        r = amd_sched_entity_init(&ring->sched, &vm->entity,
                                  rq, amdgpu_sched_jobs);
        if (r)
-               return r;
+               goto err;
  
        vm->page_directory_fence = NULL;
  
        r = amdgpu_bo_create(adev, pd_size, align, true,
                             AMDGPU_GEM_DOMAIN_VRAM,
 -                           AMDGPU_GEM_CREATE_NO_CPU_ACCESS,
 +                           AMDGPU_GEM_CREATE_NO_CPU_ACCESS |
 +                           AMDGPU_GEM_CREATE_SHADOW,
                             NULL, NULL, &vm->page_directory);
        if (r)
                goto error_free_sched_entity;
@@@ -1624,6 -1565,9 +1624,9 @@@ error_free_page_directory
  error_free_sched_entity:
        amd_sched_entity_fini(&ring->sched, &vm->entity);
  
+ err:
+       drm_free_large(vm->page_tables);
        return r;
  }
  
@@@ -1656,16 -1600,10 +1659,16 @@@ void amdgpu_vm_fini(struct amdgpu_devic
                kfree(mapping);
        }
  
 -      for (i = 0; i < amdgpu_vm_num_pdes(adev); i++)
 +      for (i = 0; i < amdgpu_vm_num_pdes(adev); i++) {
 +              if (vm->page_tables[i].entry.robj &&
 +                  vm->page_tables[i].entry.robj->shadow)
 +                      amdgpu_bo_unref(&vm->page_tables[i].entry.robj->shadow);
                amdgpu_bo_unref(&vm->page_tables[i].entry.robj);
 +      }
        drm_free_large(vm->page_tables);
  
 +      if (vm->page_directory->shadow)
 +              amdgpu_bo_unref(&vm->page_directory->shadow);
        amdgpu_bo_unref(&vm->page_directory);
        fence_put(vm->page_directory_fence);
  }
index e6d7bf9520a0f6ddbf6829eb5cccacc8cf4300fe,77fdd9911c3cbe1268bb0c841edd6089ee058e18..cb952acc71339e31ac613a896fd1c268b0f327dc
@@@ -52,6 -52,7 +52,7 @@@ static void cik_sdma_set_ring_funcs(str
  static void cik_sdma_set_irq_funcs(struct amdgpu_device *adev);
  static void cik_sdma_set_buffer_funcs(struct amdgpu_device *adev);
  static void cik_sdma_set_vm_pte_funcs(struct amdgpu_device *adev);
+ static int cik_sdma_soft_reset(void *handle);
  
  MODULE_FIRMWARE("radeon/bonaire_sdma.bin");
  MODULE_FIRMWARE("radeon/bonaire_sdma1.bin");
@@@ -694,16 -695,24 +695,16 @@@ static void cik_sdma_vm_copy_pte(struc
                                 uint64_t pe, uint64_t src,
                                 unsigned count)
  {
 -      while (count) {
 -              unsigned bytes = count * 8;
 -              if (bytes > 0x1FFFF8)
 -                      bytes = 0x1FFFF8;
 -
 -              ib->ptr[ib->length_dw++] = SDMA_PACKET(SDMA_OPCODE_COPY,
 -                      SDMA_WRITE_SUB_OPCODE_LINEAR, 0);
 -              ib->ptr[ib->length_dw++] = bytes;
 -              ib->ptr[ib->length_dw++] = 0; /* src/dst endian swap */
 -              ib->ptr[ib->length_dw++] = lower_32_bits(src);
 -              ib->ptr[ib->length_dw++] = upper_32_bits(src);
 -              ib->ptr[ib->length_dw++] = lower_32_bits(pe);
 -              ib->ptr[ib->length_dw++] = upper_32_bits(pe);
 -
 -              pe += bytes;
 -              src += bytes;
 -              count -= bytes / 8;
 -      }
 +      unsigned bytes = count * 8;
 +
 +      ib->ptr[ib->length_dw++] = SDMA_PACKET(SDMA_OPCODE_COPY,
 +              SDMA_WRITE_SUB_OPCODE_LINEAR, 0);
 +      ib->ptr[ib->length_dw++] = bytes;
 +      ib->ptr[ib->length_dw++] = 0; /* src/dst endian swap */
 +      ib->ptr[ib->length_dw++] = lower_32_bits(src);
 +      ib->ptr[ib->length_dw++] = upper_32_bits(src);
 +      ib->ptr[ib->length_dw++] = lower_32_bits(pe);
 +      ib->ptr[ib->length_dw++] = upper_32_bits(pe);
  }
  
  /**
   *
   * @ib: indirect buffer to fill with commands
   * @pe: addr of the page entry
 - * @addr: dst addr to write into pe
 + * @value: dst addr to write into pe
   * @count: number of page entries to update
   * @incr: increase next addr by incr bytes
 - * @flags: access flags
   *
   * Update PTEs by writing them manually using sDMA (CIK).
   */
 -static void cik_sdma_vm_write_pte(struct amdgpu_ib *ib,
 -                                const dma_addr_t *pages_addr, uint64_t pe,
 -                                uint64_t addr, unsigned count,
 -                                uint32_t incr, uint32_t flags)
 +static void cik_sdma_vm_write_pte(struct amdgpu_ib *ib, uint64_t pe,
 +                                uint64_t value, unsigned count,
 +                                uint32_t incr)
  {
 -      uint64_t value;
 -      unsigned ndw;
 -
 -      while (count) {
 -              ndw = count * 2;
 -              if (ndw > 0xFFFFE)
 -                      ndw = 0xFFFFE;
 -
 -              /* for non-physically contiguous pages (system) */
 -              ib->ptr[ib->length_dw++] = SDMA_PACKET(SDMA_OPCODE_WRITE,
 -                      SDMA_WRITE_SUB_OPCODE_LINEAR, 0);
 -              ib->ptr[ib->length_dw++] = pe;
 -              ib->ptr[ib->length_dw++] = upper_32_bits(pe);
 -              ib->ptr[ib->length_dw++] = ndw;
 -              for (; ndw > 0; ndw -= 2, --count, pe += 8) {
 -                      value = amdgpu_vm_map_gart(pages_addr, addr);
 -                      addr += incr;
 -                      value |= flags;
 -                      ib->ptr[ib->length_dw++] = value;
 -                      ib->ptr[ib->length_dw++] = upper_32_bits(value);
 -              }
 +      unsigned ndw = count * 2;
 +
 +      ib->ptr[ib->length_dw++] = SDMA_PACKET(SDMA_OPCODE_WRITE,
 +              SDMA_WRITE_SUB_OPCODE_LINEAR, 0);
 +      ib->ptr[ib->length_dw++] = lower_32_bits(pe);
 +      ib->ptr[ib->length_dw++] = upper_32_bits(pe);
 +      ib->ptr[ib->length_dw++] = ndw;
 +      for (; ndw > 0; ndw -= 2) {
 +              ib->ptr[ib->length_dw++] = lower_32_bits(value);
 +              ib->ptr[ib->length_dw++] = upper_32_bits(value);
 +              value += incr;
        }
  }
  
   *
   * Update the page tables using sDMA (CIK).
   */
 -static void cik_sdma_vm_set_pte_pde(struct amdgpu_ib *ib,
 -                                  uint64_t pe,
 +static void cik_sdma_vm_set_pte_pde(struct amdgpu_ib *ib, uint64_t pe,
                                    uint64_t addr, unsigned count,
                                    uint32_t incr, uint32_t flags)
  {
 -      uint64_t value;
 -      unsigned ndw;
 -
 -      while (count) {
 -              ndw = count;
 -              if (ndw > 0x7FFFF)
 -                      ndw = 0x7FFFF;
 -
 -              if (flags & AMDGPU_PTE_VALID)
 -                      value = addr;
 -              else
 -                      value = 0;
 -
 -              /* for physically contiguous pages (vram) */
 -              ib->ptr[ib->length_dw++] = SDMA_PACKET(SDMA_OPCODE_GENERATE_PTE_PDE, 0, 0);
 -              ib->ptr[ib->length_dw++] = pe; /* dst addr */
 -              ib->ptr[ib->length_dw++] = upper_32_bits(pe);
 -              ib->ptr[ib->length_dw++] = flags; /* mask */
 -              ib->ptr[ib->length_dw++] = 0;
 -              ib->ptr[ib->length_dw++] = value; /* value */
 -              ib->ptr[ib->length_dw++] = upper_32_bits(value);
 -              ib->ptr[ib->length_dw++] = incr; /* increment size */
 -              ib->ptr[ib->length_dw++] = 0;
 -              ib->ptr[ib->length_dw++] = ndw; /* number of entries */
 -
 -              pe += ndw * 8;
 -              addr += ndw * incr;
 -              count -= ndw;
 -      }
 +      /* for physically contiguous pages (vram) */
 +      ib->ptr[ib->length_dw++] = SDMA_PACKET(SDMA_OPCODE_GENERATE_PTE_PDE, 0, 0);
 +      ib->ptr[ib->length_dw++] = lower_32_bits(pe); /* dst addr */
 +      ib->ptr[ib->length_dw++] = upper_32_bits(pe);
 +      ib->ptr[ib->length_dw++] = flags; /* mask */
 +      ib->ptr[ib->length_dw++] = 0;
 +      ib->ptr[ib->length_dw++] = lower_32_bits(addr); /* value */
 +      ib->ptr[ib->length_dw++] = upper_32_bits(addr);
 +      ib->ptr[ib->length_dw++] = incr; /* increment size */
 +      ib->ptr[ib->length_dw++] = 0;
 +      ib->ptr[ib->length_dw++] = count; /* number of entries */
  }
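
The sDMA PTE helpers lose their internal while (count) splitting because the VM code now flushes a run once it reaches AMDGPU_VM_MAX_UPDATE_SIZE, so one packet per call is always enough. A standalone sketch of moving the split to the caller; MAX_UPDATE is an assumed stand-in value:

    #include <assert.h>
    #include <stdio.h>

    #define MAX_UPDATE 128  /* assumed cap enforced by the caller */

    static void emit_set_ptes(unsigned count)
    {
            assert(count <= MAX_UPDATE);    /* a single packet, no internal loop */
            printf("one packet covering %u PTEs\n", count);
    }

    static void walk_and_update(unsigned total)
    {
            unsigned run = 0, i;

            for (i = 0; i < total; i++) {
                    if (++run == MAX_UPDATE) {      /* caller-side flush at the cap */
                            emit_set_ptes(run);
                            run = 0;
                    }
            }
            if (run)
                    emit_set_ptes(run);
    }

    int main(void)
    {
            walk_and_update(300);   /* 128 + 128 + 44 */
            return 0;
    }
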
  
  /**
@@@ -847,22 -887,6 +848,22 @@@ static void cik_sdma_ring_emit_vm_flush
        amdgpu_ring_write(ring, (0xfff << 16) | 10); /* retry count, poll interval */
  }
  
 +static unsigned cik_sdma_ring_get_emit_ib_size(struct amdgpu_ring *ring)
 +{
 +      return
 +              7 + 4; /* cik_sdma_ring_emit_ib */
 +}
 +
 +static unsigned cik_sdma_ring_get_dma_frame_size(struct amdgpu_ring *ring)
 +{
 +      return
 +              6 + /* cik_sdma_ring_emit_hdp_flush */
 +              3 + /* cik_sdma_ring_emit_hdp_invalidate */
 +              6 + /* cik_sdma_ring_emit_pipeline_sync */
 +              12 + /* cik_sdma_ring_emit_vm_flush */
 +              9 + 9 + 9; /* cik_sdma_ring_emit_fence x3 for user fence, vm fence */
 +}
 +
  static void cik_enable_sdma_mgcg(struct amdgpu_device *adev,
                                 bool enable)
  {
@@@ -1014,6 -1038,8 +1015,8 @@@ static int cik_sdma_resume(void *handle
  {
        struct amdgpu_device *adev = (struct amdgpu_device *)handle;
  
+       cik_sdma_soft_reset(handle);
        return cik_sdma_hw_init(adev);
  }
  
@@@ -1236,8 -1262,6 +1239,8 @@@ static const struct amdgpu_ring_funcs c
        .test_ib = cik_sdma_ring_test_ib,
        .insert_nop = cik_sdma_ring_insert_nop,
        .pad_ib = cik_sdma_ring_pad_ib,
 +      .get_emit_ib_size = cik_sdma_ring_get_emit_ib_size,
 +      .get_dma_frame_size = cik_sdma_ring_get_dma_frame_size,
  };
  
  static void cik_sdma_set_ring_funcs(struct amdgpu_device *adev)
index 32a676291e67e14759c5d5dff522167f5a28d9ac,425413fcaf02ffb79436ab2f15f6f7389e8766dc..71116da9e782d52597837b2dfa95a17b2007998d
@@@ -1645,147 -1645,6 +1645,147 @@@ static u32 gfx_v7_0_get_rb_active_bitma
        return (~data) & mask;
  }
  
 +static void
 +gfx_v7_0_raster_config(struct amdgpu_device *adev, u32 *rconf, u32 *rconf1)
 +{
 +      switch (adev->asic_type) {
 +      case CHIP_BONAIRE:
 +              *rconf |= RB_MAP_PKR0(2) | RB_XSEL2(1) | SE_MAP(2) |
 +                        SE_XSEL(1) | SE_YSEL(1);
 +              *rconf1 |= 0x0;
 +              break;
 +      case CHIP_HAWAII:
 +              *rconf |= RB_MAP_PKR0(2) | RB_MAP_PKR1(2) |
 +                        RB_XSEL2(1) | PKR_MAP(2) | PKR_XSEL(1) |
 +                        PKR_YSEL(1) | SE_MAP(2) | SE_XSEL(2) |
 +                        SE_YSEL(3);
 +              *rconf1 |= SE_PAIR_MAP(2) | SE_PAIR_XSEL(3) |
 +                         SE_PAIR_YSEL(2);
 +              break;
 +      case CHIP_KAVERI:
 +              *rconf |= RB_MAP_PKR0(2);
 +              *rconf1 |= 0x0;
 +              break;
 +      case CHIP_KABINI:
 +      case CHIP_MULLINS:
 +              *rconf |= 0x0;
 +              *rconf1 |= 0x0;
 +              break;
 +      default:
 +              DRM_ERROR("unknown asic: 0x%x\n", adev->asic_type);
 +              break;
 +      }
 +}
 +
 +static void
 +gfx_v7_0_write_harvested_raster_configs(struct amdgpu_device *adev,
 +                                      u32 raster_config, u32 raster_config_1,
 +                                      unsigned rb_mask, unsigned num_rb)
 +{
 +      unsigned sh_per_se = max_t(unsigned, adev->gfx.config.max_sh_per_se, 1);
 +      unsigned num_se = max_t(unsigned, adev->gfx.config.max_shader_engines, 1);
 +      unsigned rb_per_pkr = min_t(unsigned, num_rb / num_se / sh_per_se, 2);
 +      unsigned rb_per_se = num_rb / num_se;
 +      unsigned se_mask[4];
 +      unsigned se;
 +
 +      se_mask[0] = ((1 << rb_per_se) - 1) & rb_mask;
 +      se_mask[1] = (se_mask[0] << rb_per_se) & rb_mask;
 +      se_mask[2] = (se_mask[1] << rb_per_se) & rb_mask;
 +      se_mask[3] = (se_mask[2] << rb_per_se) & rb_mask;
 +
 +      WARN_ON(!(num_se == 1 || num_se == 2 || num_se == 4));
 +      WARN_ON(!(sh_per_se == 1 || sh_per_se == 2));
 +      WARN_ON(!(rb_per_pkr == 1 || rb_per_pkr == 2));
 +
 +      if ((num_se > 2) && ((!se_mask[0] && !se_mask[1]) ||
 +                           (!se_mask[2] && !se_mask[3]))) {
 +              raster_config_1 &= ~SE_PAIR_MAP_MASK;
 +
 +              if (!se_mask[0] && !se_mask[1]) {
 +                      raster_config_1 |=
 +                              SE_PAIR_MAP(RASTER_CONFIG_SE_PAIR_MAP_3);
 +              } else {
 +                      raster_config_1 |=
 +                              SE_PAIR_MAP(RASTER_CONFIG_SE_PAIR_MAP_0);
 +              }
 +      }
 +
 +      for (se = 0; se < num_se; se++) {
 +              unsigned raster_config_se = raster_config;
 +              unsigned pkr0_mask = ((1 << rb_per_pkr) - 1) << (se * rb_per_se);
 +              unsigned pkr1_mask = pkr0_mask << rb_per_pkr;
 +              int idx = (se / 2) * 2;
 +
 +              if ((num_se > 1) && (!se_mask[idx] || !se_mask[idx + 1])) {
 +                      raster_config_se &= ~SE_MAP_MASK;
 +
 +                      if (!se_mask[idx]) {
 +                              raster_config_se |= SE_MAP(RASTER_CONFIG_SE_MAP_3);
 +                      } else {
 +                              raster_config_se |= SE_MAP(RASTER_CONFIG_SE_MAP_0);
 +                      }
 +              }
 +
 +              pkr0_mask &= rb_mask;
 +              pkr1_mask &= rb_mask;
 +              if (rb_per_se > 2 && (!pkr0_mask || !pkr1_mask)) {
 +                      raster_config_se &= ~PKR_MAP_MASK;
 +
 +                      if (!pkr0_mask) {
 +                              raster_config_se |= PKR_MAP(RASTER_CONFIG_PKR_MAP_3);
 +                      } else {
 +                              raster_config_se |= PKR_MAP(RASTER_CONFIG_PKR_MAP_0);
 +                      }
 +              }
 +
 +              if (rb_per_se >= 2) {
 +                      unsigned rb0_mask = 1 << (se * rb_per_se);
 +                      unsigned rb1_mask = rb0_mask << 1;
 +
 +                      rb0_mask &= rb_mask;
 +                      rb1_mask &= rb_mask;
 +                      if (!rb0_mask || !rb1_mask) {
 +                              raster_config_se &= ~RB_MAP_PKR0_MASK;
 +
 +                              if (!rb0_mask) {
 +                                      raster_config_se |=
 +                                              RB_MAP_PKR0(RASTER_CONFIG_RB_MAP_3);
 +                              } else {
 +                                      raster_config_se |=
 +                                              RB_MAP_PKR0(RASTER_CONFIG_RB_MAP_0);
 +                              }
 +                      }
 +
 +                      if (rb_per_se > 2) {
 +                              rb0_mask = 1 << (se * rb_per_se + rb_per_pkr);
 +                              rb1_mask = rb0_mask << 1;
 +                              rb0_mask &= rb_mask;
 +                              rb1_mask &= rb_mask;
 +                              if (!rb0_mask || !rb1_mask) {
 +                                      raster_config_se &= ~RB_MAP_PKR1_MASK;
 +
 +                                      if (!rb0_mask) {
 +                                              raster_config_se |=
 +                                                      RB_MAP_PKR1(RASTER_CONFIG_RB_MAP_3);
 +                                      } else {
 +                                              raster_config_se |=
 +                                                      RB_MAP_PKR1(RASTER_CONFIG_RB_MAP_0);
 +                                      }
 +                              }
 +                      }
 +              }
 +
 +              /* GRBM_GFX_INDEX has a different offset on CI+ */
 +              gfx_v7_0_select_se_sh(adev, se, 0xffffffff, 0xffffffff);
 +              WREG32(mmPA_SC_RASTER_CONFIG, raster_config_se);
 +              WREG32(mmPA_SC_RASTER_CONFIG_1, raster_config_1);
 +      }
 +
 +      /* GRBM_GFX_INDEX has a different offset on CI+ */
 +      gfx_v7_0_select_se_sh(adev, 0xffffffff, 0xffffffff, 0xffffffff);
 +}
 +
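The per-SE masks above come from slicing rb_mask into rb_per_se-wide chunks, one chunk per shader engine; an empty chunk (or an empty SE pair) is what triggers the SE_MAP/SE_PAIR_MAP and RB_MAP_PKR* re-steering. A minimal stand-alone sketch of just the slicing step, using made-up values for num_se, rb_per_se and rb_mask rather than a real ASIC configuration:

    #include <stdio.h>

    int main(void)
    {
            /* hypothetical example: 2 SEs, 2 RBs per SE, RB2 harvested */
            unsigned num_se = 2, rb_per_se = 2;
            unsigned rb_mask = 0xb;
            unsigned se_mask[4], se;

            se_mask[0] = ((1u << rb_per_se) - 1) & rb_mask;
            for (se = 1; se < 4; se++)
                    se_mask[se] = (se_mask[se - 1] << rb_per_se) & rb_mask;

            for (se = 0; se < num_se; se++)
                    printf("se_mask[%u] = 0x%x\n", se, se_mask[se]);
            return 0;
    }
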
  /**
   * gfx_v7_0_setup_rb - setup the RBs on the asic
   *
@@@ -1799,11 -1658,9 +1799,11 @@@ static void gfx_v7_0_setup_rb(struct am
  {
        int i, j;
        u32 data;
 +      u32 raster_config = 0, raster_config_1 = 0;
        u32 active_rbs = 0;
        u32 rb_bitmap_width_per_sh = adev->gfx.config.max_backends_per_se /
                                        adev->gfx.config.max_sh_per_se;
 +      unsigned num_rb_pipes;
  
        mutex_lock(&adev->grbm_idx_mutex);
        for (i = 0; i < adev->gfx.config.max_shader_engines; i++) {
                }
        }
        gfx_v7_0_select_se_sh(adev, 0xffffffff, 0xffffffff, 0xffffffff);
 -      mutex_unlock(&adev->grbm_idx_mutex);
  
        adev->gfx.config.backend_enable_mask = active_rbs;
        adev->gfx.config.num_rbs = hweight32(active_rbs);
 +
 +      num_rb_pipes = min_t(unsigned, adev->gfx.config.max_backends_per_se *
 +                           adev->gfx.config.max_shader_engines, 16);
 +
 +      gfx_v7_0_raster_config(adev, &raster_config, &raster_config_1);
 +
 +      if (!adev->gfx.config.backend_enable_mask ||
 +                      adev->gfx.config.num_rbs >= num_rb_pipes) {
 +              WREG32(mmPA_SC_RASTER_CONFIG, raster_config);
 +              WREG32(mmPA_SC_RASTER_CONFIG_1, raster_config_1);
 +      } else {
 +              gfx_v7_0_write_harvested_raster_configs(adev, raster_config, raster_config_1,
 +                                                      adev->gfx.config.backend_enable_mask,
 +                                                      num_rb_pipes);
 +      }
 +      mutex_unlock(&adev->grbm_idx_mutex);
  }
  
  /**
@@@ -2254,25 -2096,6 +2254,25 @@@ static void gfx_v7_0_ring_emit_ib_compu
        amdgpu_ring_write(ring, control);
  }
  
 +static void gfx_v7_ring_emit_cntxcntl(struct amdgpu_ring *ring, uint32_t flags)
 +{
 +      uint32_t dw2 = 0;
 +
 +      dw2 |= 0x80000000; /* set load_enable otherwise this package is just NOPs */
 +      if (flags & AMDGPU_HAVE_CTX_SWITCH) {
 +              /* set load_global_config & load_global_uconfig */
 +              dw2 |= 0x8001;
 +              /* set load_cs_sh_regs */
 +              dw2 |= 0x01000000;
 +              /* set load_per_context_state & load_gfx_sh_regs */
 +              dw2 |= 0x10002;
 +      }
 +
 +      amdgpu_ring_write(ring, PACKET3(PACKET3_CONTEXT_CONTROL, 1));
 +      amdgpu_ring_write(ring, dw2);
 +      amdgpu_ring_write(ring, 0);
 +}
 +
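When AMDGPU_HAVE_CTX_SWITCH is set, the individual load_* bits above collapse into a single dw2 value; the throwaway sketch below only reproduces the OR-ing from the function (no ring or hardware access):

    #include <stdio.h>
    #include <stdint.h>

    int main(void)
    {
            uint32_t dw2 = 0x80000000;      /* load_enable */

            dw2 |= 0x8001;                  /* load_global_config, load_global_uconfig */
            dw2 |= 0x01000000;              /* load_cs_sh_regs */
            dw2 |= 0x10002;                 /* load_per_context_state, load_gfx_sh_regs */

            printf("dw2 = 0x%08x\n", dw2);  /* prints 0x81018003 */
            return 0;
    }
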
  /**
   * gfx_v7_0_ring_test_ib - basic ring IB test
   *
@@@ -2620,7 -2443,7 +2620,7 @@@ static int gfx_v7_0_cp_gfx_resume(struc
        return 0;
  }
  
 -static u32 gfx_v7_0_ring_get_rptr_gfx(struct amdgpu_ring *ring)
 +static u32 gfx_v7_0_ring_get_rptr(struct amdgpu_ring *ring)
  {
        return ring->adev->wb.wb[ring->rptr_offs];
  }
@@@ -2640,6 -2463,11 +2640,6 @@@ static void gfx_v7_0_ring_set_wptr_gfx(
        (void)RREG32(mmCP_RB0_WPTR);
  }
  
 -static u32 gfx_v7_0_ring_get_rptr_compute(struct amdgpu_ring *ring)
 -{
 -      return ring->adev->wb.wb[ring->rptr_offs];
 -}
 -
  static u32 gfx_v7_0_ring_get_wptr_compute(struct amdgpu_ring *ring)
  {
        /* XXX check if swapping is necessary on BE */
@@@ -2927,8 -2755,7 +2927,7 @@@ static int gfx_v7_0_cp_compute_resume(s
        u64 wb_gpu_addr;
        u32 *buf;
        struct bonaire_mqd *mqd;
-       gfx_v7_0_cp_compute_enable(adev, true);
+       struct amdgpu_ring *ring;
  
        /* fix up chicken bits */
        tmp = RREG32(mmCP_CPF_DEBUG);
  
        /* init the queues.  Just two for now. */
        for (i = 0; i < adev->gfx.num_compute_rings; i++) {
-               struct amdgpu_ring *ring = &adev->gfx.compute_ring[i];
+               ring = &adev->gfx.compute_ring[i];
  
                if (ring->mqd_obj == NULL) {
                        r = amdgpu_bo_create(adev,
                amdgpu_bo_unreserve(ring->mqd_obj);
  
                ring->ready = true;
+       }
+       gfx_v7_0_cp_compute_enable(adev, true);
+       for (i = 0; i < adev->gfx.num_compute_rings; i++) {
+               ring = &adev->gfx.compute_ring[i];
                r = amdgpu_ring_test_ring(ring);
                if (r)
                        ring->ready = false;
@@@ -4348,41 -4182,6 +4354,41 @@@ static void gfx_v7_0_ring_emit_gds_swit
        amdgpu_ring_write(ring, (1 << (oa_size + oa_base)) - (1 << oa_base));
  }
  
 +static unsigned gfx_v7_0_ring_get_emit_ib_size_gfx(struct amdgpu_ring *ring)
 +{
 +      return
 +              4; /* gfx_v7_0_ring_emit_ib_gfx */
 +}
 +
 +static unsigned gfx_v7_0_ring_get_dma_frame_size_gfx(struct amdgpu_ring *ring)
 +{
 +      return
 +              20 + /* gfx_v7_0_ring_emit_gds_switch */
 +              7 + /* gfx_v7_0_ring_emit_hdp_flush */
 +              5 + /* gfx_v7_0_ring_emit_hdp_invalidate */
 +              12 + 12 + 12 + /* gfx_v7_0_ring_emit_fence_gfx x3 for user fence, vm fence */
 +              7 + 4 + /* gfx_v7_0_ring_emit_pipeline_sync */
 +              17 + 6 + /* gfx_v7_0_ring_emit_vm_flush */
 +              3; /* gfx_v7_ring_emit_cntxcntl */
 +}
 +
 +static unsigned gfx_v7_0_ring_get_emit_ib_size_compute(struct amdgpu_ring *ring)
 +{
 +      return
 +              4; /* gfx_v7_0_ring_emit_ib_compute */
 +}
 +
 +static unsigned gfx_v7_0_ring_get_dma_frame_size_compute(struct amdgpu_ring *ring)
 +{
 +      return
 +              20 + /* gfx_v7_0_ring_emit_gds_switch */
 +              7 + /* gfx_v7_0_ring_emit_hdp_flush */
 +              5 + /* gfx_v7_0_ring_emit_hdp_invalidate */
 +              7 + /* gfx_v7_0_ring_emit_pipeline_sync */
 +              17 + /* gfx_v7_0_ring_emit_vm_flush */
 +              7 + 7 + 7; /* gfx_v7_0_ring_emit_fence_compute x3 for user fence, vm fence */
 +}
 +
  static const struct amdgpu_gfx_funcs gfx_v7_0_gfx_funcs = {
        .get_gpu_clock_counter = &gfx_v7_0_get_gpu_clock_counter,
        .select_se_sh = &gfx_v7_0_select_se_sh,
@@@ -4672,21 -4471,24 +4678,21 @@@ static int gfx_v7_0_sw_init(void *handl
        }
  
        /* reserve GDS, GWS and OA resource for gfx */
 -      r = amdgpu_bo_create(adev, adev->gds.mem.gfx_partition_size,
 -                      PAGE_SIZE, true,
 -                      AMDGPU_GEM_DOMAIN_GDS, 0,
 -                      NULL, NULL, &adev->gds.gds_gfx_bo);
 +      r = amdgpu_bo_create_kernel(adev, adev->gds.mem.gfx_partition_size,
 +                                  PAGE_SIZE, AMDGPU_GEM_DOMAIN_GDS,
 +                                  &adev->gds.gds_gfx_bo, NULL, NULL);
        if (r)
                return r;
  
 -      r = amdgpu_bo_create(adev, adev->gds.gws.gfx_partition_size,
 -              PAGE_SIZE, true,
 -              AMDGPU_GEM_DOMAIN_GWS, 0,
 -              NULL, NULL, &adev->gds.gws_gfx_bo);
 +      r = amdgpu_bo_create_kernel(adev, adev->gds.gws.gfx_partition_size,
 +                                  PAGE_SIZE, AMDGPU_GEM_DOMAIN_GWS,
 +                                  &adev->gds.gws_gfx_bo, NULL, NULL);
        if (r)
                return r;
  
 -      r = amdgpu_bo_create(adev, adev->gds.oa.gfx_partition_size,
 -                      PAGE_SIZE, true,
 -                      AMDGPU_GEM_DOMAIN_OA, 0,
 -                      NULL, NULL, &adev->gds.oa_gfx_bo);
 +      r = amdgpu_bo_create_kernel(adev, adev->gds.oa.gfx_partition_size,
 +                                  PAGE_SIZE, AMDGPU_GEM_DOMAIN_OA,
 +                                  &adev->gds.oa_gfx_bo, NULL, NULL);
        if (r)
                return r;
  
@@@ -4702,9 -4504,9 +4708,9 @@@ static int gfx_v7_0_sw_fini(void *handl
        int i;
        struct amdgpu_device *adev = (struct amdgpu_device *)handle;
  
 -      amdgpu_bo_unref(&adev->gds.oa_gfx_bo);
 -      amdgpu_bo_unref(&adev->gds.gws_gfx_bo);
 -      amdgpu_bo_unref(&adev->gds.gds_gfx_bo);
 +      amdgpu_bo_free_kernel(&adev->gds.oa_gfx_bo, NULL, NULL);
 +      amdgpu_bo_free_kernel(&adev->gds.gws_gfx_bo, NULL, NULL);
 +      amdgpu_bo_free_kernel(&adev->gds.gds_gfx_bo, NULL, NULL);
  
        for (i = 0; i < adev->gfx.num_gfx_rings; i++)
                amdgpu_ring_fini(&adev->gfx.gfx_ring[i]);
@@@ -5135,7 -4937,7 +5141,7 @@@ const struct amd_ip_funcs gfx_v7_0_ip_f
  };
  
  static const struct amdgpu_ring_funcs gfx_v7_0_ring_funcs_gfx = {
 -      .get_rptr = gfx_v7_0_ring_get_rptr_gfx,
 +      .get_rptr = gfx_v7_0_ring_get_rptr,
        .get_wptr = gfx_v7_0_ring_get_wptr_gfx,
        .set_wptr = gfx_v7_0_ring_set_wptr_gfx,
        .parse_cs = NULL,
        .test_ib = gfx_v7_0_ring_test_ib,
        .insert_nop = amdgpu_ring_insert_nop,
        .pad_ib = amdgpu_ring_generic_pad_ib,
 +      .emit_cntxcntl = gfx_v7_ring_emit_cntxcntl,
 +      .get_emit_ib_size = gfx_v7_0_ring_get_emit_ib_size_gfx,
 +      .get_dma_frame_size = gfx_v7_0_ring_get_dma_frame_size_gfx,
  };
  
  static const struct amdgpu_ring_funcs gfx_v7_0_ring_funcs_compute = {
 -      .get_rptr = gfx_v7_0_ring_get_rptr_compute,
 +      .get_rptr = gfx_v7_0_ring_get_rptr,
        .get_wptr = gfx_v7_0_ring_get_wptr_compute,
        .set_wptr = gfx_v7_0_ring_set_wptr_compute,
        .parse_cs = NULL,
        .test_ib = gfx_v7_0_ring_test_ib,
        .insert_nop = amdgpu_ring_insert_nop,
        .pad_ib = amdgpu_ring_generic_pad_ib,
 +      .get_emit_ib_size = gfx_v7_0_ring_get_emit_ib_size_compute,
 +      .get_dma_frame_size = gfx_v7_0_ring_get_dma_frame_size_compute,
  };
  
  static void gfx_v7_0_set_ring_funcs(struct amdgpu_device *adev)
index 9ae307505190ec0afd81bc0b049d7b2b76bff3b0,a64715d90503a87e02478d6fe718eb7c90047b37..565dab3c72186704542b3763fbcdcecb9a6e9cf9
@@@ -190,8 -190,12 +190,8 @@@ out
   */
  static uint32_t sdma_v2_4_ring_get_rptr(struct amdgpu_ring *ring)
  {
 -      u32 rptr;
 -
        /* XXX check if swapping is necessary on BE */
 -      rptr = ring->adev->wb.wb[ring->rptr_offs] >> 2;
 -
 -      return rptr;
 +      return ring->adev->wb.wb[ring->rptr_offs] >> 2;
  }
  
  /**
@@@ -710,7 -714,7 +710,7 @@@ static int sdma_v2_4_ring_test_ib(struc
                DRM_ERROR("amdgpu: IB test timed out\n");
                r = -ETIMEDOUT;
                goto err1;
-       } else if (r) {
+       } else if (r < 0) {
                DRM_ERROR("amdgpu: fence wait failed (%ld).\n", r);
                goto err1;
        }
@@@ -745,16 -749,24 +745,16 @@@ static void sdma_v2_4_vm_copy_pte(struc
                                  uint64_t pe, uint64_t src,
                                  unsigned count)
  {
 -      while (count) {
 -              unsigned bytes = count * 8;
 -              if (bytes > 0x1FFFF8)
 -                      bytes = 0x1FFFF8;
 -
 -              ib->ptr[ib->length_dw++] = SDMA_PKT_HEADER_OP(SDMA_OP_COPY) |
 -                      SDMA_PKT_HEADER_SUB_OP(SDMA_SUBOP_COPY_LINEAR);
 -              ib->ptr[ib->length_dw++] = bytes;
 -              ib->ptr[ib->length_dw++] = 0; /* src/dst endian swap */
 -              ib->ptr[ib->length_dw++] = lower_32_bits(src);
 -              ib->ptr[ib->length_dw++] = upper_32_bits(src);
 -              ib->ptr[ib->length_dw++] = lower_32_bits(pe);
 -              ib->ptr[ib->length_dw++] = upper_32_bits(pe);
 -
 -              pe += bytes;
 -              src += bytes;
 -              count -= bytes / 8;
 -      }
 +      unsigned bytes = count * 8;
 +
 +      ib->ptr[ib->length_dw++] = SDMA_PKT_HEADER_OP(SDMA_OP_COPY) |
 +              SDMA_PKT_HEADER_SUB_OP(SDMA_SUBOP_COPY_LINEAR);
 +      ib->ptr[ib->length_dw++] = bytes;
 +      ib->ptr[ib->length_dw++] = 0; /* src/dst endian swap */
 +      ib->ptr[ib->length_dw++] = lower_32_bits(src);
 +      ib->ptr[ib->length_dw++] = upper_32_bits(src);
 +      ib->ptr[ib->length_dw++] = lower_32_bits(pe);
 +      ib->ptr[ib->length_dw++] = upper_32_bits(pe);
  }
  
  /**
   *
   * @ib: indirect buffer to fill with commands
   * @pe: addr of the page entry
 - * @addr: dst addr to write into pe
 + * @value: dst addr to write into pe
   * @count: number of page entries to update
   * @incr: increase next addr by incr bytes
 - * @flags: access flags
   *
   * Update PTEs by writing them manually using sDMA (CIK).
   */
 -static void sdma_v2_4_vm_write_pte(struct amdgpu_ib *ib,
 -                                 const dma_addr_t *pages_addr, uint64_t pe,
 -                                 uint64_t addr, unsigned count,
 -                                 uint32_t incr, uint32_t flags)
 +static void sdma_v2_4_vm_write_pte(struct amdgpu_ib *ib, uint64_t pe,
 +                                 uint64_t value, unsigned count,
 +                                 uint32_t incr)
  {
 -      uint64_t value;
 -      unsigned ndw;
 -
 -      while (count) {
 -              ndw = count * 2;
 -              if (ndw > 0xFFFFE)
 -                      ndw = 0xFFFFE;
 -
 -              /* for non-physically contiguous pages (system) */
 -              ib->ptr[ib->length_dw++] = SDMA_PKT_HEADER_OP(SDMA_OP_WRITE) |
 -                      SDMA_PKT_HEADER_SUB_OP(SDMA_SUBOP_COPY_LINEAR);
 -              ib->ptr[ib->length_dw++] = pe;
 -              ib->ptr[ib->length_dw++] = upper_32_bits(pe);
 -              ib->ptr[ib->length_dw++] = ndw;
 -              for (; ndw > 0; ndw -= 2, --count, pe += 8) {
 -                      value = amdgpu_vm_map_gart(pages_addr, addr);
 -                      addr += incr;
 -                      value |= flags;
 -                      ib->ptr[ib->length_dw++] = value;
 -                      ib->ptr[ib->length_dw++] = upper_32_bits(value);
 -              }
 +      unsigned ndw = count * 2;
 +
 +      ib->ptr[ib->length_dw++] = SDMA_PKT_HEADER_OP(SDMA_OP_WRITE) |
 +              SDMA_PKT_HEADER_SUB_OP(SDMA_SUBOP_COPY_LINEAR);
 +      ib->ptr[ib->length_dw++] = pe;
 +      ib->ptr[ib->length_dw++] = upper_32_bits(pe);
 +      ib->ptr[ib->length_dw++] = ndw;
 +      for (; ndw > 0; ndw -= 2, --count, pe += 8) {
 +              ib->ptr[ib->length_dw++] = lower_32_bits(value);
 +              ib->ptr[ib->length_dw++] = upper_32_bits(value);
 +              value += incr;
        }
  }
  
   *
   * Update the page tables using sDMA (CIK).
   */
 -static void sdma_v2_4_vm_set_pte_pde(struct amdgpu_ib *ib,
 -                                   uint64_t pe,
 +static void sdma_v2_4_vm_set_pte_pde(struct amdgpu_ib *ib, uint64_t pe,
                                     uint64_t addr, unsigned count,
                                     uint32_t incr, uint32_t flags)
  {
 -      uint64_t value;
 -      unsigned ndw;
 -
 -      while (count) {
 -              ndw = count;
 -              if (ndw > 0x7FFFF)
 -                      ndw = 0x7FFFF;
 -
 -              if (flags & AMDGPU_PTE_VALID)
 -                      value = addr;
 -              else
 -                      value = 0;
 -
 -              /* for physically contiguous pages (vram) */
 -              ib->ptr[ib->length_dw++] = SDMA_PKT_HEADER_OP(SDMA_OP_GEN_PTEPDE);
 -              ib->ptr[ib->length_dw++] = pe; /* dst addr */
 -              ib->ptr[ib->length_dw++] = upper_32_bits(pe);
 -              ib->ptr[ib->length_dw++] = flags; /* mask */
 -              ib->ptr[ib->length_dw++] = 0;
 -              ib->ptr[ib->length_dw++] = value; /* value */
 -              ib->ptr[ib->length_dw++] = upper_32_bits(value);
 -              ib->ptr[ib->length_dw++] = incr; /* increment size */
 -              ib->ptr[ib->length_dw++] = 0;
 -              ib->ptr[ib->length_dw++] = ndw; /* number of entries */
 -
 -              pe += ndw * 8;
 -              addr += ndw * incr;
 -              count -= ndw;
 -      }
 +      /* for physically contiguous pages (vram) */
 +      ib->ptr[ib->length_dw++] = SDMA_PKT_HEADER_OP(SDMA_OP_GEN_PTEPDE);
 +      ib->ptr[ib->length_dw++] = lower_32_bits(pe); /* dst addr */
 +      ib->ptr[ib->length_dw++] = upper_32_bits(pe);
 +      ib->ptr[ib->length_dw++] = flags; /* mask */
 +      ib->ptr[ib->length_dw++] = 0;
 +      ib->ptr[ib->length_dw++] = lower_32_bits(addr); /* value */
 +      ib->ptr[ib->length_dw++] = upper_32_bits(addr);
 +      ib->ptr[ib->length_dw++] = incr; /* increment size */
 +      ib->ptr[ib->length_dw++] = 0;
 +      ib->ptr[ib->length_dw++] = count; /* number of entries */
  }
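With the splitting loop gone, each call now emits exactly one ten-dword GEN_PTEPDE packet and the engine walks 'count' entries on its own. A user-space sketch of that dword layout, with a placeholder for the header word (the real value comes from the driver's SDMA_PKT_HEADER_OP macro):

    #include <stdio.h>
    #include <stdint.h>

    /* placeholder, NOT the real SDMA_PKT_HEADER_OP(SDMA_OP_GEN_PTEPDE) encoding */
    #define FAKE_GEN_PTEPDE_HEADER 0x000000aau

    static unsigned emit_pte_pde(uint32_t *buf, uint64_t pe, uint64_t addr,
                                 unsigned count, uint32_t incr, uint32_t flags)
    {
            unsigned dw = 0;

            buf[dw++] = FAKE_GEN_PTEPDE_HEADER;
            buf[dw++] = (uint32_t)pe;               /* dst addr, low  */
            buf[dw++] = (uint32_t)(pe >> 32);       /* dst addr, high */
            buf[dw++] = flags;                      /* mask */
            buf[dw++] = 0;
            buf[dw++] = (uint32_t)addr;             /* value, low  */
            buf[dw++] = (uint32_t)(addr >> 32);     /* value, high */
            buf[dw++] = incr;                       /* increment size */
            buf[dw++] = 0;
            buf[dw++] = count;                      /* number of entries */

            return dw;                              /* always 10 dwords */
    }

    int main(void)
    {
            uint32_t buf[10];
            unsigned n = emit_pte_pde(buf, 0x100000, 0x200000, 512, 0x1000, 0x1);

            printf("emitted %u dwords, header 0x%08x\n", n, buf[0]);
            return 0;
    }
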
  
  /**
@@@ -902,22 -945,6 +902,22 @@@ static void sdma_v2_4_ring_emit_vm_flus
                          SDMA_PKT_POLL_REGMEM_DW5_INTERVAL(10)); /* retry count, poll interval */
  }
  
 +static unsigned sdma_v2_4_ring_get_emit_ib_size(struct amdgpu_ring *ring)
 +{
 +      return
 +              7 + 6; /* sdma_v2_4_ring_emit_ib */
 +}
 +
 +static unsigned sdma_v2_4_ring_get_dma_frame_size(struct amdgpu_ring *ring)
 +{
 +      return
 +              6 + /* sdma_v2_4_ring_emit_hdp_flush */
 +              3 + /* sdma_v2_4_ring_emit_hdp_invalidate */
 +              6 + /* sdma_v2_4_ring_emit_pipeline_sync */
 +              12 + /* sdma_v2_4_ring_emit_vm_flush */
 +              10 + 10 + 10; /* sdma_v2_4_ring_emit_fence x3 for user fence, vm fence */
 +}
 +
  static int sdma_v2_4_early_init(void *handle)
  {
        struct amdgpu_device *adev = (struct amdgpu_device *)handle;
@@@ -1236,8 -1263,6 +1236,8 @@@ static const struct amdgpu_ring_funcs s
        .test_ib = sdma_v2_4_ring_test_ib,
        .insert_nop = sdma_v2_4_ring_insert_nop,
        .pad_ib = sdma_v2_4_ring_pad_ib,
 +      .get_emit_ib_size = sdma_v2_4_ring_get_emit_ib_size,
 +      .get_dma_frame_size = sdma_v2_4_ring_get_dma_frame_size,
  };
  
  static void sdma_v2_4_set_ring_funcs(struct amdgpu_device *adev)
index ad494b875311fcdacc0994afd76ad8be8761513b,a7d3cb3fead0f6c63536996c1112f4c76a540e19..453c5d66e5c34abf0e6e3f24bbebcacdf2fcca49
@@@ -142,15 -142,13 +142,15 @@@ int kfd_doorbell_mmap(struct kfd_proces
  
        vma->vm_page_prot = pgprot_noncached(vma->vm_page_prot);
  
 -      pr_debug("mapping doorbell page:\n");
 -      pr_debug("     target user address == 0x%08llX\n",
 -                      (unsigned long long) vma->vm_start);
 -      pr_debug("     physical address    == 0x%08llX\n", address);
 -      pr_debug("     vm_flags            == 0x%04lX\n", vma->vm_flags);
 -      pr_debug("     size                == 0x%04lX\n",
 -                       doorbell_process_allocation());
 +      pr_debug("kfd: mapping doorbell page in %s\n"
 +               "     target user address == 0x%08llX\n"
 +               "     physical address    == 0x%08llX\n"
 +               "     vm_flags            == 0x%04lX\n"
 +               "     size                == 0x%04lX\n",
 +               __func__,
 +               (unsigned long long) vma->vm_start, address, vma->vm_flags,
 +               doorbell_process_allocation());
 +
  
        return io_remap_pfn_range(vma,
                                vma->vm_start,
@@@ -186,7 -184,7 +186,7 @@@ u32 __iomem *kfd_get_kernel_doorbell(st
                                                        sizeof(u32)) + inx;
  
        pr_debug("kfd: get kernel queue doorbell\n"
-                        "     doorbell offset   == 0x%08d\n"
+                        "     doorbell offset   == 0x%08X\n"
                         "     kernel address    == 0x%08lX\n",
                *doorbell_off, (uintptr_t)(kfd->doorbell_kernel_ptr + inx));
  
index 72e6b7dd457b180f85c53438e32f1fc0e804297c,52c527f6642a45efa0d1881b38256bdd75380bfb..9d4c030672f0ff8ae8a830e63afe26a7948707c0
@@@ -320,19 -320,19 +320,19 @@@ atmel_hlcdc_plane_update_pos_and_size(s
                        u32 *coeff_tab = heo_upscaling_ycoef;
                        u32 max_memsize;
  
-                       if (state->crtc_w < state->src_w)
+                       if (state->crtc_h < state->src_h)
                                coeff_tab = heo_downscaling_ycoef;
                        for (i = 0; i < ARRAY_SIZE(heo_upscaling_ycoef); i++)
                                atmel_hlcdc_layer_update_cfg(&plane->layer,
                                                             33 + i,
                                                             0xffffffff,
                                                             coeff_tab[i]);
-                       factor = ((8 * 256 * state->src_w) - (256 * 4)) /
-                                state->crtc_w;
+                       factor = ((8 * 256 * state->src_h) - (256 * 4)) /
+                                state->crtc_h;
                        factor++;
-                       max_memsize = ((factor * state->crtc_w) + (256 * 4)) /
+                       max_memsize = ((factor * state->crtc_h) + (256 * 4)) /
                                      2048;
-                       if (max_memsize > state->src_w)
+                       if (max_memsize > state->src_h)
                                factor--;
                        factor_reg |= (factor << 16) | 0x80000000;
                }
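The factor/max_memsize computation above now runs on the vertical axis; a stand-alone sketch of the same arithmetic, with made-up source and CRTC heights rather than values from a real panel:

    #include <stdio.h>

    int main(void)
    {
            /* hypothetical: 1080 source lines scaled down onto 720 CRTC lines */
            unsigned src_h = 1080, crtc_h = 720;
            unsigned factor, max_memsize;

            factor = ((8 * 256 * src_h) - (256 * 4)) / crtc_h;
            factor++;
            max_memsize = ((factor * crtc_h) + (256 * 4)) / 2048;
            if (max_memsize > src_h)
                    factor--;

            printf("factor = %u, max_memsize = %u\n", factor, max_memsize);
            return 0;
    }
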
@@@ -393,7 -393,7 +393,7 @@@ static void atmel_hlcdc_plane_update_fo
  
        if ((state->base.fb->pixel_format == DRM_FORMAT_YUV422 ||
             state->base.fb->pixel_format == DRM_FORMAT_NV61) &&
 -          (state->base.rotation & (BIT(DRM_ROTATE_90) | BIT(DRM_ROTATE_270))))
 +          (state->base.rotation & (DRM_ROTATE_90 | DRM_ROTATE_270)))
                cfg |= ATMEL_HLCDC_YUV422ROT;
  
        atmel_hlcdc_layer_update_cfg(&plane->layer,
@@@ -628,7 -628,7 +628,7 @@@ static int atmel_hlcdc_plane_atomic_che
        /*
         * Swap width and size in case of 90 or 270 degrees rotation
         */
 -      if (state->base.rotation & (BIT(DRM_ROTATE_90) | BIT(DRM_ROTATE_270))) {
 +      if (state->base.rotation & (DRM_ROTATE_90 | DRM_ROTATE_270)) {
                tmp = state->crtc_w;
                state->crtc_w = state->crtc_h;
                state->crtc_h = tmp;
                        return -EINVAL;
  
                switch (state->base.rotation & DRM_ROTATE_MASK) {
 -              case BIT(DRM_ROTATE_90):
 +              case DRM_ROTATE_90:
                        offset = ((y_offset + state->src_y + patched_src_w - 1) /
                                  ydiv) * fb->pitches[i];
                        offset += ((x_offset + state->src_x) / xdiv) *
                                          fb->pitches[i];
                        state->pstride[i] = -fb->pitches[i] - state->bpp[i];
                        break;
 -              case BIT(DRM_ROTATE_180):
 +              case DRM_ROTATE_180:
                        offset = ((y_offset + state->src_y + patched_src_h - 1) /
                                  ydiv) * fb->pitches[i];
                        offset += ((x_offset + state->src_x + patched_src_w - 1) /
                                           state->bpp[i]) - fb->pitches[i];
                        state->pstride[i] = -2 * state->bpp[i];
                        break;
 -              case BIT(DRM_ROTATE_270):
 +              case DRM_ROTATE_270:
                        offset = ((y_offset + state->src_y) / ydiv) *
                                 fb->pitches[i];
                        offset += ((x_offset + state->src_x + patched_src_h - 1) /
                                          (2 * state->bpp[i]);
                        state->pstride[i] = fb->pitches[i] - state->bpp[i];
                        break;
 -              case BIT(DRM_ROTATE_0):
 +              case DRM_ROTATE_0:
                default:
                        offset = ((y_offset + state->src_y) / ydiv) *
                                 fb->pitches[i];
  }
  
  static int atmel_hlcdc_plane_prepare_fb(struct drm_plane *p,
 -                                      const struct drm_plane_state *new_state)
 +                                      struct drm_plane_state *new_state)
  {
        /*
         * FIXME: we should avoid this const -> non-const cast but it's
  }
  
  static void atmel_hlcdc_plane_cleanup_fb(struct drm_plane *p,
 -                              const struct drm_plane_state *old_state)
 +                                       struct drm_plane_state *old_state)
  {
        /*
         * FIXME: we should avoid this const -> non-const cast but it's
@@@ -905,7 -905,7 +905,7 @@@ static void atmel_hlcdc_plane_init_prop
        if (desc->layout.xstride && desc->layout.pstride)
                drm_object_attach_property(&plane->base.base,
                                plane->base.dev->mode_config.rotation_property,
 -                              BIT(DRM_ROTATE_0));
 +                              DRM_ROTATE_0);
  
        if (desc->layout.csc) {
                /*
@@@ -1056,10 -1056,10 +1056,10 @@@ atmel_hlcdc_plane_create_properties(str
  
        dev->mode_config.rotation_property =
                        drm_mode_create_rotation_property(dev,
 -                                                        BIT(DRM_ROTATE_0) |
 -                                                        BIT(DRM_ROTATE_90) |
 -                                                        BIT(DRM_ROTATE_180) |
 -                                                        BIT(DRM_ROTATE_270));
 +                                                        DRM_ROTATE_0 |
 +                                                        DRM_ROTATE_90 |
 +                                                        DRM_ROTATE_180 |
 +                                                        DRM_ROTATE_270);
        if (!dev->mode_config.rotation_property)
                return ERR_PTR(-ENOMEM);
  
index 904d29c012ad7713e7207ca2d49cfc76552d062d,2a3ded44cf2a689c66ecfd48ce9362004a9a5964..23739609427d86b9cd64d81ddad719bf5fc2bd78
@@@ -475,7 -475,7 +475,7 @@@ int drm_atomic_crtc_set_property(struc
                                        val,
                                        -1,
                                        &replaced);
-               state->color_mgmt_changed = replaced;
+               state->color_mgmt_changed |= replaced;
                return ret;
        } else if (property == config->ctm_property) {
                ret = drm_atomic_replace_property_blob_from_id(crtc,
                                        val,
                                        sizeof(struct drm_color_ctm),
                                        &replaced);
-               state->color_mgmt_changed = replaced;
+               state->color_mgmt_changed |= replaced;
                return ret;
        } else if (property == config->gamma_lut_property) {
                ret = drm_atomic_replace_property_blob_from_id(crtc,
                                        val,
                                        -1,
                                        &replaced);
-               state->color_mgmt_changed = replaced;
+               state->color_mgmt_changed |= replaced;
                return ret;
        } else if (crtc->funcs->atomic_set_property)
                return crtc->funcs->atomic_set_property(crtc, state, property, val);
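The '=' to '|=' change above matters when more than one color-management blob is replaced in a single commit: a later replace that happens to be a no-op must not clear the flag set by an earlier one. A trivial stand-alone illustration of the difference (nothing DRM-specific, just the flag accumulation):

    #include <stdbool.h>
    #include <stdio.h>

    int main(void)
    {
            bool replaced_gamma = true;     /* first blob really changed */
            bool replaced_ctm = false;      /* second blob was identical */
            bool changed;

            changed = replaced_gamma;
            changed = replaced_ctm;         /* '='  : the earlier change is lost */
            printf("with '=' : %d\n", changed);

            changed = false;
            changed |= replaced_gamma;
            changed |= replaced_ctm;        /* '|=' : the change sticks */
            printf("with '|=': %d\n", changed);
            return 0;
    }
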
@@@ -837,9 -837,8 +837,9 @@@ static int drm_atomic_plane_check(struc
        /* Check whether this plane supports the fb pixel format. */
        ret = drm_plane_check_pixel_format(plane, state->fb->pixel_format);
        if (ret) {
 -              DRM_DEBUG_ATOMIC("Invalid pixel format %s\n",
 -                               drm_get_format_name(state->fb->pixel_format));
 +              char *format_name = drm_get_format_name(state->fb->pixel_format);
 +              DRM_DEBUG_ATOMIC("Invalid pixel format %s\n", format_name);
 +              kfree(format_name);
                return ret;
        }
  
@@@ -1691,7 -1690,7 +1691,7 @@@ retry
                                goto out;
                        }
  
 -                      prop = drm_property_find(dev, prop_id);
 +                      prop = drm_mode_obj_find_prop_id(obj, prop_id);
                        if (!prop) {
                                drm_mode_object_unreference(obj);
                                ret = -ENOENT;
index 5e830281bebdba810580a307d9c3c50d7d216151,0a06f9120b5a61b56f0a8db9c77a3374bd20e37d..03414bde1f152637a7ed6002ed8a88e30611fec8
   */
  #define pr_fmt(fmt) KBUILD_MODNAME ": " fmt
  
 +#include <linux/console.h>
  #include <linux/kernel.h>
  #include <linux/sysrq.h>
  #include <linux/slab.h>
 -#include <linux/fb.h>
  #include <linux/module.h>
  #include <drm/drmP.h>
  #include <drm/drm_crtc.h>
@@@ -41,8 -41,6 +41,8 @@@
  #include <drm/drm_atomic.h>
  #include <drm/drm_atomic_helper.h>
  
 +#include "drm_crtc_helper_internal.h"
 +
  static bool drm_fbdev_emulation = true;
  module_param_named(fbdev_emulation, drm_fbdev_emulation, bool, 0600);
  MODULE_PARM_DESC(fbdev_emulation,
@@@ -337,7 -335,7 +337,7 @@@ retry
                        goto fail;
                }
  
 -              plane_state->rotation = BIT(DRM_ROTATE_0);
 +              plane_state->rotation = DRM_ROTATE_0;
  
                plane->old_fb = plane->fb;
                plane_mask |= 1 << drm_plane_index(plane);
@@@ -397,7 -395,7 +397,7 @@@ static int restore_fbdev_mode(struct dr
                if (dev->mode_config.rotation_property) {
                        drm_mode_plane_set_obj_prop(plane,
                                                    dev->mode_config.rotation_property,
 -                                                  BIT(DRM_ROTATE_0));
 +                                                  DRM_ROTATE_0);
                }
        }
  
@@@ -466,7 -464,7 +466,7 @@@ static bool drm_fb_helper_is_bound(stru
  
        /* Sometimes user space wants everything disabled, so don't steal the
         * display if there's a master. */
-       if (lockless_dereference(dev->master))
+       if (READ_ONCE(dev->master))
                return false;
  
        drm_for_each_crtc(crtc, dev) {
@@@ -620,16 -618,6 +620,16 @@@ static void drm_fb_helper_crtc_free(str
        kfree(helper->crtc_info);
  }
  
 +static void drm_fb_helper_resume_worker(struct work_struct *work)
 +{
 +      struct drm_fb_helper *helper = container_of(work, struct drm_fb_helper,
 +                                                  resume_work);
 +
 +      console_lock();
 +      fb_set_suspend(helper->fbdev, 0);
 +      console_unlock();
 +}
 +
  static void drm_fb_helper_dirty_work(struct work_struct *work)
  {
        struct drm_fb_helper *helper = container_of(work, struct drm_fb_helper,
@@@ -661,7 -649,6 +661,7 @@@ void drm_fb_helper_prepare(struct drm_d
  {
        INIT_LIST_HEAD(&helper->kernel_fb_list);
        spin_lock_init(&helper->dirty_lock);
 +      INIT_WORK(&helper->resume_work, drm_fb_helper_resume_worker);
        INIT_WORK(&helper->dirty_work, drm_fb_helper_dirty_work);
        helper->dirty_clip.x1 = helper->dirty_clip.y1 = ~0;
        helper->funcs = funcs;
@@@ -1037,65 -1024,17 +1037,65 @@@ EXPORT_SYMBOL(drm_fb_helper_cfb_imagebl
  /**
   * drm_fb_helper_set_suspend - wrapper around fb_set_suspend
   * @fb_helper: driver-allocated fbdev helper
 - * @state: desired state, zero to resume, non-zero to suspend
 + * @suspend: whether to suspend or resume
   *
 - * A wrapper around fb_set_suspend implemented by fbdev core
 + * A wrapper around fb_set_suspend implemented by fbdev core.
 + * Use drm_fb_helper_set_suspend_unlocked() if you don't need to take
 + * the lock yourself
   */
 -void drm_fb_helper_set_suspend(struct drm_fb_helper *fb_helper, int state)
 +void drm_fb_helper_set_suspend(struct drm_fb_helper *fb_helper, bool suspend)
  {
        if (fb_helper && fb_helper->fbdev)
 -              fb_set_suspend(fb_helper->fbdev, state);
 +              fb_set_suspend(fb_helper->fbdev, suspend);
  }
  EXPORT_SYMBOL(drm_fb_helper_set_suspend);
  
 +/**
 + * drm_fb_helper_set_suspend_unlocked - wrapper around fb_set_suspend that also
 + *                                      takes the console lock
 + * @fb_helper: driver-allocated fbdev helper
 + * @suspend: whether to suspend or resume
 + *
 + * A wrapper around fb_set_suspend() that takes the console lock. If the lock
 + * isn't available on resume, a worker is tasked with waiting for the lock
 + * to become available. The console lock can be pretty contented on resume
 + * due to all the printk activity.
 + *
 + * This function can be called multiple times with the same state since
 + * &fb_info->state is checked to see if fbdev is running or not before locking.
 + *
 + * Use drm_fb_helper_set_suspend() if you need to take the lock yourself.
 + */
 +void drm_fb_helper_set_suspend_unlocked(struct drm_fb_helper *fb_helper,
 +                                      bool suspend)
 +{
 +      if (!fb_helper || !fb_helper->fbdev)
 +              return;
 +
 +      /* make sure there's no pending/ongoing resume */
 +      flush_work(&fb_helper->resume_work);
 +
 +      if (suspend) {
 +              if (fb_helper->fbdev->state != FBINFO_STATE_RUNNING)
 +                      return;
 +
 +              console_lock();
 +
 +      } else {
 +              if (fb_helper->fbdev->state == FBINFO_STATE_RUNNING)
 +                      return;
 +
 +              if (!console_trylock()) {
 +                      schedule_work(&fb_helper->resume_work);
 +                      return;
 +              }
 +      }
 +
 +      fb_set_suspend(fb_helper->fbdev, suspend);
 +      console_unlock();
 +}
 +EXPORT_SYMBOL(drm_fb_helper_set_suspend_unlocked);
 +
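Drivers that previously open-coded console_lock()/fb_set_suspend()/console_unlock() in their PM paths can defer to this helper instead. A hedged sketch of how suspend/resume callbacks might use it; the driver structure and callbacks here are hypothetical, and only the drm_fb_helper call is taken from the code above:

    #include <drm/drm_fb_helper.h>

    /* hypothetical driver private data; only the embedded fb_helper matters */
    struct foo_drm_private {
            struct drm_fb_helper fb_helper;
    };

    static int foo_pm_suspend(struct foo_drm_private *priv)
    {
            /* suspend path: the helper simply takes the console lock */
            drm_fb_helper_set_suspend_unlocked(&priv->fb_helper, true);
            return 0;
    }

    static int foo_pm_resume(struct foo_drm_private *priv)
    {
            /* resume path: if the console lock is contended, the helper
             * hands the fb_set_suspend() call off to its resume worker */
            drm_fb_helper_set_suspend_unlocked(&priv->fb_helper, false);
            return 0;
    }
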
  static int setcolreg(struct drm_crtc *crtc, u16 red, u16 green,
                     u16 blue, u16 regno, struct fb_info *info)
  {
@@@ -2255,7 -2194,7 +2255,7 @@@ EXPORT_SYMBOL(drm_fb_helper_initial_con
   * @fb_helper: the drm_fb_helper
   *
   * Scan the connectors attached to the fb_helper and try to put together a
 - * setup after *notification of a change in output configuration.
 + * setup after notification of a change in output configuration.
   *
   * Called at runtime, takes the mode config locks to be able to check/change the
   * modeset configuration. Must be run from process context (which usually means
index c8bd02277b7d4ef66a51dafee058dad742a93c49,a77ce9983f69c9965725f806a008fc06a129935f..2c81067589225a89762a4129fac7eeee0abebed9
  #include <drm/drm_vma_manager.h>
  #include <drm/i915_drm.h>
  #include "i915_drv.h"
 +#include "i915_gem_dmabuf.h"
  #include "i915_vgpu.h"
  #include "i915_trace.h"
  #include "intel_drv.h"
 +#include "intel_frontbuffer.h"
  #include "intel_mocs.h"
 +#include <linux/reservation.h>
  #include <linux/shmem_fs.h>
  #include <linux/slab.h>
  #include <linux/swap.h>
  
  static void i915_gem_object_flush_gtt_write_domain(struct drm_i915_gem_object *obj);
  static void i915_gem_object_flush_cpu_write_domain(struct drm_i915_gem_object *obj);
 -static void
 -i915_gem_object_retire__write(struct drm_i915_gem_object *obj);
 -static void
 -i915_gem_object_retire__read(struct drm_i915_gem_object *obj, int ring);
  
  static bool cpu_cache_is_coherent(struct drm_device *dev,
                                  enum i915_cache_level level)
@@@ -138,6 -139,7 +138,6 @@@ int i915_mutex_lock_interruptible(struc
        if (ret)
                return ret;
  
 -      WARN_ON(i915_verify_lists(dev));
        return 0;
  }
  
@@@ -154,10 -156,10 +154,10 @@@ i915_gem_get_aperture_ioctl(struct drm_
        pinned = 0;
        mutex_lock(&dev->struct_mutex);
        list_for_each_entry(vma, &ggtt->base.active_list, vm_link)
 -              if (vma->pin_count)
 +              if (i915_vma_is_pinned(vma))
                        pinned += vma->node.size;
        list_for_each_entry(vma, &ggtt->base.inactive_list, vm_link)
 -              if (vma->pin_count)
 +              if (i915_vma_is_pinned(vma))
                        pinned += vma->node.size;
        mutex_unlock(&dev->struct_mutex);
  
@@@ -279,129 -281,23 +279,129 @@@ static const struct drm_i915_gem_object
        .release = i915_gem_object_release_phys,
  };
  
 -static int
 -drop_pages(struct drm_i915_gem_object *obj)
 +int i915_gem_object_unbind(struct drm_i915_gem_object *obj)
  {
 -      struct i915_vma *vma, *next;
 +      struct i915_vma *vma;
 +      LIST_HEAD(still_in_list);
        int ret;
  
 -      drm_gem_object_reference(&obj->base);
 -      list_for_each_entry_safe(vma, next, &obj->vma_list, obj_link)
 -              if (i915_vma_unbind(vma))
 -                      break;
 +      lockdep_assert_held(&obj->base.dev->struct_mutex);
  
 -      ret = i915_gem_object_put_pages(obj);
 -      drm_gem_object_unreference(&obj->base);
 +      /* Closed vma are removed from the obj->vma_list - but they may
 +       * still have an active binding on the object. To remove those we
 +       * must wait for all rendering to complete to the object (as unbinding
 +       * must anyway), and retire the requests.
 +       */
 +      ret = i915_gem_object_wait_rendering(obj, false);
 +      if (ret)
 +              return ret;
 +
 +      i915_gem_retire_requests(to_i915(obj->base.dev));
 +
 +      while ((vma = list_first_entry_or_null(&obj->vma_list,
 +                                             struct i915_vma,
 +                                             obj_link))) {
 +              list_move_tail(&vma->obj_link, &still_in_list);
 +              ret = i915_vma_unbind(vma);
 +              if (ret)
 +                      break;
 +      }
 +      list_splice(&still_in_list, &obj->vma_list);
  
        return ret;
  }
  
 +/**
 + * Ensures that all rendering to the object has completed and the object is
 + * safe to unbind from the GTT or access from the CPU.
 + * @obj: i915 gem object
 + * @readonly: waiting for just read access or read-write access
 + */
 +int
 +i915_gem_object_wait_rendering(struct drm_i915_gem_object *obj,
 +                             bool readonly)
 +{
 +      struct reservation_object *resv;
 +      struct i915_gem_active *active;
 +      unsigned long active_mask;
 +      int idx;
 +
 +      lockdep_assert_held(&obj->base.dev->struct_mutex);
 +
 +      if (!readonly) {
 +              active = obj->last_read;
 +              active_mask = i915_gem_object_get_active(obj);
 +      } else {
 +              active_mask = 1;
 +              active = &obj->last_write;
 +      }
 +
 +      for_each_active(active_mask, idx) {
 +              int ret;
 +
 +              ret = i915_gem_active_wait(&active[idx],
 +                                         &obj->base.dev->struct_mutex);
 +              if (ret)
 +                      return ret;
 +      }
 +
 +      resv = i915_gem_object_get_dmabuf_resv(obj);
 +      if (resv) {
 +              long err;
 +
 +              err = reservation_object_wait_timeout_rcu(resv, !readonly, true,
 +                                                        MAX_SCHEDULE_TIMEOUT);
 +              if (err < 0)
 +                      return err;
 +      }
 +
 +      return 0;
 +}
 +
 +/* A nonblocking variant of the above wait. Must be called prior to
 + * acquiring the mutex for the object, as the object state may change
 + * during this call. A reference must be held by the caller for the object.
 + */
 +static __must_check int
 +__unsafe_wait_rendering(struct drm_i915_gem_object *obj,
 +                      struct intel_rps_client *rps,
 +                      bool readonly)
 +{
 +      struct i915_gem_active *active;
 +      unsigned long active_mask;
 +      int idx;
 +
 +      active_mask = __I915_BO_ACTIVE(obj);
 +      if (!active_mask)
 +              return 0;
 +
 +      if (!readonly) {
 +              active = obj->last_read;
 +      } else {
 +              active_mask = 1;
 +              active = &obj->last_write;
 +      }
 +
 +      for_each_active(active_mask, idx) {
 +              int ret;
 +
 +              ret = i915_gem_active_wait_unlocked(&active[idx],
 +                                                  I915_WAIT_INTERRUPTIBLE,
 +                                                  NULL, rps);
 +              if (ret)
 +                      return ret;
 +      }
 +
 +      return 0;
 +}
 +
 +static struct intel_rps_client *to_rps_client(struct drm_file *file)
 +{
 +      struct drm_i915_file_private *fpriv = file->driver_priv;
 +
 +      return &fpriv->rps;
 +}
 +
  int
  i915_gem_object_attach_phys(struct drm_i915_gem_object *obj,
                            int align)
        if (obj->base.filp == NULL)
                return -EINVAL;
  
 -      ret = drop_pages(obj);
 +      ret = i915_gem_object_unbind(obj);
 +      if (ret)
 +              return ret;
 +
 +      ret = i915_gem_object_put_pages(obj);
        if (ret)
                return ret;
  
@@@ -516,7 -408,7 +516,7 @@@ i915_gem_create(struct drm_file *file
  
        ret = drm_gem_handle_create(file, &obj->base, &handle);
        /* drop reference from allocate - handle holds it now */
 -      drm_gem_object_unreference_unlocked(&obj->base);
 +      i915_gem_object_put_unlocked(obj);
        if (ret)
                return ret;
  
@@@ -610,106 -502,33 +610,106 @@@ __copy_from_user_swizzled(char *gpu_vad
   * flush the object from the CPU cache.
   */
  int i915_gem_obj_prepare_shmem_read(struct drm_i915_gem_object *obj,
 -                                  int *needs_clflush)
 +                                  unsigned int *needs_clflush)
  {
        int ret;
  
        *needs_clflush = 0;
  
 -      if (WARN_ON(!i915_gem_object_has_struct_page(obj)))
 -              return -EINVAL;
 +      if (!i915_gem_object_has_struct_page(obj))
 +              return -ENODEV;
 +
 +      ret = i915_gem_object_wait_rendering(obj, true);
 +      if (ret)
 +              return ret;
 +
 +      ret = i915_gem_object_get_pages(obj);
 +      if (ret)
 +              return ret;
 +
 +      i915_gem_object_pin_pages(obj);
 +
 +      i915_gem_object_flush_gtt_write_domain(obj);
  
 -      if (!(obj->base.read_domains & I915_GEM_DOMAIN_CPU)) {
 -              /* If we're not in the cpu read domain, set ourself into the gtt
 -               * read domain and manually flush cachelines (if required). This
 -               * optimizes for the case when the gpu will dirty the data
 -               * anyway again before the next pread happens. */
 +      /* If we're not in the cpu read domain, set ourself into the gtt
 +       * read domain and manually flush cachelines (if required). This
 +       * optimizes for the case when the gpu will dirty the data
 +       * anyway again before the next pread happens.
 +       */
 +      if (!(obj->base.read_domains & I915_GEM_DOMAIN_CPU))
                *needs_clflush = !cpu_cache_is_coherent(obj->base.dev,
                                                        obj->cache_level);
 -              ret = i915_gem_object_wait_rendering(obj, true);
 +
 +      if (*needs_clflush && !static_cpu_has(X86_FEATURE_CLFLUSH)) {
 +              ret = i915_gem_object_set_to_cpu_domain(obj, false);
                if (ret)
 -                      return ret;
 +                      goto err_unpin;
 +
 +              *needs_clflush = 0;
        }
  
 +      /* return with the pages pinned */
 +      return 0;
 +
 +err_unpin:
 +      i915_gem_object_unpin_pages(obj);
 +      return ret;
 +}
 +
 +int i915_gem_obj_prepare_shmem_write(struct drm_i915_gem_object *obj,
 +                                   unsigned int *needs_clflush)
 +{
 +      int ret;
 +
 +      *needs_clflush = 0;
 +      if (!i915_gem_object_has_struct_page(obj))
 +              return -ENODEV;
 +
 +      ret = i915_gem_object_wait_rendering(obj, false);
 +      if (ret)
 +              return ret;
 +
        ret = i915_gem_object_get_pages(obj);
        if (ret)
                return ret;
  
        i915_gem_object_pin_pages(obj);
  
 +      i915_gem_object_flush_gtt_write_domain(obj);
 +
 +      /* If we're not in the cpu write domain, set ourself into the
 +       * gtt write domain and manually flush cachelines (as required).
 +       * This optimizes for the case when the gpu will use the data
 +       * right away and we therefore have to clflush anyway.
 +       */
 +      if (obj->base.write_domain != I915_GEM_DOMAIN_CPU)
 +              *needs_clflush |= cpu_write_needs_clflush(obj) << 1;
 +
 +      /* Same trick applies to invalidate partially written cachelines read
 +       * before writing.
 +       */
 +      if (!(obj->base.read_domains & I915_GEM_DOMAIN_CPU))
 +              *needs_clflush |= !cpu_cache_is_coherent(obj->base.dev,
 +                                                       obj->cache_level);
 +
 +      if (*needs_clflush && !static_cpu_has(X86_FEATURE_CLFLUSH)) {
 +              ret = i915_gem_object_set_to_cpu_domain(obj, true);
 +              if (ret)
 +                      goto err_unpin;
 +
 +              *needs_clflush = 0;
 +      }
 +
 +      if ((*needs_clflush & CLFLUSH_AFTER) == 0)
 +              obj->cache_dirty = true;
 +
 +      intel_fb_obj_invalidate(obj, ORIGIN_CPU);
 +      obj->dirty = 1;
 +      /* return with the pages pinned */
 +      return 0;
 +
 +err_unpin:
 +      i915_gem_object_unpin_pages(obj);
        return ret;
  }
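Both prepare helpers return with the object's pages pinned and the CLFLUSH_BEFORE/CLFLUSH_AFTER bits encoded in *needs_clflush; callers drop the pin with i915_gem_obj_finish_shmem_access() once the CPU access is done, as the reworked pread/pwrite paths further down do. A rough sketch of that calling pattern (hypothetical caller, copy step elided):

    static int foo_cpu_write(struct drm_i915_gem_object *obj)
    {
            unsigned int needs_clflush;
            int ret;

            ret = i915_gem_obj_prepare_shmem_write(obj, &needs_clflush);
            if (ret)
                    return ret;

            /*
             * ... copy data into the object's pages here, clflushing
             * before and/or after as dictated by needs_clflush &
             * CLFLUSH_BEFORE and needs_clflush & CLFLUSH_AFTER ...
             */

            i915_gem_obj_finish_shmem_access(obj);  /* unpins the pages */
            return 0;
    }
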
  
@@@ -819,24 -638,14 +819,24 @@@ i915_gem_gtt_pread(struct drm_device *d
  {
        struct drm_i915_private *dev_priv = to_i915(dev);
        struct i915_ggtt *ggtt = &dev_priv->ggtt;
 +      struct i915_vma *vma;
        struct drm_mm_node node;
        char __user *user_data;
        uint64_t remain;
        uint64_t offset;
        int ret;
  
 -      ret = i915_gem_obj_ggtt_pin(obj, 0, PIN_MAPPABLE);
 -      if (ret) {
 +      vma = i915_gem_object_ggtt_pin(obj, NULL, 0, 0, PIN_MAPPABLE);
 +      if (!IS_ERR(vma)) {
 +              node.start = i915_ggtt_offset(vma);
 +              node.allocated = false;
 +              ret = i915_vma_put_fence(vma);
 +              if (ret) {
 +                      i915_vma_unpin(vma);
 +                      vma = ERR_PTR(ret);
 +              }
 +      }
 +      if (IS_ERR(vma)) {
                ret = insert_mappable_node(dev_priv, &node, PAGE_SIZE);
                if (ret)
                        goto out;
                }
  
                i915_gem_object_pin_pages(obj);
 -      } else {
 -              node.start = i915_gem_obj_ggtt_offset(obj);
 -              node.allocated = false;
 -              ret = i915_gem_object_put_fence(obj);
 -              if (ret)
 -                      goto out_unpin;
        }
  
        ret = i915_gem_object_set_to_gtt_domain(obj, false);
                 * and write to user memory which may result into page
                 * faults, and so we cannot perform this under struct_mutex.
                 */
 -              if (slow_user_access(ggtt->mappable, page_base,
 +              if (slow_user_access(&ggtt->mappable, page_base,
                                     page_offset, user_data,
                                     page_length, false)) {
                        ret = -EFAULT;
@@@ -924,7 -739,7 +924,7 @@@ out_unpin
                i915_gem_object_unpin_pages(obj);
                remove_mappable_node(&node);
        } else {
 -              i915_gem_object_ggtt_unpin(obj);
 +              i915_vma_unpin(vma);
        }
  out:
        return ret;
@@@ -945,14 -760,19 +945,14 @@@ i915_gem_shmem_pread(struct drm_device 
        int needs_clflush = 0;
        struct sg_page_iter sg_iter;
  
 -      if (!i915_gem_object_has_struct_page(obj))
 -              return -ENODEV;
 -
 -      user_data = u64_to_user_ptr(args->data_ptr);
 -      remain = args->size;
 -
 -      obj_do_bit17_swizzling = i915_gem_object_needs_bit17_swizzle(obj);
 -
        ret = i915_gem_obj_prepare_shmem_read(obj, &needs_clflush);
        if (ret)
                return ret;
  
 +      obj_do_bit17_swizzling = i915_gem_object_needs_bit17_swizzle(obj);
 +      user_data = u64_to_user_ptr(args->data_ptr);
        offset = args->offset;
 +      remain = args->size;
  
        for_each_sg_page(obj->pages->sgl, &sg_iter, obj->pages->nents,
                         offset >> PAGE_SHIFT) {
@@@ -1008,7 -828,7 +1008,7 @@@ next_page
        }
  
  out:
 -      i915_gem_object_unpin_pages(obj);
 +      i915_gem_obj_finish_shmem_access(obj);
  
        return ret;
  }
@@@ -1037,27 -857,25 +1037,27 @@@ i915_gem_pread_ioctl(struct drm_device 
                       args->size))
                return -EFAULT;
  
 -      ret = i915_mutex_lock_interruptible(dev);
 -      if (ret)
 -              return ret;
 -
 -      obj = to_intel_bo(drm_gem_object_lookup(file, args->handle));
 -      if (&obj->base == NULL) {
 -              ret = -ENOENT;
 -              goto unlock;
 -      }
 +      obj = i915_gem_object_lookup(file, args->handle);
 +      if (!obj)
 +              return -ENOENT;
  
        /* Bounds check source.  */
        if (args->offset > obj->base.size ||
            args->size > obj->base.size - args->offset) {
                ret = -EINVAL;
 -              goto out;
 +              goto err;
        }
  
        trace_i915_gem_object_pread(obj, args->offset, args->size);
  
 +      ret = __unsafe_wait_rendering(obj, to_rps_client(file), true);
 +      if (ret)
 +              goto err;
 +
 +      ret = i915_mutex_lock_interruptible(dev);
 +      if (ret)
 +              goto err;
 +
        ret = i915_gem_shmem_pread(dev, obj, args, file);
  
        /* pread for non shmem backed objects */
                intel_runtime_pm_put(to_i915(dev));
        }
  
 -out:
 -      drm_gem_object_unreference(&obj->base);
 -unlock:
 +      i915_gem_object_put(obj);
        mutex_unlock(&dev->struct_mutex);
 +
 +      return ret;
 +
 +err:
 +      i915_gem_object_put_unlocked(obj);
        return ret;
  }
  
@@@ -1104,7 -919,7 +1104,7 @@@ fast_user_write(struct io_mapping *mapp
  /**
   * This is the fast pwrite path, where we copy the data directly from the
   * user into the GTT, uncached.
 - * @dev: drm device pointer
 + * @i915: i915 device private data
   * @obj: i915 gem object
   * @args: pwrite arguments structure
   * @file: drm file pointer
@@@ -1117,28 -932,17 +1117,28 @@@ i915_gem_gtt_pwrite_fast(struct drm_i91
  {
        struct i915_ggtt *ggtt = &i915->ggtt;
        struct drm_device *dev = obj->base.dev;
 +      struct i915_vma *vma;
        struct drm_mm_node node;
        uint64_t remain, offset;
        char __user *user_data;
        int ret;
        bool hit_slow_path = false;
  
 -      if (obj->tiling_mode != I915_TILING_NONE)
 +      if (i915_gem_object_is_tiled(obj))
                return -EFAULT;
  
 -      ret = i915_gem_obj_ggtt_pin(obj, 0, PIN_MAPPABLE | PIN_NONBLOCK);
 -      if (ret) {
 +      vma = i915_gem_object_ggtt_pin(obj, NULL, 0, 0,
 +                                     PIN_MAPPABLE | PIN_NONBLOCK);
 +      if (!IS_ERR(vma)) {
 +              node.start = i915_ggtt_offset(vma);
 +              node.allocated = false;
 +              ret = i915_vma_put_fence(vma);
 +              if (ret) {
 +                      i915_vma_unpin(vma);
 +                      vma = ERR_PTR(ret);
 +              }
 +      }
 +      if (IS_ERR(vma)) {
                ret = insert_mappable_node(i915, &node, PAGE_SIZE);
                if (ret)
                        goto out;
                }
  
                i915_gem_object_pin_pages(obj);
 -      } else {
 -              node.start = i915_gem_obj_ggtt_offset(obj);
 -              node.allocated = false;
 -              ret = i915_gem_object_put_fence(obj);
 -              if (ret)
 -                      goto out_unpin;
        }
  
        ret = i915_gem_object_set_to_gtt_domain(obj, true);
        if (ret)
                goto out_unpin;
  
 -      intel_fb_obj_invalidate(obj, ORIGIN_GTT);
 +      intel_fb_obj_invalidate(obj, ORIGIN_CPU);
        obj->dirty = true;
  
        user_data = u64_to_user_ptr(args->data_ptr);
                 * If the object is non-shmem backed, we retry again with the
                 * path that handles page fault.
                 */
 -              if (fast_user_write(ggtt->mappable, page_base,
 +              if (fast_user_write(&ggtt->mappable, page_base,
                                    page_offset, user_data, page_length)) {
                        hit_slow_path = true;
                        mutex_unlock(&dev->struct_mutex);
 -                      if (slow_user_access(ggtt->mappable,
 +                      if (slow_user_access(&ggtt->mappable,
                                             page_base,
                                             page_offset, user_data,
                                             page_length, true)) {
@@@ -1223,7 -1033,7 +1223,7 @@@ out_flush
                }
        }
  
 -      intel_fb_obj_flush(obj, false, ORIGIN_GTT);
 +      intel_fb_obj_flush(obj, false, ORIGIN_CPU);
  out_unpin:
        if (node.allocated) {
                wmb();
                i915_gem_object_unpin_pages(obj);
                remove_mappable_node(&node);
        } else {
 -              i915_gem_object_ggtt_unpin(obj);
 +              i915_vma_unpin(vma);
        }
  out:
        return ret;
@@@ -1316,17 -1126,41 +1316,17 @@@ i915_gem_shmem_pwrite(struct drm_devic
        int shmem_page_offset, page_length, ret = 0;
        int obj_do_bit17_swizzling, page_do_bit17_swizzling;
        int hit_slowpath = 0;
 -      int needs_clflush_after = 0;
 -      int needs_clflush_before = 0;
 +      unsigned int needs_clflush;
        struct sg_page_iter sg_iter;
  
 -      user_data = u64_to_user_ptr(args->data_ptr);
 -      remain = args->size;
 -
 -      obj_do_bit17_swizzling = i915_gem_object_needs_bit17_swizzle(obj);
 -
 -      if (obj->base.write_domain != I915_GEM_DOMAIN_CPU) {
 -              /* If we're not in the cpu write domain, set ourself into the gtt
 -               * write domain and manually flush cachelines (if required). This
 -               * optimizes for the case when the gpu will use the data
 -               * right away and we therefore have to clflush anyway. */
 -              needs_clflush_after = cpu_write_needs_clflush(obj);
 -              ret = i915_gem_object_wait_rendering(obj, false);
 -              if (ret)
 -                      return ret;
 -      }
 -      /* Same trick applies to invalidate partially written cachelines read
 -       * before writing. */
 -      if ((obj->base.read_domains & I915_GEM_DOMAIN_CPU) == 0)
 -              needs_clflush_before =
 -                      !cpu_cache_is_coherent(dev, obj->cache_level);
 -
 -      ret = i915_gem_object_get_pages(obj);
 +      ret = i915_gem_obj_prepare_shmem_write(obj, &needs_clflush);
        if (ret)
                return ret;
  
 -      intel_fb_obj_invalidate(obj, ORIGIN_CPU);
 -
 -      i915_gem_object_pin_pages(obj);
 -
 +      obj_do_bit17_swizzling = i915_gem_object_needs_bit17_swizzle(obj);
 +      user_data = u64_to_user_ptr(args->data_ptr);
        offset = args->offset;
 -      obj->dirty = 1;
 +      remain = args->size;
  
        for_each_sg_page(obj->pages->sgl, &sg_iter, obj->pages->nents,
                         offset >> PAGE_SHIFT) {
                /* If we don't overwrite a cacheline completely we need to be
                 * careful to have up-to-date data by first clflushing. Don't
                 * overcomplicate things and flush the entire patch. */
 -              partial_cacheline_write = needs_clflush_before &&
 +              partial_cacheline_write = needs_clflush & CLFLUSH_BEFORE &&
                        ((shmem_page_offset | page_length)
                                & (boot_cpu_data.x86_clflush_size - 1));
  
                ret = shmem_pwrite_fast(page, shmem_page_offset, page_length,
                                        user_data, page_do_bit17_swizzling,
                                        partial_cacheline_write,
 -                                      needs_clflush_after);
 +                                      needs_clflush & CLFLUSH_AFTER);
                if (ret == 0)
                        goto next_page;
  
                ret = shmem_pwrite_slow(page, shmem_page_offset, page_length,
                                        user_data, page_do_bit17_swizzling,
                                        partial_cacheline_write,
 -                                      needs_clflush_after);
 +                                      needs_clflush & CLFLUSH_AFTER);
  
                mutex_lock(&dev->struct_mutex);
  
@@@ -1383,7 -1217,7 +1383,7 @@@ next_page
        }
  
  out:
 -      i915_gem_object_unpin_pages(obj);
 +      i915_gem_obj_finish_shmem_access(obj);
  
        if (hit_slowpath) {
                /*
                 * cachelines in-line while writing and the object moved
                 * out of the cpu write domain while we've dropped the lock.
                 */
 -              if (!needs_clflush_after &&
 +              if (!(needs_clflush & CLFLUSH_AFTER) &&
                    obj->base.write_domain != I915_GEM_DOMAIN_CPU) {
                        if (i915_gem_clflush_object(obj, obj->pin_display))
 -                              needs_clflush_after = true;
 +                              needs_clflush |= CLFLUSH_AFTER;
                }
        }
  
 -      if (needs_clflush_after)
 +      if (needs_clflush & CLFLUSH_AFTER)
                i915_gem_chipset_flush(to_i915(dev));
 -      else
 -              obj->cache_dirty = true;
  
        intel_fb_obj_flush(obj, false, ORIGIN_CPU);
        return ret;
@@@ -1437,29 -1273,27 +1437,29 @@@ i915_gem_pwrite_ioctl(struct drm_devic
                        return -EFAULT;
        }
  
 -      intel_runtime_pm_get(dev_priv);
 -
 -      ret = i915_mutex_lock_interruptible(dev);
 -      if (ret)
 -              goto put_rpm;
 -
 -      obj = to_intel_bo(drm_gem_object_lookup(file, args->handle));
 -      if (&obj->base == NULL) {
 -              ret = -ENOENT;
 -              goto unlock;
 -      }
 +      obj = i915_gem_object_lookup(file, args->handle);
 +      if (!obj)
 +              return -ENOENT;
  
        /* Bounds check destination. */
        if (args->offset > obj->base.size ||
            args->size > obj->base.size - args->offset) {
                ret = -EINVAL;
 -              goto out;
 +              goto err;
        }
  
        trace_i915_gem_object_pwrite(obj, args->offset, args->size);
  
 +      ret = __unsafe_wait_rendering(obj, to_rps_client(file), false);
 +      if (ret)
 +              goto err;
 +
 +      intel_runtime_pm_get(dev_priv);
 +
 +      ret = i915_mutex_lock_interruptible(dev);
 +      if (ret)
 +              goto err_rpm;
 +
        ret = -EFAULT;
        /* We can only do the GTT pwrite on untiled buffers, as otherwise
         * it would end up going through the fenced access, and we'll get
        if (ret == -EFAULT || ret == -ENOSPC) {
                if (obj->phys_handle)
                        ret = i915_gem_phys_pwrite(obj, args, file);
 -              else if (i915_gem_object_has_struct_page(obj))
 -                      ret = i915_gem_shmem_pwrite(dev, obj, args, file);
                else
 -                      ret = -ENODEV;
 +                      ret = i915_gem_shmem_pwrite(dev, obj, args, file);
        }
  
 -out:
 -      drm_gem_object_unreference(&obj->base);
 -unlock:
 +      i915_gem_object_put(obj);
        mutex_unlock(&dev->struct_mutex);
 -put_rpm:
        intel_runtime_pm_put(dev_priv);
  
        return ret;
 -}
 -
 -static int
 -i915_gem_check_wedge(unsigned reset_counter, bool interruptible)
 -{
 -      if (__i915_terminally_wedged(reset_counter))
 -              return -EIO;
 -
 -      if (__i915_reset_in_progress(reset_counter)) {
 -              /* Non-interruptible callers can't handle -EAGAIN, hence return
 -               * -EIO unconditionally for these. */
 -              if (!interruptible)
 -                      return -EIO;
 -
 -              return -EAGAIN;
 -      }
  
 -      return 0;
 +err_rpm:
 +      intel_runtime_pm_put(dev_priv);
 +err:
 +      i915_gem_object_put_unlocked(obj);
 +      return ret;
  }
  
 -static unsigned long local_clock_us(unsigned *cpu)
 +static inline enum fb_op_origin
 +write_origin(struct drm_i915_gem_object *obj, unsigned domain)
  {
 -      unsigned long t;
 -
 -      /* Cheaply and approximately convert from nanoseconds to microseconds.
 -       * The result and subsequent calculations are also defined in the same
 -       * approximate microseconds units. The principal source of timing
 -       * error here is from the simple truncation.
 -       *
 -       * Note that local_clock() is only defined wrt to the current CPU;
 -       * the comparisons are no longer valid if we switch CPUs. Instead of
 -       * blocking preemption for the entire busywait, we can detect the CPU
 -       * switch and use that as indicator of system load and a reason to
 -       * stop busywaiting, see busywait_stop().
 -       */
 -      *cpu = get_cpu();
 -      t = local_clock() >> 10;
 -      put_cpu();
 -
 -      return t;
 -}
 -
 -static bool busywait_stop(unsigned long timeout, unsigned cpu)
 -{
 -      unsigned this_cpu;
 -
 -      if (time_after(local_clock_us(&this_cpu), timeout))
 -              return true;
 -
 -      return this_cpu != cpu;
 -}
 -
 -bool __i915_spin_request(const struct drm_i915_gem_request *req,
 -                       int state, unsigned long timeout_us)
 -{
 -      unsigned cpu;
 -
 -      /* When waiting for high frequency requests, e.g. during synchronous
 -       * rendering split between the CPU and GPU, the finite amount of time
 -       * required to set up the irq and wait upon it limits the response
 -       * rate. By busywaiting on the request completion for a short while we
 -       * can service the high frequency waits as quick as possible. However,
 -       * if it is a slow request, we want to sleep as quickly as possible.
 -       * The tradeoff between waiting and sleeping is roughly the time it
 -       * takes to sleep on a request, on the order of a microsecond.
 -       */
 -
 -      timeout_us += local_clock_us(&cpu);
 -      do {
 -              if (i915_gem_request_completed(req))
 -                      return true;
 -
 -              if (signal_pending_state(state, current))
 -                      break;
 -
 -              if (busywait_stop(timeout_us, cpu))
 -                      break;
 -
 -              cpu_relax_lowlatency();
 -      } while (!need_resched());
 -
 -      return false;
 -}
 -
 -/**
 - * __i915_wait_request - wait until execution of request has finished
 - * @req: duh!
 - * @interruptible: do an interruptible wait (normally yes)
 - * @timeout: in - how long to wait (NULL forever); out - how much time remaining
 - * @rps: RPS client
 - *
 - * Note: It is of utmost importance that the passed in seqno and reset_counter
 - * values have been read by the caller in an smp safe manner. Where read-side
 - * locks are involved, it is sufficient to read the reset_counter before
 - * unlocking the lock that protects the seqno. For lockless tricks, the
 - * reset_counter _must_ be read before, and an appropriate smp_rmb must be
 - * inserted.
 - *
 - * Returns 0 if the request was found within the alloted time. Else returns the
 - * errno with remaining time filled in timeout argument.
 - */
 -int __i915_wait_request(struct drm_i915_gem_request *req,
 -                      bool interruptible,
 -                      s64 *timeout,
 -                      struct intel_rps_client *rps)
 -{
 -      int state = interruptible ? TASK_INTERRUPTIBLE : TASK_UNINTERRUPTIBLE;
 -      DEFINE_WAIT(reset);
 -      struct intel_wait wait;
 -      unsigned long timeout_remain;
 -      s64 before = 0; /* Only to silence a compiler warning. */
 -      int ret = 0;
 -
 -      might_sleep();
 -
 -      if (list_empty(&req->list))
 -              return 0;
 -
 -      if (i915_gem_request_completed(req))
 -              return 0;
 -
 -      timeout_remain = MAX_SCHEDULE_TIMEOUT;
 -      if (timeout) {
 -              if (WARN_ON(*timeout < 0))
 -                      return -EINVAL;
 -
 -              if (*timeout == 0)
 -                      return -ETIME;
 -
 -              timeout_remain = nsecs_to_jiffies_timeout(*timeout);
 -
 -              /*
 -               * Record current time in case interrupted by signal, or wedged.
 -               */
 -              before = ktime_get_raw_ns();
 -      }
 -
 -      trace_i915_gem_request_wait_begin(req);
 -
 -      /* This client is about to stall waiting for the GPU. In many cases
 -       * this is undesirable and limits the throughput of the system, as
 -       * many clients cannot continue processing user input/output whilst
 -       * blocked. RPS autotuning may take tens of milliseconds to respond
 -       * to the GPU load and thus incurs additional latency for the client.
 -       * We can circumvent that by promoting the GPU frequency to maximum
 -       * before we wait. This makes the GPU throttle up much more quickly
 -       * (good for benchmarks and user experience, e.g. window animations),
 -       * but at a cost of spending more power processing the workload
 -       * (bad for battery). Not all clients even want their results
 -       * immediately and for them we should just let the GPU select its own
 -       * frequency to maximise efficiency. To prevent a single client from
 -       * forcing the clocks too high for the whole system, we only allow
 -       * each client to waitboost once in a busy period.
 -       */
 -      if (INTEL_INFO(req->i915)->gen >= 6)
 -              gen6_rps_boost(req->i915, rps, req->emitted_jiffies);
 -
 -      /* Optimistic spin for the next ~jiffie before touching IRQs */
 -      if (i915_spin_request(req, state, 5))
 -              goto complete;
 -
 -      set_current_state(state);
 -      add_wait_queue(&req->i915->gpu_error.wait_queue, &reset);
 -
 -      intel_wait_init(&wait, req->seqno);
 -      if (intel_engine_add_wait(req->engine, &wait))
 -              /* In order to check that we haven't missed the interrupt
 -               * as we enabled it, we need to kick ourselves to do a
 -               * coherent check on the seqno before we sleep.
 -               */
 -              goto wakeup;
 -
 -      for (;;) {
 -              if (signal_pending_state(state, current)) {
 -                      ret = -ERESTARTSYS;
 -                      break;
 -              }
 -
 -              timeout_remain = io_schedule_timeout(timeout_remain);
 -              if (timeout_remain == 0) {
 -                      ret = -ETIME;
 -                      break;
 -              }
 -
 -              if (intel_wait_complete(&wait))
 -                      break;
 -
 -              set_current_state(state);
 -
 -wakeup:
 -              /* Carefully check if the request is complete, giving time
 -               * for the seqno to be visible following the interrupt.
 -               * We also have to check in case we are kicked by the GPU
 -               * reset in order to drop the struct_mutex.
 -               */
 -              if (__i915_request_irq_complete(req))
 -                      break;
 -
 -              /* Only spin if we know the GPU is processing this request */
 -              if (i915_spin_request(req, state, 2))
 -                      break;
 -      }
 -      remove_wait_queue(&req->i915->gpu_error.wait_queue, &reset);
 -
 -      intel_engine_remove_wait(req->engine, &wait);
 -      __set_current_state(TASK_RUNNING);
 -complete:
 -      trace_i915_gem_request_wait_end(req);
 -
 -      if (timeout) {
 -              s64 tres = *timeout - (ktime_get_raw_ns() - before);
 -
 -              *timeout = tres < 0 ? 0 : tres;
 -
 -              /*
 -               * Apparently ktime isn't accurate enough and occasionally has a
 -               * bit of mismatch in the jiffies<->nsecs<->ktime loop. So patch
 -               * things up to make the test happy. We allow up to 1 jiffy.
 -               *
 -               * This is a regrssion from the timespec->ktime conversion.
 -               */
 -              if (ret == -ETIME && *timeout < jiffies_to_usecs(1)*1000)
 -                      *timeout = 0;
 -      }
 -
 -      if (rps && req->seqno == req->engine->last_submitted_seqno) {
 -              /* The GPU is now idle and this client has stalled.
 -               * Since no other client has submitted a request in the
 -               * meantime, assume that this client is the only one
 -               * supplying work to the GPU but is unable to keep that
 -               * work supplied because it is waiting. Since the GPU is
 -               * then never kept fully busy, RPS autoclocking will
 -               * keep the clocks relatively low, causing further delays.
 -               * Compensate by giving the synchronous client credit for
 -               * a waitboost next time.
 -               */
 -              spin_lock(&req->i915->rps.client_lock);
 -              list_del_init(&rps->link);
 -              spin_unlock(&req->i915->rps.client_lock);
 -      }
 -
 -      return ret;
 -}
 -
 -int i915_gem_request_add_to_client(struct drm_i915_gem_request *req,
 -                                 struct drm_file *file)
 -{
 -      struct drm_i915_file_private *file_priv;
 -
 -      WARN_ON(!req || !file || req->file_priv);
 -
 -      if (!req || !file)
 -              return -EINVAL;
 -
 -      if (req->file_priv)
 -              return -EINVAL;
 -
 -      file_priv = file->driver_priv;
 -
 -      spin_lock(&file_priv->mm.lock);
 -      req->file_priv = file_priv;
 -      list_add_tail(&req->client_list, &file_priv->mm.request_list);
 -      spin_unlock(&file_priv->mm.lock);
 -
 -      req->pid = get_pid(task_pid(current));
 -
 -      return 0;
 -}
 -
 -static inline void
 -i915_gem_request_remove_from_client(struct drm_i915_gem_request *request)
 -{
 -      struct drm_i915_file_private *file_priv = request->file_priv;
 -
 -      if (!file_priv)
 -              return;
 -
 -      spin_lock(&file_priv->mm.lock);
 -      list_del(&request->client_list);
 -      request->file_priv = NULL;
 -      spin_unlock(&file_priv->mm.lock);
 -
 -      put_pid(request->pid);
 -      request->pid = NULL;
 -}
 -
 -static void i915_gem_request_retire(struct drm_i915_gem_request *request)
 -{
 -      trace_i915_gem_request_retire(request);
 -
 -      /* We know the GPU must have read the request to have
 -       * sent us the seqno + interrupt, so use the position
 -       * of tail of the request to update the last known position
 -       * of the GPU head.
 -       *
 -       * Note this requires that we are always called in request
 -       * completion order.
 -       */
 -      request->ringbuf->last_retired_head = request->postfix;
 -
 -      list_del_init(&request->list);
 -      i915_gem_request_remove_from_client(request);
 -
 -      if (request->previous_context) {
 -              if (i915.enable_execlists)
 -                      intel_lr_context_unpin(request->previous_context,
 -                                             request->engine);
 -      }
 -
 -      i915_gem_context_unreference(request->ctx);
 -      i915_gem_request_unreference(request);
 -}
 -
 -static void
 -__i915_gem_request_retire__upto(struct drm_i915_gem_request *req)
 -{
 -      struct intel_engine_cs *engine = req->engine;
 -      struct drm_i915_gem_request *tmp;
 -
 -      lockdep_assert_held(&engine->i915->drm.struct_mutex);
 -
 -      if (list_empty(&req->list))
 -              return;
 -
 -      do {
 -              tmp = list_first_entry(&engine->request_list,
 -                                     typeof(*tmp), list);
 -
 -              i915_gem_request_retire(tmp);
 -      } while (tmp != req);
 -
 -      WARN_ON(i915_verify_lists(engine->dev));
 -}
 -
 -/**
 - * Waits for a request to be signaled, and cleans up the
 - * request and object lists appropriately for that event.
 - * @req: request to wait on
 - */
 -int
 -i915_wait_request(struct drm_i915_gem_request *req)
 -{
 -      struct drm_i915_private *dev_priv = req->i915;
 -      bool interruptible;
 -      int ret;
 -
 -      interruptible = dev_priv->mm.interruptible;
 -
 -      BUG_ON(!mutex_is_locked(&dev_priv->drm.struct_mutex));
 -
 -      ret = __i915_wait_request(req, interruptible, NULL, NULL);
 -      if (ret)
 -              return ret;
 -
 -      /* If the GPU hung, we want to keep the requests to find the guilty. */
 -      if (!i915_reset_in_progress(&dev_priv->gpu_error))
 -              __i915_gem_request_retire__upto(req);
 -
 -      return 0;
 -}
 -
 -/**
 - * Ensures that all rendering to the object has completed and the object is
 - * safe to unbind from the GTT or access from the CPU.
 - * @obj: i915 gem object
 - * @readonly: waiting for read access or write
 - */
 -int
 -i915_gem_object_wait_rendering(struct drm_i915_gem_object *obj,
 -                             bool readonly)
 -{
 -      int ret, i;
 -
 -      if (!obj->active)
 -              return 0;
 -
 -      if (readonly) {
 -              if (obj->last_write_req != NULL) {
 -                      ret = i915_wait_request(obj->last_write_req);
 -                      if (ret)
 -                              return ret;
 -
 -                      i = obj->last_write_req->engine->id;
 -                      if (obj->last_read_req[i] == obj->last_write_req)
 -                              i915_gem_object_retire__read(obj, i);
 -                      else
 -                              i915_gem_object_retire__write(obj);
 -              }
 -      } else {
 -              for (i = 0; i < I915_NUM_ENGINES; i++) {
 -                      if (obj->last_read_req[i] == NULL)
 -                              continue;
 -
 -                      ret = i915_wait_request(obj->last_read_req[i]);
 -                      if (ret)
 -                              return ret;
 -
 -                      i915_gem_object_retire__read(obj, i);
 -              }
 -              GEM_BUG_ON(obj->active);
 -      }
 -
 -      return 0;
 -}
 -
 -static void
 -i915_gem_object_retire_request(struct drm_i915_gem_object *obj,
 -                             struct drm_i915_gem_request *req)
 -{
 -      int ring = req->engine->id;
 -
 -      if (obj->last_read_req[ring] == req)
 -              i915_gem_object_retire__read(obj, ring);
 -      else if (obj->last_write_req == req)
 -              i915_gem_object_retire__write(obj);
 -
 -      if (!i915_reset_in_progress(&req->i915->gpu_error))
 -              __i915_gem_request_retire__upto(req);
 -}
 -
 -/* A nonblocking variant of the above wait. This is a highly dangerous routine
 - * as the object state may change during this call.
 - */
 -static __must_check int
 -i915_gem_object_wait_rendering__nonblocking(struct drm_i915_gem_object *obj,
 -                                          struct intel_rps_client *rps,
 -                                          bool readonly)
 -{
 -      struct drm_device *dev = obj->base.dev;
 -      struct drm_i915_private *dev_priv = to_i915(dev);
 -      struct drm_i915_gem_request *requests[I915_NUM_ENGINES];
 -      int ret, i, n = 0;
 -
 -      BUG_ON(!mutex_is_locked(&dev->struct_mutex));
 -      BUG_ON(!dev_priv->mm.interruptible);
 -
 -      if (!obj->active)
 -              return 0;
 -
 -      if (readonly) {
 -              struct drm_i915_gem_request *req;
 -
 -              req = obj->last_write_req;
 -              if (req == NULL)
 -                      return 0;
 -
 -              requests[n++] = i915_gem_request_reference(req);
 -      } else {
 -              for (i = 0; i < I915_NUM_ENGINES; i++) {
 -                      struct drm_i915_gem_request *req;
 -
 -                      req = obj->last_read_req[i];
 -                      if (req == NULL)
 -                              continue;
 -
 -                      requests[n++] = i915_gem_request_reference(req);
 -              }
 -      }
 -
 -      mutex_unlock(&dev->struct_mutex);
 -      ret = 0;
 -      for (i = 0; ret == 0 && i < n; i++)
 -              ret = __i915_wait_request(requests[i], true, NULL, rps);
 -      mutex_lock(&dev->struct_mutex);
 -
 -      for (i = 0; i < n; i++) {
 -              if (ret == 0)
 -                      i915_gem_object_retire_request(obj, requests[i]);
 -              i915_gem_request_unreference(requests[i]);
 -      }
 -
 -      return ret;
 -}
 -
 -static struct intel_rps_client *to_rps_client(struct drm_file *file)
 -{
 -      struct drm_i915_file_private *fpriv = file->driver_priv;
 -      return &fpriv->rps;
 -}
 -
 -static enum fb_op_origin
 -write_origin(struct drm_i915_gem_object *obj, unsigned domain)
 -{
 -      return domain == I915_GEM_DOMAIN_GTT && !obj->has_wc_mmap ?
 -             ORIGIN_GTT : ORIGIN_CPU;
 +      return (domain == I915_GEM_DOMAIN_GTT ?
 +              obj->frontbuffer_ggtt_origin : ORIGIN_CPU);
  }
  
  /**
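
The reworked pwrite ioctl above looks up the object and waits for rendering without holding struct_mutex, only then takes the runtime-pm wakeref and the mutex, and unwinds through the err_rpm/err labels in reverse order of acquisition. A small standalone sketch of that label-per-resource unwind pattern, using hypothetical stand-ins for the object reference, wakeref and mutex:

#include <stdio.h>

static int get_object(void)	{ return 0; }
static void put_object(void)	{ puts("put object"); }
static int get_rpm(void)	{ return 0; }
static void put_rpm(void)	{ puts("put rpm wakeref"); }
static int lock_mutex(void)	{ return 0; }
static void unlock_mutex(void)	{ puts("unlock mutex"); }

static int pwrite_like(void)
{
	int ret;

	ret = get_object();		/* lockless lookup */
	if (ret)
		return ret;

	ret = get_rpm();		/* wakeref taken after the wait, not before */
	if (ret)
		goto err_obj;

	ret = lock_mutex();
	if (ret)
		goto err_rpm;

	/* ... perform the write ... */

	unlock_mutex();
	put_rpm();
	put_object();
	return 0;

err_rpm:
	put_rpm();
err_obj:
	put_object();
	return ret;
}

int main(void)
{
	return pwrite_like();
}

Each label releases exactly what was acquired before the failing step, so no path can leak the wakeref or the reference.
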
@@@ -1520,7 -1831,10 +1520,7 @@@ i915_gem_set_domain_ioctl(struct drm_de
        int ret;
  
        /* Only handle setting domains to types used by the CPU. */
 -      if (write_domain & I915_GEM_GPU_DOMAINS)
 -              return -EINVAL;
 -
 -      if (read_domains & I915_GEM_GPU_DOMAINS)
 +      if ((write_domain | read_domains) & I915_GEM_GPU_DOMAINS)
                return -EINVAL;
  
        /* Having something in the write domain implies it's in the read
        if (write_domain != 0 && read_domains != write_domain)
                return -EINVAL;
  
 -      ret = i915_mutex_lock_interruptible(dev);
 -      if (ret)
 -              return ret;
 -
 -      obj = to_intel_bo(drm_gem_object_lookup(file, args->handle));
 -      if (&obj->base == NULL) {
 -              ret = -ENOENT;
 -              goto unlock;
 -      }
 +      obj = i915_gem_object_lookup(file, args->handle);
 +      if (!obj)
 +              return -ENOENT;
  
        /* Try to flush the object off the GPU without holding the lock.
         * We will repeat the flush holding the lock in the normal manner
         * to catch cases where we are gazumped.
         */
 -      ret = i915_gem_object_wait_rendering__nonblocking(obj,
 -                                                        to_rps_client(file),
 -                                                        !write_domain);
 +      ret = __unsafe_wait_rendering(obj, to_rps_client(file), !write_domain);
 +      if (ret)
 +              goto err;
 +
 +      ret = i915_mutex_lock_interruptible(dev);
        if (ret)
 -              goto unref;
 +              goto err;
  
        if (read_domains & I915_GEM_DOMAIN_GTT)
                ret = i915_gem_object_set_to_gtt_domain(obj, write_domain != 0);
        if (write_domain != 0)
                intel_fb_obj_invalidate(obj, write_origin(obj, write_domain));
  
 -unref:
 -      drm_gem_object_unreference(&obj->base);
 -unlock:
 +      i915_gem_object_put(obj);
        mutex_unlock(&dev->struct_mutex);
        return ret;
 +
 +err:
 +      i915_gem_object_put_unlocked(obj);
 +      return ret;
  }
  
  /**
@@@ -1574,23 -1890,26 +1574,23 @@@ i915_gem_sw_finish_ioctl(struct drm_dev
  {
        struct drm_i915_gem_sw_finish *args = data;
        struct drm_i915_gem_object *obj;
 -      int ret = 0;
 +      int err = 0;
  
 -      ret = i915_mutex_lock_interruptible(dev);
 -      if (ret)
 -              return ret;
 -
 -      obj = to_intel_bo(drm_gem_object_lookup(file, args->handle));
 -      if (&obj->base == NULL) {
 -              ret = -ENOENT;
 -              goto unlock;
 -      }
 +      obj = i915_gem_object_lookup(file, args->handle);
 +      if (!obj)
 +              return -ENOENT;
  
        /* Pinned buffers may be scanout, so flush the cache */
 -      if (obj->pin_display)
 -              i915_gem_object_flush_cpu_write_domain(obj);
 +      if (READ_ONCE(obj->pin_display)) {
 +              err = i915_mutex_lock_interruptible(dev);
 +              if (!err) {
 +                      i915_gem_object_flush_cpu_write_domain(obj);
 +                      mutex_unlock(&dev->struct_mutex);
 +              }
 +      }
  
 -      drm_gem_object_unreference(&obj->base);
 -unlock:
 -      mutex_unlock(&dev->struct_mutex);
 -      return ret;
 +      i915_gem_object_put_unlocked(obj);
 +      return err;
  }
  
  /**
@@@ -1618,7 -1937,7 +1618,7 @@@ i915_gem_mmap_ioctl(struct drm_device *
                    struct drm_file *file)
  {
        struct drm_i915_gem_mmap *args = data;
 -      struct drm_gem_object *obj;
 +      struct drm_i915_gem_object *obj;
        unsigned long addr;
  
        if (args->flags & ~(I915_MMAP_WC))
        if (args->flags & I915_MMAP_WC && !boot_cpu_has(X86_FEATURE_PAT))
                return -ENODEV;
  
 -      obj = drm_gem_object_lookup(file, args->handle);
 -      if (obj == NULL)
 +      obj = i915_gem_object_lookup(file, args->handle);
 +      if (!obj)
                return -ENOENT;
  
        /* prime objects have no backing filp to GEM mmap
         * pages from.
         */
 -      if (!obj->filp) {
 -              drm_gem_object_unreference_unlocked(obj);
 +      if (!obj->base.filp) {
 +              i915_gem_object_put_unlocked(obj);
                return -EINVAL;
        }
  
 -      addr = vm_mmap(obj->filp, 0, args->size,
 +      addr = vm_mmap(obj->base.filp, 0, args->size,
                       PROT_READ | PROT_WRITE, MAP_SHARED,
                       args->offset);
        if (args->flags & I915_MMAP_WC) {
                struct vm_area_struct *vma;
  
                if (down_write_killable(&mm->mmap_sem)) {
 -                      drm_gem_object_unreference_unlocked(obj);
 +                      i915_gem_object_put_unlocked(obj);
                        return -EINTR;
                }
                vma = find_vma(mm, addr);
                up_write(&mm->mmap_sem);
  
                /* This may race, but that's ok, it only gets set */
 -              WRITE_ONCE(to_intel_bo(obj)->has_wc_mmap, true);
 +              WRITE_ONCE(obj->frontbuffer_ggtt_origin, ORIGIN_CPU);
        }
 -      drm_gem_object_unreference_unlocked(obj);
 +      i915_gem_object_put_unlocked(obj);
        if (IS_ERR((void *)addr))
                return addr;
  
        return 0;
  }
  
 +static unsigned int tile_row_pages(struct drm_i915_gem_object *obj)
 +{
 +      u64 size;
 +
 +      size = i915_gem_object_get_stride(obj);
 +      size *= i915_gem_object_get_tiling(obj) == I915_TILING_Y ? 32 : 8;
 +
 +      return size >> PAGE_SHIFT;
 +}
 +
 +/**
 + * i915_gem_mmap_gtt_version - report the current feature set for GTT mmaps
 + *
 + * A history of the GTT mmap interface:
 + *
 + * 0 - Everything had to fit into the GTT. Both parties of a memcpy had to
 + *     be aligned and suitable for fencing, and still fit into the available
 + *     mappable space left by the pinned display objects. A classic problem
 + *     we called the page-fault-of-doom where we would ping-pong between
 + *     two objects that could not fit inside the GTT and so the memcpy
 + *     would page one object in at the expense of the other between every
 + *     single byte.
 + *
 + * 1 - Objects can be any size, and have any compatible fencing (X, Y, or none
 + *     as set via i915_gem_set_tiling() [DRM_I915_GEM_SET_TILING]). If the
 + *     object is too large for the available space (or simply too large
 + *     for the mappable aperture!), a view is created instead and faulted
 + *     into userspace. (This view is aligned and sized appropriately for
 + *     fenced access.)
 + *
 + * Restrictions:
 + *
 + *  * snoopable objects cannot be accessed via the GTT. It can cause machine
 + *    hangs on some architectures, corruption on others. An attempt to service
 + *    a GTT page fault from a snoopable object will generate a SIGBUS.
 + *
 + *  * the object must be able to fit into RAM (physical memory, though not
 + *    limited to the mappable aperture).
 + *
 + *
 + * Caveats:
 + *
 + *  * a new GTT page fault will synchronize rendering from the GPU and flush
 + *    all data to system memory. Subsequent access will not be synchronized.
 + *
 + *  * all mappings are revoked on runtime device suspend.
 + *
 + *  * there are only 8, 16 or 32 fence registers to share between all users
 + *    (older machines require a fence register for display and blitter access
 + *    as well). Contention of the fence registers will cause the previous users
 + *    to be unmapped and any new access will generate new page faults.
 + *
 + *  * running out of memory while servicing a fault may generate a SIGBUS,
 + *    rather than the expected SIGSEGV.
 + */
 +int i915_gem_mmap_gtt_version(void)
 +{
 +      return 1;
 +}
 +
  /**
   * i915_gem_fault - fault a page into the GTT
 - * @vma: VMA in question
 + * @area: CPU VMA in question
   * @vmf: fault info
   *
   * The fault handler is set up by drm_gem_mmap() when an object is GTT mapped
   * from the GTT and/or fence registers to make room.  So performance may
   * suffer if the GTT working set is large or there are few fence registers
   * left.
 + *
 + * The current feature set supported by i915_gem_fault() and thus GTT mmaps
 + * is exposed via I915_PARAM_MMAP_GTT_VERSION (see i915_gem_mmap_gtt_version).
   */
 -int i915_gem_fault(struct vm_area_struct *vma, struct vm_fault *vmf)
 +int i915_gem_fault(struct vm_area_struct *area, struct vm_fault *vmf)
  {
 -      struct drm_i915_gem_object *obj = to_intel_bo(vma->vm_private_data);
 +#define MIN_CHUNK_PAGES ((1 << 20) >> PAGE_SHIFT) /* 1 MiB */
 +      struct drm_i915_gem_object *obj = to_intel_bo(area->vm_private_data);
        struct drm_device *dev = obj->base.dev;
        struct drm_i915_private *dev_priv = to_i915(dev);
        struct i915_ggtt *ggtt = &dev_priv->ggtt;
 -      struct i915_ggtt_view view = i915_ggtt_view_normal;
 -      pgoff_t page_offset;
 -      unsigned long pfn;
 -      int ret = 0;
        bool write = !!(vmf->flags & FAULT_FLAG_WRITE);
 -
 -      intel_runtime_pm_get(dev_priv);
 +      struct i915_vma *vma;
 +      pgoff_t page_offset;
 +      unsigned int flags;
 +      int ret;
  
        /* We don't use vmf->pgoff since that has the fake offset */
 -      page_offset = ((unsigned long)vmf->virtual_address - vma->vm_start) >>
 +      page_offset = ((unsigned long)vmf->virtual_address - area->vm_start) >>
                PAGE_SHIFT;
  
 -      ret = i915_mutex_lock_interruptible(dev);
 -      if (ret)
 -              goto out;
 -
        trace_i915_gem_object_fault(obj, page_offset, true, write);
  
        /* Try to flush the object off the GPU first without holding the lock.
 -       * Upon reacquiring the lock, we will perform our sanity checks and then
 +       * Upon acquiring the lock, we will perform our sanity checks and then
         * repeat the flush holding the lock in the normal manner to catch cases
         * where we are gazumped.
         */
 -      ret = i915_gem_object_wait_rendering__nonblocking(obj, NULL, !write);
 +      ret = __unsafe_wait_rendering(obj, NULL, !write);
        if (ret)
 -              goto unlock;
 +              goto err;
 +
 +      intel_runtime_pm_get(dev_priv);
 +
 +      ret = i915_mutex_lock_interruptible(dev);
 +      if (ret)
 +              goto err_rpm;
  
        /* Access to snoopable pages through the GTT is incoherent. */
        if (obj->cache_level != I915_CACHE_NONE && !HAS_LLC(dev)) {
                ret = -EFAULT;
 -              goto unlock;
 +              goto err_unlock;
        }
  
 -      /* Use a partial view if the object is bigger than the aperture. */
 -      if (obj->base.size >= ggtt->mappable_end &&
 -          obj->tiling_mode == I915_TILING_NONE) {
 -              static const unsigned int chunk_size = 256; // 1 MiB
 +      /* If the object is smaller than a couple of partial vma, it is
 +       * not worth only creating a single partial vma - we may as well
 +       * clear enough space for the full object.
 +       */
 +      flags = PIN_MAPPABLE;
 +      if (obj->base.size > 2 * MIN_CHUNK_PAGES << PAGE_SHIFT)
 +              flags |= PIN_NONBLOCK | PIN_NONFAULT;
 +
 +      /* Now pin it into the GTT as needed */
 +      vma = i915_gem_object_ggtt_pin(obj, NULL, 0, 0, flags);
 +      if (IS_ERR(vma)) {
 +              struct i915_ggtt_view view;
 +              unsigned int chunk_size;
 +
 +              /* Use a partial view if it is bigger than available space */
 +              chunk_size = MIN_CHUNK_PAGES;
 +              if (i915_gem_object_is_tiled(obj))
 +                      chunk_size = max(chunk_size, tile_row_pages(obj));
  
                memset(&view, 0, sizeof(view));
                view.type = I915_GGTT_VIEW_PARTIAL;
                view.params.partial.offset = rounddown(page_offset, chunk_size);
                view.params.partial.size =
 -                      min_t(unsigned int,
 -                            chunk_size,
 -                            (vma->vm_end - vma->vm_start)/PAGE_SIZE -
 +                      min_t(unsigned int, chunk_size,
 +                            (area->vm_end - area->vm_start) / PAGE_SIZE -
                              view.params.partial.offset);
 -      }
  
 -      /* Now pin it into the GTT if needed */
 -      ret = i915_gem_object_ggtt_pin(obj, &view, 0, PIN_MAPPABLE);
 -      if (ret)
 -              goto unlock;
 +              /* If the partial covers the entire object, just create a
 +               * normal VMA.
 +               */
 +              if (chunk_size >= obj->base.size >> PAGE_SHIFT)
 +                      view.type = I915_GGTT_VIEW_NORMAL;
 +
 +              /* Userspace is now writing through an untracked VMA, abandon
 +               * all hope that the hardware is able to track future writes.
 +               */
 +              obj->frontbuffer_ggtt_origin = ORIGIN_CPU;
 +
 +              vma = i915_gem_object_ggtt_pin(obj, &view, 0, 0, PIN_MAPPABLE);
 +      }
 +      if (IS_ERR(vma)) {
 +              ret = PTR_ERR(vma);
 +              goto err_unlock;
 +      }
  
        ret = i915_gem_object_set_to_gtt_domain(obj, write);
        if (ret)
 -              goto unpin;
 +              goto err_unpin;
  
 -      ret = i915_gem_object_get_fence(obj);
 +      ret = i915_vma_get_fence(vma);
        if (ret)
 -              goto unpin;
 +              goto err_unpin;
  
        /* Finally, remap it using the new GTT offset */
 -      pfn = ggtt->mappable_base +
 -              i915_gem_obj_ggtt_offset_view(obj, &view);
 -      pfn >>= PAGE_SHIFT;
 -
 -      if (unlikely(view.type == I915_GGTT_VIEW_PARTIAL)) {
 -              /* Overriding existing pages in partial view does not cause
 -               * us any trouble as TLBs are still valid because the fault
 -               * is due to userspace losing part of the mapping or never
 -               * having accessed it before (at this partials' range).
 -               */
 -              unsigned long base = vma->vm_start +
 -                                   (view.params.partial.offset << PAGE_SHIFT);
 -              unsigned int i;
 -
 -              for (i = 0; i < view.params.partial.size; i++) {
 -                      ret = vm_insert_pfn(vma, base + i * PAGE_SIZE, pfn + i);
 -                      if (ret)
 -                              break;
 -              }
 -
 -              obj->fault_mappable = true;
 -      } else {
 -              if (!obj->fault_mappable) {
 -                      unsigned long size = min_t(unsigned long,
 -                                                 vma->vm_end - vma->vm_start,
 -                                                 obj->base.size);
 -                      int i;
 -
 -                      for (i = 0; i < size >> PAGE_SHIFT; i++) {
 -                              ret = vm_insert_pfn(vma,
 -                                                  (unsigned long)vma->vm_start + i * PAGE_SIZE,
 -                                                  pfn + i);
 -                              if (ret)
 -                                      break;
 -                      }
 +      ret = remap_io_mapping(area,
 +                             area->vm_start + (vma->ggtt_view.params.partial.offset << PAGE_SHIFT),
 +                             (ggtt->mappable_base + vma->node.start) >> PAGE_SHIFT,
 +                             min_t(u64, vma->size, area->vm_end - area->vm_start),
 +                             &ggtt->mappable);
 +      if (ret)
 +              goto err_unpin;
  
 -                      obj->fault_mappable = true;
 -              } else
 -                      ret = vm_insert_pfn(vma,
 -                                          (unsigned long)vmf->virtual_address,
 -                                          pfn + page_offset);
 -      }
 -unpin:
 -      i915_gem_object_ggtt_unpin_view(obj, &view);
 -unlock:
 +      obj->fault_mappable = true;
 +err_unpin:
 +      __i915_vma_unpin(vma);
 +err_unlock:
        mutex_unlock(&dev->struct_mutex);
 -out:
 +err_rpm:
 +      intel_runtime_pm_put(dev_priv);
 +err:
        switch (ret) {
        case -EIO:
                /*
                ret = VM_FAULT_SIGBUS;
                break;
        }
 -
 -      intel_runtime_pm_put(dev_priv);
        return ret;
  }
  
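
The comment block above documents the versioned GTT mmap interface that the new i915_gem_mmap_gtt_version() reports. A hedged userspace sketch of how that version could be queried through the GETPARAM ioctl mentioned in the diff (assumes the usual DRM uapi header layout and /dev/dri/card0; not part of the patch itself):

#include <fcntl.h>
#include <stdio.h>
#include <sys/ioctl.h>
#include <unistd.h>
#include <drm/i915_drm.h>

int main(void)
{
	int fd = open("/dev/dri/card0", O_RDWR);
	int version = 0;
	struct drm_i915_getparam gp = {
		.param = I915_PARAM_MMAP_GTT_VERSION,
		.value = &version,
	};

	if (fd < 0)
		return 1;

	if (ioctl(fd, DRM_IOCTL_I915_GETPARAM, &gp) == 0)
		printf("GTT mmap version: %d\n", version);	/* 1 after this series */
	else
		printf("kernel does not report I915_PARAM_MMAP_GTT_VERSION\n");

	close(fd);
	return 0;
}
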
@@@ -1952,58 -2215,46 +1952,58 @@@ i915_gem_release_all_mmaps(struct drm_i
                i915_gem_release_mmap(obj);
  }
  
 -uint32_t
 -i915_gem_get_gtt_size(struct drm_device *dev, uint32_t size, int tiling_mode)
 +/**
 + * i915_gem_get_ggtt_size - return required global GTT size for an object
 + * @dev_priv: i915 device
 + * @size: object size
 + * @tiling_mode: tiling mode
 + *
 + * Return the required global GTT size for an object, taking into account
 + * potential fence register mapping.
 + */
 +u64 i915_gem_get_ggtt_size(struct drm_i915_private *dev_priv,
 +                         u64 size, int tiling_mode)
  {
 -      uint32_t gtt_size;
 +      u64 ggtt_size;
  
 -      if (INTEL_INFO(dev)->gen >= 4 ||
 +      GEM_BUG_ON(size == 0);
 +
 +      if (INTEL_GEN(dev_priv) >= 4 ||
            tiling_mode == I915_TILING_NONE)
                return size;
  
        /* Previous chips need a power-of-two fence region when tiling */
 -      if (IS_GEN3(dev))
 -              gtt_size = 1024*1024;
 +      if (IS_GEN3(dev_priv))
 +              ggtt_size = 1024*1024;
        else
 -              gtt_size = 512*1024;
 +              ggtt_size = 512*1024;
  
 -      while (gtt_size < size)
 -              gtt_size <<= 1;
 +      while (ggtt_size < size)
 +              ggtt_size <<= 1;
  
 -      return gtt_size;
 +      return ggtt_size;
  }
  
  /**
 - * i915_gem_get_gtt_alignment - return required GTT alignment for an object
 - * @dev: drm device
 + * i915_gem_get_ggtt_alignment - return required global GTT alignment
 + * @dev_priv: i915 device
   * @size: object size
   * @tiling_mode: tiling mode
 - * @fenced: is fenced alignemned required or not
 + * @fenced: is fenced alignment required or not
   *
 - * Return the required GTT alignment for an object, taking into account
 + * Return the required global GTT alignment for an object, taking into account
   * potential fence register mapping.
   */
 -uint32_t
 -i915_gem_get_gtt_alignment(struct drm_device *dev, uint32_t size,
 -                         int tiling_mode, bool fenced)
 +u64 i915_gem_get_ggtt_alignment(struct drm_i915_private *dev_priv, u64 size,
 +                              int tiling_mode, bool fenced)
  {
 +      GEM_BUG_ON(size == 0);
 +
        /*
         * Minimum alignment is 4k (GTT page size), but might be greater
         * if a fence register is needed for the object.
         */
 -      if (INTEL_INFO(dev)->gen >= 4 || (!fenced && IS_G33(dev)) ||
 +      if (INTEL_GEN(dev_priv) >= 4 || (!fenced && IS_G33(dev_priv)) ||
            tiling_mode == I915_TILING_NONE)
                return 4096;
  
         * Previous chips need to be aligned to the size of the smallest
         * fence register that can contain the object.
         */
 -      return i915_gem_get_gtt_size(dev, size, tiling_mode);
 +      return i915_gem_get_ggtt_size(dev_priv, size, tiling_mode);
  }
  
  static int i915_gem_object_create_mmap_offset(struct drm_i915_gem_object *obj)
  {
        struct drm_i915_private *dev_priv = to_i915(obj->base.dev);
 -      int ret;
 -
 -      dev_priv->mm.shrinker_no_lock_stealing = true;
 +      int err;
  
 -      ret = drm_gem_create_mmap_offset(&obj->base);
 -      if (ret != -ENOSPC)
 -              goto out;
 +      err = drm_gem_create_mmap_offset(&obj->base);
 +      if (!err)
 +              return 0;
  
 -      /* Badly fragmented mmap space? The only way we can recover
 -       * space is by destroying unwanted objects. We can't randomly release
 -       * mmap_offsets as userspace expects them to be persistent for the
 -       * lifetime of the objects. The closest we can is to release the
 -       * offsets on purgeable objects by truncating it and marking it purged,
 -       * which prevents userspace from ever using that object again.
 +      /* We can idle the GPU locklessly to flush stale objects, but in order
 +       * to claim that space for ourselves, we need to take the big
 +       * struct_mutex to free the requests+objects and allocate our slot.
         */
 -      i915_gem_shrink(dev_priv,
 -                      obj->base.size >> PAGE_SHIFT,
 -                      I915_SHRINK_BOUND |
 -                      I915_SHRINK_UNBOUND |
 -                      I915_SHRINK_PURGEABLE);
 -      ret = drm_gem_create_mmap_offset(&obj->base);
 -      if (ret != -ENOSPC)
 -              goto out;
 +      err = i915_gem_wait_for_idle(dev_priv, I915_WAIT_INTERRUPTIBLE);
 +      if (err)
 +              return err;
  
 -      i915_gem_shrink_all(dev_priv);
 -      ret = drm_gem_create_mmap_offset(&obj->base);
 -out:
 -      dev_priv->mm.shrinker_no_lock_stealing = false;
 +      err = i915_mutex_lock_interruptible(&dev_priv->drm);
 +      if (!err) {
 +              i915_gem_retire_requests(dev_priv);
 +              err = drm_gem_create_mmap_offset(&obj->base);
 +              mutex_unlock(&dev_priv->drm.struct_mutex);
 +      }
  
 -      return ret;
 +      return err;
  }
  
  static void i915_gem_object_free_mmap_offset(struct drm_i915_gem_object *obj)
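
i915_gem_get_ggtt_size() above keeps the pre-gen4 rule that a fenced object needs a power-of-two fence region: start at 1 MiB on gen3 (512 KiB on earlier parts) and double until the object fits. A standalone sketch of that sizing rule with plain integers:

#include <stdio.h>

static unsigned long long fence_region_size(unsigned long long size, int is_gen3)
{
	unsigned long long ggtt_size = is_gen3 ? 1024 * 1024 : 512 * 1024;

	/* Double the region until it covers the object. */
	while (ggtt_size < size)
		ggtt_size <<= 1;

	return ggtt_size;
}

int main(void)
{
	/* A 1.5 MiB tiled object on gen3 needs a 2 MiB fence region. */
	printf("%llu\n", fence_region_size(3 * 512 * 1024, 1));
	return 0;
}
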
@@@ -2055,15 -2314,32 +2055,15 @@@ i915_gem_mmap_gtt(struct drm_file *file
        struct drm_i915_gem_object *obj;
        int ret;
  
 -      ret = i915_mutex_lock_interruptible(dev);
 -      if (ret)
 -              return ret;
 -
 -      obj = to_intel_bo(drm_gem_object_lookup(file, handle));
 -      if (&obj->base == NULL) {
 -              ret = -ENOENT;
 -              goto unlock;
 -      }
 -
 -      if (obj->madv != I915_MADV_WILLNEED) {
 -              DRM_DEBUG("Attempting to mmap a purgeable buffer\n");
 -              ret = -EFAULT;
 -              goto out;
 -      }
 +      obj = i915_gem_object_lookup(file, handle);
 +      if (!obj)
 +              return -ENOENT;
  
        ret = i915_gem_object_create_mmap_offset(obj);
 -      if (ret)
 -              goto out;
 -
 -      *offset = drm_vma_node_offset_addr(&obj->base.vma_node);
 +      if (ret == 0)
 +              *offset = drm_vma_node_offset_addr(&obj->base.vma_node);
  
 -out:
 -      drm_gem_object_unreference(&obj->base);
 -unlock:
 -      mutex_unlock(&dev->struct_mutex);
 +      i915_gem_object_put_unlocked(obj);
        return ret;
  }
  
@@@ -2181,7 -2457,7 +2181,7 @@@ i915_gem_object_put_pages(struct drm_i9
        if (obj->pages_pin_count)
                return -EBUSY;
  
 -      BUG_ON(i915_gem_obj_bound_any(obj));
 +      GEM_BUG_ON(obj->bind_count);
  
        /* ->put_pages might need to allocate memory for the bit17 swizzle
         * array, hence protect them from being reaped by removing them from gtt
        list_del(&obj->global_list);
  
        if (obj->mapping) {
 -              if (is_vmalloc_addr(obj->mapping))
 -                      vunmap(obj->mapping);
 +              void *ptr;
 +
 +              ptr = ptr_mask_bits(obj->mapping);
 +              if (is_vmalloc_addr(ptr))
 +                      vunmap(ptr);
                else
 -                      kunmap(kmap_to_page(obj->mapping));
 +                      kunmap(kmap_to_page(ptr));
 +
                obj->mapping = NULL;
        }
  
@@@ -2305,7 -2577,7 +2305,7 @@@ i915_gem_object_get_pages_gtt(struct dr
        if (i915_gem_object_needs_bit17_swizzle(obj))
                i915_gem_object_do_bit_17_swizzle(obj);
  
 -      if (obj->tiling_mode != I915_TILING_NONE &&
 +      if (i915_gem_object_is_tiled(obj) &&
            dev_priv->quirks & QUIRK_PIN_SWIZZLED_PAGES)
                i915_gem_object_pin_pages(obj);
  
@@@ -2369,8 -2641,7 +2369,8 @@@ i915_gem_object_get_pages(struct drm_i9
  }
  
  /* The 'mapping' part of i915_gem_object_pin_map() below */
 -static void *i915_gem_object_map(const struct drm_i915_gem_object *obj)
 +static void *i915_gem_object_map(const struct drm_i915_gem_object *obj,
 +                               enum i915_map_type type)
  {
        unsigned long n_pages = obj->base.size >> PAGE_SHIFT;
        struct sg_table *sgt = obj->pages;
        struct page *stack_pages[32];
        struct page **pages = stack_pages;
        unsigned long i = 0;
 +      pgprot_t pgprot;
        void *addr;
  
        /* A single page can always be kmapped */
 -      if (n_pages == 1)
 +      if (n_pages == 1 && type == I915_MAP_WB)
                return kmap(sg_page(sgt->sgl));
  
        if (n_pages > ARRAY_SIZE(stack_pages)) {
        /* Check that we have the expected number of pages */
        GEM_BUG_ON(i != n_pages);
  
 -      addr = vmap(pages, n_pages, 0, PAGE_KERNEL);
 +      switch (type) {
 +      case I915_MAP_WB:
 +              pgprot = PAGE_KERNEL;
 +              break;
 +      case I915_MAP_WC:
 +              pgprot = pgprot_writecombine(PAGE_KERNEL_IO);
 +              break;
 +      }
 +      addr = vmap(pages, n_pages, 0, pgprot);
  
        if (pages != stack_pages)
                drm_free_large(pages);
  }
  
  /* get, pin, and map the pages of the object into kernel space */
 -void *i915_gem_object_pin_map(struct drm_i915_gem_object *obj)
 +void *i915_gem_object_pin_map(struct drm_i915_gem_object *obj,
 +                            enum i915_map_type type)
  {
 +      enum i915_map_type has_type;
 +      bool pinned;
 +      void *ptr;
        int ret;
  
        lockdep_assert_held(&obj->base.dev->struct_mutex);
 +      GEM_BUG_ON(!i915_gem_object_has_struct_page(obj));
  
        ret = i915_gem_object_get_pages(obj);
        if (ret)
                return ERR_PTR(ret);
  
        i915_gem_object_pin_pages(obj);
 +      pinned = obj->pages_pin_count > 1;
  
 -      if (!obj->mapping) {
 -              obj->mapping = i915_gem_object_map(obj);
 -              if (!obj->mapping) {
 -                      i915_gem_object_unpin_pages(obj);
 -                      return ERR_PTR(-ENOMEM);
 +      ptr = ptr_unpack_bits(obj->mapping, has_type);
 +      if (ptr && has_type != type) {
 +              if (pinned) {
 +                      ret = -EBUSY;
 +                      goto err;
                }
 -      }
  
 -      return obj->mapping;
 -}
 +              if (is_vmalloc_addr(ptr))
 +                      vunmap(ptr);
 +              else
 +                      kunmap(kmap_to_page(ptr));
  
 -void i915_vma_move_to_active(struct i915_vma *vma,
 -                           struct drm_i915_gem_request *req)
 -{
 -      struct drm_i915_gem_object *obj = vma->obj;
 -      struct intel_engine_cs *engine;
 +              ptr = obj->mapping = NULL;
 +      }
  
 -      engine = i915_gem_request_get_engine(req);
 +      if (!ptr) {
 +              ptr = i915_gem_object_map(obj, type);
 +              if (!ptr) {
 +                      ret = -ENOMEM;
 +                      goto err;
 +              }
  
 -      /* Add a reference if we're newly entering the active list. */
 -      if (obj->active == 0)
 -              drm_gem_object_reference(&obj->base);
 -      obj->active |= intel_engine_flag(engine);
 +              obj->mapping = ptr_pack_bits(ptr, type);
 +      }
  
 -      list_move_tail(&obj->engine_list[engine->id], &engine->active_list);
 -      i915_gem_request_assign(&obj->last_read_req[engine->id], req);
 +      return ptr;
  
 -      list_move_tail(&vma->vm_link, &vma->vm->active_list);
 +err:
 +      i915_gem_object_unpin_pages(obj);
 +      return ERR_PTR(ret);
  }
  
  static void
 -i915_gem_object_retire__write(struct drm_i915_gem_object *obj)
 +i915_gem_object_retire__write(struct i915_gem_active *active,
 +                            struct drm_i915_gem_request *request)
  {
 -      GEM_BUG_ON(obj->last_write_req == NULL);
 -      GEM_BUG_ON(!(obj->active & intel_engine_flag(obj->last_write_req->engine)));
 +      struct drm_i915_gem_object *obj =
 +              container_of(active, struct drm_i915_gem_object, last_write);
  
 -      i915_gem_request_assign(&obj->last_write_req, NULL);
        intel_fb_obj_flush(obj, true, ORIGIN_CS);
  }
  
  static void
 -i915_gem_object_retire__read(struct drm_i915_gem_object *obj, int ring)
 +i915_gem_object_retire__read(struct i915_gem_active *active,
 +                           struct drm_i915_gem_request *request)
  {
 -      struct i915_vma *vma;
 -
 -      GEM_BUG_ON(obj->last_read_req[ring] == NULL);
 -      GEM_BUG_ON(!(obj->active & (1 << ring)));
 -
 -      list_del_init(&obj->engine_list[ring]);
 -      i915_gem_request_assign(&obj->last_read_req[ring], NULL);
 -
 -      if (obj->last_write_req && obj->last_write_req->engine->id == ring)
 -              i915_gem_object_retire__write(obj);
 -
 -      obj->active &= ~(1 << ring);
 -      if (obj->active)
 -              return;
 -
 -      /* Bump our place on the bound list to keep it roughly in LRU order
 -       * so that we don't steal from recently used but inactive objects
 -       * (unless we are forced to ofc!)
 -       */
 -      list_move_tail(&obj->global_list,
 -                     &to_i915(obj->base.dev)->mm.bound_list);
 -
 -      list_for_each_entry(vma, &obj->vma_list, obj_link) {
 -              if (!list_empty(&vma->vm_link))
 -                      list_move_tail(&vma->vm_link, &vma->vm->inactive_list);
 -      }
 -
 -      i915_gem_request_assign(&obj->last_fenced_req, NULL);
 -      drm_gem_object_unreference(&obj->base);
 -}
 -
 -static int
 -i915_gem_init_seqno(struct drm_i915_private *dev_priv, u32 seqno)
 -{
 -      struct intel_engine_cs *engine;
 -      int ret;
 -
 -      /* Carefully retire all requests without writing to the rings */
 -      for_each_engine(engine, dev_priv) {
 -              ret = intel_engine_idle(engine);
 -              if (ret)
 -                      return ret;
 -      }
 -      i915_gem_retire_requests(dev_priv);
 -
 -      /* If the seqno wraps around, we need to clear the breadcrumb rbtree */
 -      if (!i915_seqno_passed(seqno, dev_priv->next_seqno)) {
 -              while (intel_kick_waiters(dev_priv) ||
 -                     intel_kick_signalers(dev_priv))
 -                      yield();
 -      }
 -
 -      /* Finally reset hw state */
 -      for_each_engine(engine, dev_priv)
 -              intel_ring_init_seqno(engine, seqno);
 -
 -      return 0;
 -}
 -
 -int i915_gem_set_seqno(struct drm_device *dev, u32 seqno)
 -{
 -      struct drm_i915_private *dev_priv = to_i915(dev);
 -      int ret;
 -
 -      if (seqno == 0)
 -              return -EINVAL;
 -
 -      /* HWS page needs to be set less than what we
 -       * will inject to ring
 -       */
 -      ret = i915_gem_init_seqno(dev_priv, seqno - 1);
 -      if (ret)
 -              return ret;
 -
 -      /* Carefully set the last_seqno value so that wrap
 -       * detection still works
 -       */
 -      dev_priv->next_seqno = seqno;
 -      dev_priv->last_seqno = seqno - 1;
 -      if (dev_priv->last_seqno == 0)
 -              dev_priv->last_seqno--;
 -
 -      return 0;
 -}
 -
 -int
 -i915_gem_get_seqno(struct drm_i915_private *dev_priv, u32 *seqno)
 -{
 -      /* reserve 0 for non-seqno */
 -      if (dev_priv->next_seqno == 0) {
 -              int ret = i915_gem_init_seqno(dev_priv, 0);
 -              if (ret)
 -                      return ret;
 -
 -              dev_priv->next_seqno = 1;
 -      }
 -
 -      *seqno = dev_priv->last_seqno = dev_priv->next_seqno++;
 -      return 0;
 -}
 -
 -static void i915_gem_mark_busy(const struct intel_engine_cs *engine)
 -{
 -      struct drm_i915_private *dev_priv = engine->i915;
 -
 -      dev_priv->gt.active_engines |= intel_engine_flag(engine);
 -      if (dev_priv->gt.awake)
 -              return;
 -
 -      intel_runtime_pm_get_noresume(dev_priv);
 -      dev_priv->gt.awake = true;
 -
 -      i915_update_gfx_val(dev_priv);
 -      if (INTEL_GEN(dev_priv) >= 6)
 -              gen6_rps_busy(dev_priv);
 -
 -      queue_delayed_work(dev_priv->wq,
 -                         &dev_priv->gt.retire_work,
 -                         round_jiffies_up_relative(HZ));
 -}
 -
 -/*
 - * NB: This function is not allowed to fail. Doing so would mean the the
 - * request is not being tracked for completion but the work itself is
 - * going to happen on the hardware. This would be a Bad Thing(tm).
 - */
 -void __i915_add_request(struct drm_i915_gem_request *request,
 -                      struct drm_i915_gem_object *obj,
 -                      bool flush_caches)
 -{
 -      struct intel_engine_cs *engine;
 -      struct intel_ringbuffer *ringbuf;
 -      u32 request_start;
 -      u32 reserved_tail;
 -      int ret;
 -
 -      if (WARN_ON(request == NULL))
 -              return;
 -
 -      engine = request->engine;
 -      ringbuf = request->ringbuf;
 -
 -      /*
 -       * To ensure that this call will not fail, space for its emissions
 -       * should already have been reserved in the ring buffer. Let the ring
 -       * know that it is time to use that space up.
 -       */
 -      request_start = intel_ring_get_tail(ringbuf);
 -      reserved_tail = request->reserved_space;
 -      request->reserved_space = 0;
 -
 -      /*
 -       * Emit any outstanding flushes - execbuf can fail to emit the flush
 -       * after having emitted the batchbuffer command. Hence we need to fix
 -       * things up similar to emitting the lazy request. The difference here
 -       * is that the flush _must_ happen before the next request, no matter
 -       * what.
 -       */
 -      if (flush_caches) {
 -              if (i915.enable_execlists)
 -                      ret = logical_ring_flush_all_caches(request);
 -              else
 -                      ret = intel_ring_flush_all_caches(request);
 -              /* Not allowed to fail! */
 -              WARN(ret, "*_ring_flush_all_caches failed: %d!\n", ret);
 -      }
 -
 -      trace_i915_gem_request_add(request);
 +      int idx = request->engine->id;
 +      struct drm_i915_gem_object *obj =
 +              container_of(active, struct drm_i915_gem_object, last_read[idx]);
  
 -      request->head = request_start;
 +      GEM_BUG_ON(!i915_gem_object_has_active_engine(obj, idx));
  
 -      /* Whilst this request exists, batch_obj will be on the
 -       * active_list, and so will hold the active reference. Only when this
 -       * request is retired will the the batch_obj be moved onto the
 -       * inactive_list and lose its active reference. Hence we do not need
 -       * to explicitly hold another reference here.
 -       */
 -      request->batch_obj = obj;
 +      i915_gem_object_clear_active(obj, idx);
 +      if (i915_gem_object_is_active(obj))
 +              return;
  
 -      /* Seal the request and mark it as pending execution. Note that
 -       * we may inspect this state, without holding any locks, during
 -       * hangcheck. Hence we apply the barrier to ensure that we do not
 -       * see a more recent value in the hws than we are tracking.
 -       */
 -      request->emitted_jiffies = jiffies;
 -      request->previous_seqno = engine->last_submitted_seqno;
 -      smp_store_mb(engine->last_submitted_seqno, request->seqno);
 -      list_add_tail(&request->list, &engine->request_list);
 -
 -      /* Record the position of the start of the request so that
 -       * should we detect the updated seqno part-way through the
 -       * GPU processing the request, we never over-estimate the
 -       * position of the head.
 +      /* Bump our place on the bound list to keep it roughly in LRU order
 +       * so that we don't steal from recently used but inactive objects
 +       * (unless we are forced to ofc!)
         */
 -      request->postfix = intel_ring_get_tail(ringbuf);
 -
 -      if (i915.enable_execlists)
 -              ret = engine->emit_request(request);
 -      else {
 -              ret = engine->add_request(request);
 +      if (obj->bind_count)
 +              list_move_tail(&obj->global_list,
 +                             &request->i915->mm.bound_list);
  
 -              request->tail = intel_ring_get_tail(ringbuf);
 -      }
 -      /* Not allowed to fail! */
 -      WARN(ret, "emit|add_request failed: %d!\n", ret);
 -      /* Sanity check that the reserved size was large enough. */
 -      ret = intel_ring_get_tail(ringbuf) - request_start;
 -      if (ret < 0)
 -              ret += ringbuf->size;
 -      WARN_ONCE(ret > reserved_tail,
 -                "Not enough space reserved (%d bytes) "
 -                "for adding the request (%d bytes)\n",
 -                reserved_tail, ret);
 -
 -      i915_gem_mark_busy(engine);
 +      i915_gem_object_put(obj);
  }
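
/*
 * Editor's sketch, not part of the diff above: the removed WARN_ONCE in
 * __i915_add_request() measures how many bytes the request actually emitted
 * by subtracting the recorded start from the current ring tail, adding the
 * ring size back when the tail has wrapped.  A self-contained model of that
 * arithmetic follows; ring_bytes_used() and its parameters are illustrative
 * names, not the driver's API.
 */
#include <stdio.h>

static int ring_bytes_used(int ring_size, int request_start, int tail)
{
	int used = tail - request_start;

	if (used < 0)		/* the tail wrapped past the end of the ring */
		used += ring_size;
	return used;
}

int main(void)
{
	printf("%d\n", ring_bytes_used(4096, 100, 160));	/* no wrap: 60  */
	printf("%d\n", ring_bytes_used(4096, 4000, 64));	/* wrapped: 160 */
	return 0;
}
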
  
  static bool i915_context_is_banned(const struct i915_gem_context *ctx)
@@@ -2532,6 -2981,101 +2532,6 @@@ static void i915_set_reset_status(struc
        }
  }
  
 -void i915_gem_request_free(struct kref *req_ref)
 -{
 -      struct drm_i915_gem_request *req = container_of(req_ref,
 -                                               typeof(*req), ref);
 -      kmem_cache_free(req->i915->requests, req);
 -}
 -
 -static inline int
 -__i915_gem_request_alloc(struct intel_engine_cs *engine,
 -                       struct i915_gem_context *ctx,
 -                       struct drm_i915_gem_request **req_out)
 -{
 -      struct drm_i915_private *dev_priv = engine->i915;
 -      unsigned reset_counter = i915_reset_counter(&dev_priv->gpu_error);
 -      struct drm_i915_gem_request *req;
 -      int ret;
 -
 -      if (!req_out)
 -              return -EINVAL;
 -
 -      *req_out = NULL;
 -
 -      /* ABI: Before userspace accesses the GPU (e.g. execbuffer), report
 -       * EIO if the GPU is already wedged, or EAGAIN to drop the struct_mutex
 -       * and restart.
 -       */
 -      ret = i915_gem_check_wedge(reset_counter, dev_priv->mm.interruptible);
 -      if (ret)
 -              return ret;
 -
 -      req = kmem_cache_zalloc(dev_priv->requests, GFP_KERNEL);
 -      if (req == NULL)
 -              return -ENOMEM;
 -
 -      ret = i915_gem_get_seqno(engine->i915, &req->seqno);
 -      if (ret)
 -              goto err;
 -
 -      kref_init(&req->ref);
 -      req->i915 = dev_priv;
 -      req->engine = engine;
 -      req->ctx  = ctx;
 -      i915_gem_context_reference(req->ctx);
 -
 -      /*
 -       * Reserve space in the ring buffer for all the commands required to
 -       * eventually emit this request. This is to guarantee that the
 -       * i915_add_request() call can't fail. Note that the reserve may need
 -       * to be redone if the request is not actually submitted straight
 -       * away, e.g. because a GPU scheduler has deferred it.
 -       */
 -      req->reserved_space = MIN_SPACE_FOR_ADD_REQUEST;
 -
 -      if (i915.enable_execlists)
 -              ret = intel_logical_ring_alloc_request_extras(req);
 -      else
 -              ret = intel_ring_alloc_request_extras(req);
 -      if (ret)
 -              goto err_ctx;
 -
 -      *req_out = req;
 -      return 0;
 -
 -err_ctx:
 -      i915_gem_context_unreference(ctx);
 -err:
 -      kmem_cache_free(dev_priv->requests, req);
 -      return ret;
 -}
 -
 -/**
 - * i915_gem_request_alloc - allocate a request structure
 - *
 - * @engine: engine that we wish to issue the request on.
 - * @ctx: context that the request will be associated with.
 - *       This can be NULL if the request is not directly related to
 - *       any specific user context, in which case this function will
 - *       choose an appropriate context to use.
 - *
 - * Returns a pointer to the allocated request if successful,
 - * or an error code if not.
 - */
 -struct drm_i915_gem_request *
 -i915_gem_request_alloc(struct intel_engine_cs *engine,
 -                     struct i915_gem_context *ctx)
 -{
 -      struct drm_i915_gem_request *req;
 -      int err;
 -
 -      if (ctx == NULL)
 -              ctx = engine->i915->kernel_context;
 -      err = __i915_gem_request_alloc(engine, ctx, &req);
 -      return err ? ERR_PTR(err) : req;
 -}
 -
  struct drm_i915_gem_request *
  i915_gem_find_active_request(struct intel_engine_cs *engine)
  {
         * extra delay for a recent interrupt is pointless. Hence, we do
         * not need an engine->irq_seqno_barrier() before the seqno reads.
         */
 -      list_for_each_entry(request, &engine->request_list, list) {
 +      list_for_each_entry(request, &engine->request_list, link) {
                if (i915_gem_request_completed(request))
                        continue;
  
 +              if (!i915_sw_fence_done(&request->submit))
 +                      break;
 +
                return request;
        }
  
        return NULL;
  }
  
 -static void i915_gem_reset_engine_status(struct intel_engine_cs *engine)
 +static void reset_request(struct drm_i915_gem_request *request)
 +{
 +      void *vaddr = request->ring->vaddr;
 +      u32 head;
 +
 +      /* As this request likely depends on state from the lost
 +       * context, clear out all the user operations leaving the
 +       * breadcrumb at the end (so we get the fence notifications).
 +       */
 +      head = request->head;
 +      if (request->postfix < head) {
 +              memset(vaddr + head, 0, request->ring->size - head);
 +              head = 0;
 +      }
 +      memset(vaddr + head, 0, request->postfix - head);
 +}
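
/*
 * Editor's sketch, not part of the diff above: reset_request() zeroes the
 * user payload of a hung request, i.e. the circular range [head, postfix)
 * of the ring, leaving only the breadcrumb after it.  When the range wraps
 * past the end of the ring the clear is split into two memsets.  The names
 * below (clear_circular_range and friends) are illustrative only.
 */
#include <assert.h>
#include <string.h>

static void clear_circular_range(unsigned char *ring, unsigned int size,
				 unsigned int head, unsigned int postfix)
{
	if (postfix < head) {			/* range wraps around */
		memset(ring + head, 0, size - head);
		head = 0;
	}
	memset(ring + head, 0, postfix - head);
}

int main(void)
{
	unsigned char ring[16];

	memset(ring, 0xff, sizeof(ring));
	clear_circular_range(ring, sizeof(ring), 12, 4);
	assert(ring[12] == 0 && ring[15] == 0);	/* cleared up to the end */
	assert(ring[0] == 0 && ring[3] == 0);	/* and again from zero   */
	assert(ring[4] == 0xff);		/* rest left untouched   */
	return 0;
}
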
 +
 +static void i915_gem_reset_engine(struct intel_engine_cs *engine)
  {
        struct drm_i915_gem_request *request;
 +      struct i915_gem_context *incomplete_ctx;
        bool ring_hung;
  
 +      /* Ensure irq handler finishes, and not run again. */
 +      tasklet_kill(&engine->irq_tasklet);
 +      if (engine->irq_seqno_barrier)
 +              engine->irq_seqno_barrier(engine);
 +
        request = i915_gem_find_active_request(engine);
 -      if (request == NULL)
 +      if (!request)
                return;
  
        ring_hung = engine->hangcheck.score >= HANGCHECK_SCORE_RING_HUNG;
 -
        i915_set_reset_status(request->ctx, ring_hung);
 -      list_for_each_entry_continue(request, &engine->request_list, list)
 -              i915_set_reset_status(request->ctx, false);
 -}
 -
 -static void i915_gem_reset_engine_cleanup(struct intel_engine_cs *engine)
 -{
 -      struct intel_ringbuffer *buffer;
 -
 -      while (!list_empty(&engine->active_list)) {
 -              struct drm_i915_gem_object *obj;
 -
 -              obj = list_first_entry(&engine->active_list,
 -                                     struct drm_i915_gem_object,
 -                                     engine_list[engine->id]);
 -
 -              i915_gem_object_retire__read(obj, engine->id);
 -      }
 -
 -      /*
 -       * Clear the execlists queue up before freeing the requests, as those
 -       * are the ones that keep the context and ringbuffer backing objects
 -       * pinned in place.
 -       */
 -
 -      if (i915.enable_execlists) {
 -              /* Ensure irq handler finishes or is cancelled. */
 -              tasklet_kill(&engine->irq_tasklet);
 -
 -              intel_execlists_cancel_requests(engine);
 -      }
 -
 -      /*
 -       * We must free the requests after all the corresponding objects have
 -       * been moved off the active lists, which is the same order the normal
 -       * retire_requests function uses. This is important if objects hold
 -       * implicit references on things like e.g. ppgtt address spaces through
 -       * the request.
 -       */
 -      while (!list_empty(&engine->request_list)) {
 -              struct drm_i915_gem_request *request;
 +      if (!ring_hung)
 +              return;
  
 -              request = list_first_entry(&engine->request_list,
 -                                         struct drm_i915_gem_request,
 -                                         list);
 +      DRM_DEBUG_DRIVER("resetting %s to restart from tail of request 0x%x\n",
 +                       engine->name, request->fence.seqno);
  
 -              i915_gem_request_retire(request);
 -      }
 +      /* Setup the CS to resume from the breadcrumb of the hung request */
 +      engine->reset_hw(engine, request);
  
 -      /* Having flushed all requests from all queues, we know that all
 -       * ringbuffers must now be empty. However, since we do not reclaim
 -       * all space when retiring the request (to prevent HEADs colliding
 -       * with rapid ringbuffer wraparound) the amount of available space
 -       * upon reset is less than when we start. Do one more pass over
 -       * all the ringbuffers to reset last_retired_head.
 +      /* Users of the default context do not rely on logical state
 +       * preserved between batches. They have to emit full state on
 +       * every batch and so it is safe to execute queued requests following
 +       * the hang.
 +       *
 +       * Other contexts preserve state, now corrupt. We want to skip all
 +       * queued requests that reference the corrupt context.
         */
 -      list_for_each_entry(buffer, &engine->buffers, link) {
 -              buffer->last_retired_head = buffer->tail;
 -              intel_ring_update_space(buffer);
 -      }
 +      incomplete_ctx = request->ctx;
 +      if (i915_gem_context_is_default(incomplete_ctx))
 +              return;
  
 -      intel_ring_init_seqno(engine, engine->last_submitted_seqno);
 +      list_for_each_entry_continue(request, &engine->request_list, link)
 +              if (request->ctx == incomplete_ctx)
 +                      reset_request(request);
+       engine->i915->gt.active_engines &= ~intel_engine_flag(engine);
  }
  
 -void i915_gem_reset(struct drm_device *dev)
 +void i915_gem_reset(struct drm_i915_private *dev_priv)
  {
 -      struct drm_i915_private *dev_priv = to_i915(dev);
        struct intel_engine_cs *engine;
  
 -      /*
 -       * Before we free the objects from the requests, we need to inspect
 -       * them to find the guilty party. As the requests only borrow
 -       * their reference to the objects, the inspection must be done first.
 -       */
 -      for_each_engine(engine, dev_priv)
 -              i915_gem_reset_engine_status(engine);
 +      i915_gem_retire_requests(dev_priv);
  
        for_each_engine(engine, dev_priv)
 -              i915_gem_reset_engine_cleanup(engine);
 +              i915_gem_reset_engine(engine);
+       mod_delayed_work(dev_priv->wq, &dev_priv->gt.idle_work, 0);
  
 -      i915_gem_context_reset(dev);
 -
 -      i915_gem_restore_fences(dev);
 +      i915_gem_restore_fences(&dev_priv->drm);
 +}
  
 -      WARN_ON(i915_verify_lists(dev));
 +static void nop_submit_request(struct drm_i915_gem_request *request)
 +{
  }
  
 -/**
 - * This function clears the request list as sequence numbers are passed.
 - * @engine: engine to retire requests on
 - */
 -void
 -i915_gem_retire_requests_ring(struct intel_engine_cs *engine)
 +static void i915_gem_cleanup_engine(struct intel_engine_cs *engine)
  {
 -      WARN_ON(i915_verify_lists(engine->dev));
 +      engine->submit_request = nop_submit_request;
  
 -      /* Retire requests first as we use it above for the early return.
 -       * the request lists without clearing the active list, leading to
 -       * the requests lists without clearing the active list, leading to
 -       * confusion.
 +      /* Mark all pending requests as complete so that any concurrent
 +       * (lockless) lookup doesn't try and wait upon the request as we
 +       * reset it.
         */
 -      while (!list_empty(&engine->request_list)) {
 -              struct drm_i915_gem_request *request;
 -
 -              request = list_first_entry(&engine->request_list,
 -                                         struct drm_i915_gem_request,
 -                                         list);
 -
 -              if (!i915_gem_request_completed(request))
 -                      break;
 -
 -              i915_gem_request_retire(request);
 -      }
 +      intel_engine_init_seqno(engine, engine->last_submitted_seqno);
  
 -      /* Move any buffers on the active list that are no longer referenced
 -       * by the ringbuffer to the flushing/inactive lists as appropriate,
 -       * before we free the context associated with the requests.
 +      /*
 +       * Clear the execlists queue up before freeing the requests, as those
 +       * are the ones that keep the context and ringbuffer backing objects
 +       * pinned in place.
         */
 -      while (!list_empty(&engine->active_list)) {
 -              struct drm_i915_gem_object *obj;
 -
 -              obj = list_first_entry(&engine->active_list,
 -                                     struct drm_i915_gem_object,
 -                                     engine_list[engine->id]);
 -
 -              if (!list_empty(&obj->last_read_req[engine->id]->list))
 -                      break;
  
 -              i915_gem_object_retire__read(obj, engine->id);
 +      if (i915.enable_execlists) {
 +              spin_lock(&engine->execlist_lock);
 +              INIT_LIST_HEAD(&engine->execlist_queue);
 +              i915_gem_request_put(engine->execlist_port[0].request);
 +              i915_gem_request_put(engine->execlist_port[1].request);
 +              memset(engine->execlist_port, 0, sizeof(engine->execlist_port));
 +              spin_unlock(&engine->execlist_lock);
        }
  
 -      WARN_ON(i915_verify_lists(engine->dev));
 +      engine->i915->gt.active_engines &= ~intel_engine_flag(engine);
  }
  
 -void i915_gem_retire_requests(struct drm_i915_private *dev_priv)
 +void i915_gem_set_wedged(struct drm_i915_private *dev_priv)
  {
        struct intel_engine_cs *engine;
  
        lockdep_assert_held(&dev_priv->drm.struct_mutex);
 +      set_bit(I915_WEDGED, &dev_priv->gpu_error.flags);
  
 -      if (dev_priv->gt.active_engines == 0)
 -              return;
 -
 -      GEM_BUG_ON(!dev_priv->gt.awake);
 -
 -      for_each_engine(engine, dev_priv) {
 -              i915_gem_retire_requests_ring(engine);
 -              if (list_empty(&engine->request_list))
 -                      dev_priv->gt.active_engines &= ~intel_engine_flag(engine);
 -      }
 +      i915_gem_context_lost(dev_priv);
 +      for_each_engine(engine, dev_priv)
 +              i915_gem_cleanup_engine(engine);
 +      mod_delayed_work(dev_priv->wq, &dev_priv->gt.idle_work, 0);
  
 -      if (dev_priv->gt.active_engines == 0)
 -              queue_delayed_work(dev_priv->wq,
 -                                 &dev_priv->gt.idle_work,
 -                                 msecs_to_jiffies(100));
 +      i915_gem_retire_requests(dev_priv);
  }
  
  static void
@@@ -2694,12 -3287,10 +2697,12 @@@ i915_gem_retire_work_handler(struct wor
         * We do not need to do this test under locking as in the worst-case
         * we queue the retire worker once too often.
         */
 -      if (READ_ONCE(dev_priv->gt.awake))
 +      if (READ_ONCE(dev_priv->gt.awake)) {
 +              i915_queue_hangcheck(dev_priv);
                queue_delayed_work(dev_priv->wq,
                                   &dev_priv->gt.retire_work,
                                   round_jiffies_up_relative(HZ));
 +      }
  }
  
  static void
@@@ -2709,6 -3300,7 +2712,6 @@@ i915_gem_idle_work_handler(struct work_
                container_of(work, typeof(*dev_priv), gt.idle_work.work);
        struct drm_device *dev = &dev_priv->drm;
        struct intel_engine_cs *engine;
 -      unsigned int stuck_engines;
        bool rearm_hangcheck;
  
        if (!READ_ONCE(dev_priv->gt.awake))
                cancel_delayed_work_sync(&dev_priv->gpu_error.hangcheck_work);
  
        if (!mutex_trylock(&dev->struct_mutex)) {
 -              /* Currently busy, come back later */
 -              mod_delayed_work(dev_priv->wq,
 -                               &dev_priv->gt.idle_work,
 -                               msecs_to_jiffies(50));
 -              goto out_rearm;
 -      }
 -
 -      if (dev_priv->gt.active_engines)
 -              goto out_unlock;
 -
 -      for_each_engine(engine, dev_priv)
 -              i915_gem_batch_pool_fini(&engine->batch_pool);
 -
 -      GEM_BUG_ON(!dev_priv->gt.awake);
 -      dev_priv->gt.awake = false;
 -      rearm_hangcheck = false;
 -
 -      stuck_engines = intel_kick_waiters(dev_priv);
 -      if (unlikely(stuck_engines)) {
 -              DRM_DEBUG_DRIVER("kicked stuck waiters...missed irq\n");
 -              dev_priv->gpu_error.missed_irq_rings |= stuck_engines;
 -      }
 -
 -      if (INTEL_GEN(dev_priv) >= 6)
 -              gen6_rps_idle(dev_priv);
 -      intel_runtime_pm_put(dev_priv);
 -out_unlock:
 -      mutex_unlock(&dev->struct_mutex);
 -
 -out_rearm:
 -      if (rearm_hangcheck) {
 -              GEM_BUG_ON(!dev_priv->gt.awake);
 -              i915_queue_hangcheck(dev_priv);
 -      }
 -}
 -
 -/**
 - * Ensures that an object will eventually get non-busy by flushing any required
 - * write domains, emitting any outstanding lazy request and retiring any
 - * completed requests.
 - * @obj: object to flush
 - */
 -static int
 -i915_gem_object_flush_active(struct drm_i915_gem_object *obj)
 -{
 -      int i;
 -
 -      if (!obj->active)
 -              return 0;
 -
 -      for (i = 0; i < I915_NUM_ENGINES; i++) {
 -              struct drm_i915_gem_request *req;
 -
 -              req = obj->last_read_req[i];
 -              if (req == NULL)
 -                      continue;
 -
 -              if (i915_gem_request_completed(req))
 -                      i915_gem_object_retire__read(obj, i);
 -      }
 -
 -      return 0;
 -}
 -
 -/**
 - * i915_gem_wait_ioctl - implements DRM_IOCTL_I915_GEM_WAIT
 - * @dev: drm device pointer
 - * @data: ioctl data blob
 - * @file: drm file pointer
 - *
 - * Returns 0 if successful, else an error is returned with the remaining time in
 - * the timeout parameter.
 - *  -ETIME: object is still busy after timeout
 - *  -ERESTARTSYS: signal interrupted the wait
 - *  -ENOENT: object doesn't exist
 - * Also possible, but rare:
 - *  -EAGAIN: GPU wedged
 - *  -ENOMEM: damn
 - *  -ENODEV: Internal IRQ fail
 - *  -E?: The add request failed
 - *
 - * The wait ioctl with a timeout of 0 reimplements the busy ioctl. With any
 - * non-zero timeout parameter the wait ioctl will wait for the given number of
 - * nanoseconds on an object becoming unbusy. Since the wait itself does so
 - * without holding struct_mutex the object may become re-busied before this
 - * function completes. A similar but shorter race condition exists in the busy
 - * ioctl.
 - */
 -int
 -i915_gem_wait_ioctl(struct drm_device *dev, void *data, struct drm_file *file)
 -{
 -      struct drm_i915_gem_wait *args = data;
 -      struct drm_i915_gem_object *obj;
 -      struct drm_i915_gem_request *req[I915_NUM_ENGINES];
 -      int i, n = 0;
 -      int ret;
 -
 -      if (args->flags != 0)
 -              return -EINVAL;
 -
 -      ret = i915_mutex_lock_interruptible(dev);
 -      if (ret)
 -              return ret;
 -
 -      obj = to_intel_bo(drm_gem_object_lookup(file, args->bo_handle));
 -      if (&obj->base == NULL) {
 -              mutex_unlock(&dev->struct_mutex);
 -              return -ENOENT;
 -      }
 -
 -      /* Need to make sure the object gets inactive eventually. */
 -      ret = i915_gem_object_flush_active(obj);
 -      if (ret)
 -              goto out;
 -
 -      if (!obj->active)
 -              goto out;
 -
 -      /* Do this after OLR check to make sure we make forward progress polling
 -       * on this IOCTL with a timeout == 0 (like busy ioctl)
 -       */
 -      if (args->timeout_ns == 0) {
 -              ret = -ETIME;
 -              goto out;
 -      }
 -
 -      drm_gem_object_unreference(&obj->base);
 -
 -      for (i = 0; i < I915_NUM_ENGINES; i++) {
 -              if (obj->last_read_req[i] == NULL)
 -                      continue;
 -
 -              req[n++] = i915_gem_request_reference(obj->last_read_req[i]);
 -      }
 -
 -      mutex_unlock(&dev->struct_mutex);
 -
 -      for (i = 0; i < n; i++) {
 -              if (ret == 0)
 -                      ret = __i915_wait_request(req[i], true,
 -                                                args->timeout_ns > 0 ? &args->timeout_ns : NULL,
 -                                                to_rps_client(file));
 -              i915_gem_request_unreference(req[i]);
 -      }
 -      return ret;
 -
 -out:
 -      drm_gem_object_unreference(&obj->base);
 -      mutex_unlock(&dev->struct_mutex);
 -      return ret;
 -}
 -
 -static int
 -__i915_gem_object_sync(struct drm_i915_gem_object *obj,
 -                     struct intel_engine_cs *to,
 -                     struct drm_i915_gem_request *from_req,
 -                     struct drm_i915_gem_request **to_req)
 -{
 -      struct intel_engine_cs *from;
 -      int ret;
 -
 -      from = i915_gem_request_get_engine(from_req);
 -      if (to == from)
 -              return 0;
 -
 -      if (i915_gem_request_completed(from_req))
 -              return 0;
 -
 -      if (!i915_semaphore_is_enabled(to_i915(obj->base.dev))) {
 -              struct drm_i915_private *i915 = to_i915(obj->base.dev);
 -              ret = __i915_wait_request(from_req,
 -                                        i915->mm.interruptible,
 -                                        NULL,
 -                                        &i915->rps.semaphores);
 -              if (ret)
 -                      return ret;
 -
 -              i915_gem_object_retire_request(obj, from_req);
 -      } else {
 -              int idx = intel_ring_sync_index(from, to);
 -              u32 seqno = i915_gem_request_get_seqno(from_req);
 -
 -              WARN_ON(!to_req);
 -
 -              if (seqno <= from->semaphore.sync_seqno[idx])
 -                      return 0;
 +              /* Currently busy, come back later */
 +              mod_delayed_work(dev_priv->wq,
 +                               &dev_priv->gt.idle_work,
 +                               msecs_to_jiffies(50));
 +              goto out_rearm;
 +      }
  
 -              if (*to_req == NULL) {
 -                      struct drm_i915_gem_request *req;
 +      if (dev_priv->gt.active_engines)
 +              goto out_unlock;
  
 -                      req = i915_gem_request_alloc(to, NULL);
 -                      if (IS_ERR(req))
 -                              return PTR_ERR(req);
 +      for_each_engine(engine, dev_priv)
 +              i915_gem_batch_pool_fini(&engine->batch_pool);
  
 -                      *to_req = req;
 -              }
 +      GEM_BUG_ON(!dev_priv->gt.awake);
 +      dev_priv->gt.awake = false;
 +      rearm_hangcheck = false;
  
 -              trace_i915_gem_ring_sync_to(*to_req, from, from_req);
 -              ret = to->semaphore.sync_to(*to_req, from, seqno);
 -              if (ret)
 -                      return ret;
 +      if (INTEL_GEN(dev_priv) >= 6)
 +              gen6_rps_idle(dev_priv);
 +      intel_runtime_pm_put(dev_priv);
 +out_unlock:
 +      mutex_unlock(&dev->struct_mutex);
  
 -              /* We use last_read_req because sync_to()
 -               * might have just caused seqno wrap under
 -               * the radar.
 -               */
 -              from->semaphore.sync_seqno[idx] =
 -                      i915_gem_request_get_seqno(obj->last_read_req[from->id]);
 +out_rearm:
 +      if (rearm_hangcheck) {
 +              GEM_BUG_ON(!dev_priv->gt.awake);
 +              i915_queue_hangcheck(dev_priv);
        }
 +}
  
 -      return 0;
 +void i915_gem_close_object(struct drm_gem_object *gem, struct drm_file *file)
 +{
 +      struct drm_i915_gem_object *obj = to_intel_bo(gem);
 +      struct drm_i915_file_private *fpriv = file->driver_priv;
 +      struct i915_vma *vma, *vn;
 +
 +      mutex_lock(&obj->base.dev->struct_mutex);
 +      list_for_each_entry_safe(vma, vn, &obj->vma_list, obj_link)
 +              if (vma->vm->file == fpriv)
 +                      i915_vma_close(vma);
 +      mutex_unlock(&obj->base.dev->struct_mutex);
  }
  
  /**
 - * i915_gem_object_sync - sync an object to a ring.
 - *
 - * @obj: object which may be in use on another ring.
 - * @to: ring we wish to use the object on. May be NULL.
 - * @to_req: request we wish to use the object for. See below.
 - *          This will be allocated and returned if a request is
 - *          required but not passed in.
 - *
 - * This code is meant to abstract object synchronization with the GPU.
 - * Calling with NULL implies synchronizing the object with the CPU
 - * rather than a particular GPU ring. Conceptually we serialise writes
 - * between engines inside the GPU. We only allow one engine to write
 - * into a buffer at any time, but multiple readers. To ensure each has
 - * a coherent view of memory, we must:
 - *
 - * - If there is an outstanding write request to the object, the new
 - *   request must wait for it to complete (either CPU or in hw, requests
 - *   on the same ring will be naturally ordered).
 - *
 - * - If we are a write request (pending_write_domain is set), the new
 - *   request must wait for outstanding read requests to complete.
 + * i915_gem_wait_ioctl - implements DRM_IOCTL_I915_GEM_WAIT
 + * @dev: drm device pointer
 + * @data: ioctl data blob
 + * @file: drm file pointer
   *
 - * For CPU synchronisation (NULL to) no request is required. For syncing with
 - * rings to_req must be non-NULL. However, a request does not have to be
 - * pre-allocated. If *to_req is NULL and sync commands will be emitted then a
 - * request will be allocated automatically and returned through *to_req. Note
 - * that it is not guaranteed that commands will be emitted (because the system
 - * might already be idle). Hence there is no need to create a request that
 - * might never have any work submitted. Note further that if a request is
 - * returned in *to_req, it is the responsibility of the caller to submit
 - * that request (after potentially adding more work to it).
 + * Returns 0 if successful, else an error is returned with the remaining time in
 + * the timeout parameter.
 + *  -ETIME: object is still busy after timeout
 + *  -ERESTARTSYS: signal interrupted the wait
 + *  -ENOENT: object doesn't exist
 + * Also possible, but rare:
 + *  -EAGAIN: GPU wedged
 + *  -ENOMEM: damn
 + *  -ENODEV: Internal IRQ fail
 + *  -E?: The add request failed
   *
 - * Returns 0 if successful, else propagates up the lower layer error.
 + * The wait ioctl with a timeout of 0 reimplements the busy ioctl. With any
 + * non-zero timeout parameter the wait ioctl will wait for the given number of
 + * nanoseconds on an object becoming unbusy. Since the wait itself does so
 + * without holding struct_mutex the object may become re-busied before this
 + * function completes. A similar but shorter race condition exists in the busy
 + * ioctl.
   */
  int
 -i915_gem_object_sync(struct drm_i915_gem_object *obj,
 -                   struct intel_engine_cs *to,
 -                   struct drm_i915_gem_request **to_req)
 +i915_gem_wait_ioctl(struct drm_device *dev, void *data, struct drm_file *file)
  {
 -      const bool readonly = obj->base.pending_write_domain == 0;
 -      struct drm_i915_gem_request *req[I915_NUM_ENGINES];
 -      int ret, i, n;
 +      struct drm_i915_gem_wait *args = data;
 +      struct intel_rps_client *rps = to_rps_client(file);
 +      struct drm_i915_gem_object *obj;
 +      unsigned long active;
 +      int idx, ret = 0;
  
 -      if (!obj->active)
 -              return 0;
 +      if (args->flags != 0)
 +              return -EINVAL;
  
 -      if (to == NULL)
 -              return i915_gem_object_wait_rendering(obj, readonly);
 +      obj = i915_gem_object_lookup(file, args->bo_handle);
 +      if (!obj)
 +              return -ENOENT;
  
 -      n = 0;
 -      if (readonly) {
 -              if (obj->last_write_req)
 -                      req[n++] = obj->last_write_req;
 -      } else {
 -              for (i = 0; i < I915_NUM_ENGINES; i++)
 -                      if (obj->last_read_req[i])
 -                              req[n++] = obj->last_read_req[i];
 -      }
 -      for (i = 0; i < n; i++) {
 -              ret = __i915_gem_object_sync(obj, to, req[i], to_req);
 +      active = __I915_BO_ACTIVE(obj);
 +      for_each_active(active, idx) {
 +              s64 *timeout = args->timeout_ns >= 0 ? &args->timeout_ns : NULL;
 +              ret = i915_gem_active_wait_unlocked(&obj->last_read[idx],
 +                                                  I915_WAIT_INTERRUPTIBLE,
 +                                                  timeout, rps);
                if (ret)
 -                      return ret;
 +                      break;
        }
  
 -      return 0;
 -}
 -
 -static void i915_gem_object_finish_gtt(struct drm_i915_gem_object *obj)
 -{
 -      u32 old_write_domain, old_read_domains;
 -
 -      /* Force a pagefault for domain tracking on next user access */
 -      i915_gem_release_mmap(obj);
 -
 -      if ((obj->base.read_domains & I915_GEM_DOMAIN_GTT) == 0)
 -              return;
 -
 -      old_read_domains = obj->base.read_domains;
 -      old_write_domain = obj->base.write_domain;
 -
 -      obj->base.read_domains &= ~I915_GEM_DOMAIN_GTT;
 -      obj->base.write_domain &= ~I915_GEM_DOMAIN_GTT;
 -
 -      trace_i915_gem_object_change_domain(obj,
 -                                          old_read_domains,
 -                                          old_write_domain);
 +      i915_gem_object_put_unlocked(obj);
 +      return ret;
  }
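
/*
 * Editor's sketch, not part of the diff above: the rewritten wait ioctl walks
 * the per-engine "active" bitmask of the object and waits on last_read[idx]
 * for every set bit (for_each_active in the driver).  A standalone model of
 * that bit-walking loop; wait_on_engine() is a hypothetical stand-in for the
 * real wait.
 */
#include <stdio.h>

static void wait_on_engine(int idx)
{
	printf("waiting on engine %d\n", idx);
}

int main(void)
{
	unsigned long active = 0x0b;			/* engines 0, 1 and 3 busy */

	while (active) {
		int idx = __builtin_ctzl(active);	/* lowest set bit */

		wait_on_engine(idx);
		active &= active - 1;			/* clear that bit */
	}
	return 0;
}
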
  
  static void __i915_vma_iounmap(struct i915_vma *vma)
  {
 -      GEM_BUG_ON(vma->pin_count);
 +      GEM_BUG_ON(i915_vma_is_pinned(vma));
  
        if (vma->iomap == NULL)
                return;
        vma->iomap = NULL;
  }
  
 -static int __i915_vma_unbind(struct i915_vma *vma, bool wait)
 +int i915_vma_unbind(struct i915_vma *vma)
  {
        struct drm_i915_gem_object *obj = vma->obj;
 -      struct drm_i915_private *dev_priv = to_i915(obj->base.dev);
 +      unsigned long active;
        int ret;
  
 -      if (list_empty(&vma->obj_link))
 -              return 0;
 -
 -      if (!drm_mm_node_allocated(&vma->node)) {
 -              i915_gem_vma_destroy(vma);
 -              return 0;
 -      }
 -
 -      if (vma->pin_count)
 -              return -EBUSY;
 +      /* First wait upon any activity as retiring the request may
 +       * have side-effects such as unpinning or even unbinding this vma.
 +       */
 +      active = i915_vma_get_active(vma);
 +      if (active) {
 +              int idx;
 +
 +              /* When a closed VMA is retired, it is unbound - eek.
 +               * In order to prevent it from being recursively closed,
 +               * take a pin on the vma so that the second unbind is
 +               * aborted.
 +               */
 +              __i915_vma_pin(vma);
  
 -      BUG_ON(obj->pages == NULL);
 +              for_each_active(active, idx) {
 +                      ret = i915_gem_active_retire(&vma->last_read[idx],
 +                                                 &vma->vm->dev->struct_mutex);
 +                      if (ret)
 +                              break;
 +              }
  
 -      if (wait) {
 -              ret = i915_gem_object_wait_rendering(obj, false);
 +              __i915_vma_unpin(vma);
                if (ret)
                        return ret;
 +
 +              GEM_BUG_ON(i915_vma_is_active(vma));
        }
  
 -      if (vma->is_ggtt && vma->ggtt_view.type == I915_GGTT_VIEW_NORMAL) {
 -              i915_gem_object_finish_gtt(obj);
 +      if (i915_vma_is_pinned(vma))
 +              return -EBUSY;
 +
 +      if (!drm_mm_node_allocated(&vma->node))
 +              goto destroy;
  
 +      GEM_BUG_ON(obj->bind_count == 0);
 +      GEM_BUG_ON(!obj->pages);
 +
 +      if (i915_vma_is_map_and_fenceable(vma)) {
                /* release the fence reg _after_ flushing */
 -              ret = i915_gem_object_put_fence(obj);
 +              ret = i915_vma_put_fence(vma);
                if (ret)
                        return ret;
  
 +              /* Force a pagefault for domain tracking on next user access */
 +              i915_gem_release_mmap(obj);
 +
                __i915_vma_iounmap(vma);
 +              vma->flags &= ~I915_VMA_CAN_FENCE;
        }
  
 -      trace_i915_vma_unbind(vma);
 -
 -      vma->vm->unbind_vma(vma);
 -      vma->bound = 0;
 -
 -      list_del_init(&vma->vm_link);
 -      if (vma->is_ggtt) {
 -              if (vma->ggtt_view.type == I915_GGTT_VIEW_NORMAL) {
 -                      obj->map_and_fenceable = false;
 -              } else if (vma->ggtt_view.pages) {
 -                      sg_free_table(vma->ggtt_view.pages);
 -                      kfree(vma->ggtt_view.pages);
 -              }
 -              vma->ggtt_view.pages = NULL;
 +      if (likely(!vma->vm->closed)) {
 +              trace_i915_vma_unbind(vma);
 +              vma->vm->unbind_vma(vma);
        }
 +      vma->flags &= ~(I915_VMA_GLOBAL_BIND | I915_VMA_LOCAL_BIND);
  
        drm_mm_remove_node(&vma->node);
 -      i915_gem_vma_destroy(vma);
 +      list_move_tail(&vma->vm_link, &vma->vm->unbound_list);
 +
 +      if (vma->pages != obj->pages) {
 +              GEM_BUG_ON(!vma->pages);
 +              sg_free_table(vma->pages);
 +              kfree(vma->pages);
 +      }
 +      vma->pages = NULL;
  
        /* Since the unbound list is global, only move to that list if
         * no more VMAs exist. */
 -      if (list_empty(&obj->vma_list))
 -              list_move_tail(&obj->global_list, &dev_priv->mm.unbound_list);
 +      if (--obj->bind_count == 0)
 +              list_move_tail(&obj->global_list,
 +                             &to_i915(obj->base.dev)->mm.unbound_list);
  
        /* And finally now the object is completely decoupled from this vma,
         * we can drop its hold on the backing storage and allow it to be
         */
        i915_gem_object_unpin_pages(obj);
  
 -      return 0;
 -}
 -
 -int i915_vma_unbind(struct i915_vma *vma)
 -{
 -      return __i915_vma_unbind(vma, true);
 -}
 +destroy:
 +      if (unlikely(i915_vma_is_closed(vma)))
 +              i915_vma_destroy(vma);
  
 -int __i915_vma_unbind_no_wait(struct i915_vma *vma)
 -{
 -      return __i915_vma_unbind(vma, false);
 +      return 0;
  }
  
 -int i915_gem_wait_for_idle(struct drm_i915_private *dev_priv)
 +int i915_gem_wait_for_idle(struct drm_i915_private *dev_priv,
 +                         unsigned int flags)
  {
        struct intel_engine_cs *engine;
        int ret;
  
 -      lockdep_assert_held(&dev_priv->drm.struct_mutex);
 -
        for_each_engine(engine, dev_priv) {
                if (engine->last_context == NULL)
                        continue;
  
 -              ret = intel_engine_idle(engine);
 +              ret = intel_engine_idle(engine, flags);
                if (ret)
                        return ret;
        }
  
 -      WARN_ON(i915_verify_lists(dev));
        return 0;
  }
  
@@@ -2972,87 -3759,128 +2975,87 @@@ static bool i915_gem_valid_gtt_space(st
  }
  
  /**
 - * Finds free space in the GTT aperture and binds the object or a view of it
 - * there.
 - * @obj: object to bind
 - * @vm: address space to bind into
 - * @ggtt_view: global gtt view if applicable
 - * @alignment: requested alignment
 + * i915_vma_insert - finds a slot for the vma in its address space
 + * @vma: the vma
 + * @size: requested size in bytes (can be larger than the VMA)
 + * @alignment: required alignment
   * @flags: mask of PIN_* flags to use
 + *
 + * First we try to allocate some free space that meets the requirements for
 + * the VMA. Failing that, if the flags permit, it will evict an old VMA,
 + * preferably the oldest idle entry to make room for the new VMA.
 + *
 + * Returns:
 + * 0 on success, negative error code otherwise.
   */
 -static struct i915_vma *
 -i915_gem_object_bind_to_vm(struct drm_i915_gem_object *obj,
 -                         struct i915_address_space *vm,
 -                         const struct i915_ggtt_view *ggtt_view,
 -                         unsigned alignment,
 -                         uint64_t flags)
 +static int
 +i915_vma_insert(struct i915_vma *vma, u64 size, u64 alignment, u64 flags)
  {
 -      struct drm_device *dev = obj->base.dev;
 -      struct drm_i915_private *dev_priv = to_i915(dev);
 -      struct i915_ggtt *ggtt = &dev_priv->ggtt;
 -      u32 fence_alignment, unfenced_alignment;
 -      u32 search_flag, alloc_flag;
 +      struct drm_i915_private *dev_priv = to_i915(vma->vm->dev);
 +      struct drm_i915_gem_object *obj = vma->obj;
        u64 start, end;
 -      u64 size, fence_size;
 -      struct i915_vma *vma;
        int ret;
  
 -      if (i915_is_ggtt(vm)) {
 -              u32 view_size;
 -
 -              if (WARN_ON(!ggtt_view))
 -                      return ERR_PTR(-EINVAL);
 -
 -              view_size = i915_ggtt_view_size(obj, ggtt_view);
 -
 -              fence_size = i915_gem_get_gtt_size(dev,
 -                                                 view_size,
 -                                                 obj->tiling_mode);
 -              fence_alignment = i915_gem_get_gtt_alignment(dev,
 -                                                           view_size,
 -                                                           obj->tiling_mode,
 -                                                           true);
 -              unfenced_alignment = i915_gem_get_gtt_alignment(dev,
 -                                                              view_size,
 -                                                              obj->tiling_mode,
 -                                                              false);
 -              size = flags & PIN_MAPPABLE ? fence_size : view_size;
 -      } else {
 -              fence_size = i915_gem_get_gtt_size(dev,
 -                                                 obj->base.size,
 -                                                 obj->tiling_mode);
 -              fence_alignment = i915_gem_get_gtt_alignment(dev,
 -                                                           obj->base.size,
 -                                                           obj->tiling_mode,
 -                                                           true);
 -              unfenced_alignment =
 -                      i915_gem_get_gtt_alignment(dev,
 -                                                 obj->base.size,
 -                                                 obj->tiling_mode,
 -                                                 false);
 -              size = flags & PIN_MAPPABLE ? fence_size : obj->base.size;
 -      }
 +      GEM_BUG_ON(vma->flags & (I915_VMA_GLOBAL_BIND | I915_VMA_LOCAL_BIND));
 +      GEM_BUG_ON(drm_mm_node_allocated(&vma->node));
 +
 +      size = max(size, vma->size);
 +      if (flags & PIN_MAPPABLE)
 +              size = i915_gem_get_ggtt_size(dev_priv, size,
 +                                            i915_gem_object_get_tiling(obj));
 +
 +      alignment = max(max(alignment, vma->display_alignment),
 +                      i915_gem_get_ggtt_alignment(dev_priv, size,
 +                                                  i915_gem_object_get_tiling(obj),
 +                                                  flags & PIN_MAPPABLE));
  
        start = flags & PIN_OFFSET_BIAS ? flags & PIN_OFFSET_MASK : 0;
 -      end = vm->total;
 +
 +      end = vma->vm->total;
        if (flags & PIN_MAPPABLE)
 -              end = min_t(u64, end, ggtt->mappable_end);
 +              end = min_t(u64, end, dev_priv->ggtt.mappable_end);
        if (flags & PIN_ZONE_4G)
                end = min_t(u64, end, (1ULL << 32) - PAGE_SIZE);
  
 -      if (alignment == 0)
 -              alignment = flags & PIN_MAPPABLE ? fence_alignment :
 -                                              unfenced_alignment;
 -      if (flags & PIN_MAPPABLE && alignment & (fence_alignment - 1)) {
 -              DRM_DEBUG("Invalid object (view type=%u) alignment requested %u\n",
 -                        ggtt_view ? ggtt_view->type : 0,
 -                        alignment);
 -              return ERR_PTR(-EINVAL);
 -      }
 -
        /* If binding the object/GGTT view requires more space than the entire
         * aperture has, reject it early before evicting everything in a vain
         * attempt to find space.
         */
        if (size > end) {
 -              DRM_DEBUG("Attempting to bind an object (view type=%u) larger than the aperture: size=%llu > %s aperture=%llu\n",
 -                        ggtt_view ? ggtt_view->type : 0,
 -                        size,
 +              DRM_DEBUG("Attempting to bind an object larger than the aperture: request=%llu [object=%zd] > %s aperture=%llu\n",
 +                        size, obj->base.size,
                          flags & PIN_MAPPABLE ? "mappable" : "total",
                          end);
 -              return ERR_PTR(-E2BIG);
 +              return -E2BIG;
        }
  
        ret = i915_gem_object_get_pages(obj);
        if (ret)
 -              return ERR_PTR(ret);
 +              return ret;
  
        i915_gem_object_pin_pages(obj);
  
 -      vma = ggtt_view ? i915_gem_obj_lookup_or_create_ggtt_vma(obj, ggtt_view) :
 -                        i915_gem_obj_lookup_or_create_vma(obj, vm);
 -
 -      if (IS_ERR(vma))
 -              goto err_unpin;
 -
        if (flags & PIN_OFFSET_FIXED) {
 -              uint64_t offset = flags & PIN_OFFSET_MASK;
 -
 -              if (offset & (alignment - 1) || offset + size > end) {
 +              u64 offset = flags & PIN_OFFSET_MASK;
 +              if (offset & (alignment - 1) || offset > end - size) {
                        ret = -EINVAL;
 -                      goto err_free_vma;
 +                      goto err_unpin;
                }
 +
                vma->node.start = offset;
                vma->node.size = size;
                vma->node.color = obj->cache_level;
 -              ret = drm_mm_reserve_node(&vm->mm, &vma->node);
 +              ret = drm_mm_reserve_node(&vma->vm->mm, &vma->node);
                if (ret) {
                        ret = i915_gem_evict_for_vma(vma);
                        if (ret == 0)
 -                              ret = drm_mm_reserve_node(&vm->mm, &vma->node);
 +                              ret = drm_mm_reserve_node(&vma->vm->mm, &vma->node);
 +                      if (ret)
 +                              goto err_unpin;
                }
 -              if (ret)
 -                      goto err_free_vma;
        } else {
 +              u32 search_flag, alloc_flag;
 +
                if (flags & PIN_HIGH) {
                        search_flag = DRM_MM_SEARCH_BELOW;
                        alloc_flag = DRM_MM_CREATE_TOP;
                        alloc_flag = DRM_MM_CREATE_DEFAULT;
                }
  
 +              /* We only allocate in PAGE_SIZE/GTT_PAGE_SIZE (4096) chunks,
 +               * so we know that we always have a minimum alignment of 4096.
 +               * The drm_mm range manager is optimised to return results
 +               * with zero alignment, so where possible use the optimal
 +               * path.
 +               */
 +              if (alignment <= 4096)
 +                      alignment = 0;
 +
  search_free:
 -              ret = drm_mm_insert_node_in_range_generic(&vm->mm, &vma->node,
 +              ret = drm_mm_insert_node_in_range_generic(&vma->vm->mm,
 +                                                        &vma->node,
                                                          size, alignment,
                                                          obj->cache_level,
                                                          start, end,
                                                          search_flag,
                                                          alloc_flag);
                if (ret) {
 -                      ret = i915_gem_evict_something(dev, vm, size, alignment,
 +                      ret = i915_gem_evict_something(vma->vm, size, alignment,
                                                       obj->cache_level,
                                                       start, end,
                                                       flags);
                        if (ret == 0)
                                goto search_free;
  
 -                      goto err_free_vma;
 +                      goto err_unpin;
                }
        }
 -      if (WARN_ON(!i915_gem_valid_gtt_space(vma, obj->cache_level))) {
 -              ret = -EINVAL;
 -              goto err_remove_node;
 -      }
 -
 -      trace_i915_vma_bind(vma, flags);
 -      ret = i915_vma_bind(vma, obj->cache_level, flags);
 -      if (ret)
 -              goto err_remove_node;
 +      GEM_BUG_ON(!i915_gem_valid_gtt_space(vma, obj->cache_level));
  
        list_move_tail(&obj->global_list, &dev_priv->mm.bound_list);
 -      list_add_tail(&vma->vm_link, &vm->inactive_list);
 +      list_move_tail(&vma->vm_link, &vma->vm->inactive_list);
 +      obj->bind_count++;
  
 -      return vma;
 +      return 0;
  
 -err_remove_node:
 -      drm_mm_remove_node(&vma->node);
 -err_free_vma:
 -      i915_gem_vma_destroy(vma);
 -      vma = ERR_PTR(ret);
  err_unpin:
        i915_gem_object_unpin_pages(obj);
 -      return vma;
 +      return ret;
  }
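
/*
 * Editor's sketch, not part of the diff above: the PIN_OFFSET_FIXED path in
 * i915_vma_insert() rejects a placement that is misaligned or does not fit
 * below "end".  Note the bound is written "offset > end - size" rather than
 * "offset + size > end" so that a huge offset cannot wrap the addition, and
 * alignment is assumed to be a power of two so "offset & (alignment - 1)"
 * tests it without a division.  Standalone model with illustrative names.
 */
#include <stdbool.h>
#include <stdio.h>

static bool placement_ok(unsigned long long offset, unsigned long long size,
			 unsigned long long alignment, unsigned long long end)
{
	if (size > end)				/* cannot fit at all */
		return false;
	if (alignment && (offset & (alignment - 1)))
		return false;			/* misaligned */
	return offset <= end - size;		/* overflow-safe bound */
}

int main(void)
{
	/* 64KiB object, 4KiB alignment, 256MiB aperture. */
	printf("%d\n", placement_ok(0x10000, 0x10000, 0x1000, 0x10000000));	/* 1: fits */
	printf("%d\n", placement_ok(0x10001, 0x10000, 0x1000, 0x10000000));	/* 0: misaligned */
	printf("%d\n", placement_ok(~0ULL - 0xfff, 0x10000, 0x1000, 0x10000000)); /* 0: would wrap */
	return 0;
}
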
  
  bool
@@@ -3144,72 -3974,51 +3147,72 @@@ i915_gem_clflush_object(struct drm_i915
  static void
  i915_gem_object_flush_gtt_write_domain(struct drm_i915_gem_object *obj)
  {
 -      uint32_t old_write_domain;
 +      struct drm_i915_private *dev_priv = to_i915(obj->base.dev);
  
        if (obj->base.write_domain != I915_GEM_DOMAIN_GTT)
                return;
  
        /* No actual flushing is required for the GTT write domain.  Writes
 -       * to it immediately go to main memory as far as we know, so there's
 +       * to it "immediately" go to main memory as far as we know, so there's
         * no chipset flush.  It also doesn't land in render cache.
         *
         * However, we do have to enforce the order so that all writes through
         * the GTT land before any writes to the device, such as updates to
         * the GATT itself.
 +       *
 +       * We also have to wait a bit for the writes to land from the GTT.
 +       * An uncached read (i.e. mmio) seems to be ideal for the round-trip
 +       * timing. This issue has only been observed when switching quickly
 +       * between GTT writes and CPU reads from inside the kernel on recent hw,
 +       * and it appears to only affect discrete GTT blocks (i.e. on LLC
 +       * system agents we cannot reproduce this behaviour).
         */
        wmb();
 +      if (INTEL_GEN(dev_priv) >= 6 && !HAS_LLC(dev_priv))
 +              POSTING_READ(RING_ACTHD(dev_priv->engine[RCS].mmio_base));
  
 -      old_write_domain = obj->base.write_domain;
 -      obj->base.write_domain = 0;
 -
 -      intel_fb_obj_flush(obj, false, ORIGIN_GTT);
 +      intel_fb_obj_flush(obj, false, write_origin(obj, I915_GEM_DOMAIN_GTT));
  
 +      obj->base.write_domain = 0;
        trace_i915_gem_object_change_domain(obj,
                                            obj->base.read_domains,
 -                                          old_write_domain);
 +                                          I915_GEM_DOMAIN_GTT);
  }
  
  /** Flushes the CPU write domain for the object if it's dirty. */
  static void
  i915_gem_object_flush_cpu_write_domain(struct drm_i915_gem_object *obj)
  {
 -      uint32_t old_write_domain;
 -
        if (obj->base.write_domain != I915_GEM_DOMAIN_CPU)
                return;
  
        if (i915_gem_clflush_object(obj, obj->pin_display))
                i915_gem_chipset_flush(to_i915(obj->base.dev));
  
 -      old_write_domain = obj->base.write_domain;
 -      obj->base.write_domain = 0;
 -
        intel_fb_obj_flush(obj, false, ORIGIN_CPU);
  
 +      obj->base.write_domain = 0;
        trace_i915_gem_object_change_domain(obj,
                                            obj->base.read_domains,
 -                                          old_write_domain);
 +                                          I915_GEM_DOMAIN_CPU);
 +}
 +
 +static void i915_gem_object_bump_inactive_ggtt(struct drm_i915_gem_object *obj)
 +{
 +      struct i915_vma *vma;
 +
 +      list_for_each_entry(vma, &obj->vma_list, obj_link) {
 +              if (!i915_vma_is_ggtt(vma))
 +                      continue;
 +
 +              if (i915_vma_is_active(vma))
 +                      continue;
 +
 +              if (!drm_mm_node_allocated(&vma->node))
 +                      continue;
 +
 +              list_move_tail(&vma->vm_link, &vma->vm->inactive_list);
 +      }
  }
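
/*
 * Editor's sketch, not part of the diff above: both the bound-object LRU and
 * the per-VM inactive list approximate LRU by simply moving an entry to the
 * tail of a doubly linked list whenever it is "used"; eviction then scans
 * from the head, i.e. the coldest entries first.  A minimal standalone list
 * with a move-to-tail helper; the node/list names are illustrative, not the
 * kernel's list API.
 */
#include <stdio.h>

struct node {
	struct node *prev, *next;
	const char *name;
};

static void list_init(struct node *head)
{
	head->prev = head->next = head;
}

static void list_add_tail(struct node *n, struct node *head)
{
	n->prev = head->prev;
	n->next = head;
	head->prev->next = n;
	head->prev = n;
}

static void list_move_tail(struct node *n, struct node *head)
{
	n->prev->next = n->next;		/* unlink */
	n->next->prev = n->prev;
	list_add_tail(n, head);			/* re-insert at the tail */
}

int main(void)
{
	struct node head, a = { .name = "a" }, b = { .name = "b" }, c = { .name = "c" };
	struct node *it;

	list_init(&head);
	list_add_tail(&a, &head);
	list_add_tail(&b, &head);
	list_add_tail(&c, &head);

	list_move_tail(&a, &head);		/* "a" was just used */

	for (it = head.next; it != &head; it = it->next)
		printf("%s ", it->name);	/* prints: b c a */
	printf("\n");
	return 0;
}
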
  
  /**
  int
  i915_gem_object_set_to_gtt_domain(struct drm_i915_gem_object *obj, bool write)
  {
 -      struct drm_device *dev = obj->base.dev;
 -      struct drm_i915_private *dev_priv = to_i915(dev);
 -      struct i915_ggtt *ggtt = &dev_priv->ggtt;
        uint32_t old_write_domain, old_read_domains;
 -      struct i915_vma *vma;
        int ret;
  
 -      if (obj->base.write_domain == I915_GEM_DOMAIN_GTT)
 -              return 0;
 -
        ret = i915_gem_object_wait_rendering(obj, !write);
        if (ret)
                return ret;
  
 +      if (obj->base.write_domain == I915_GEM_DOMAIN_GTT)
 +              return 0;
 +
        /* Flush and acquire obj->pages so that we are coherent through
         * direct access in memory with previous cached writes through
         * shmemfs and that our cache domain tracking remains valid.
                                            old_write_domain);
  
        /* And bump the LRU for this access */
 -      vma = i915_gem_obj_to_ggtt(obj);
 -      if (vma && drm_mm_node_allocated(&vma->node) && !obj->active)
 -              list_move_tail(&vma->vm_link,
 -                             &ggtt->base.inactive_list);
 +      i915_gem_object_bump_inactive_ggtt(obj);
  
        return 0;
  }
  int i915_gem_object_set_cache_level(struct drm_i915_gem_object *obj,
                                    enum i915_cache_level cache_level)
  {
 -      struct drm_device *dev = obj->base.dev;
 -      struct i915_vma *vma, *next;
 -      bool bound = false;
 +      struct i915_vma *vma;
        int ret = 0;
  
        if (obj->cache_level == cache_level)
         * catch the issue of the CS prefetch crossing page boundaries and
         * reading an invalid PTE on older architectures.
         */
 -      list_for_each_entry_safe(vma, next, &obj->vma_list, obj_link) {
 +restart:
 +      list_for_each_entry(vma, &obj->vma_list, obj_link) {
                if (!drm_mm_node_allocated(&vma->node))
                        continue;
  
 -              if (vma->pin_count) {
 +              if (i915_vma_is_pinned(vma)) {
                        DRM_DEBUG("can not change the cache level of pinned objects\n");
                        return -EBUSY;
                }
  
 -              if (!i915_gem_valid_gtt_space(vma, cache_level)) {
 -                      ret = i915_vma_unbind(vma);
 -                      if (ret)
 -                              return ret;
 -              } else
 -                      bound = true;
 +              if (i915_gem_valid_gtt_space(vma, cache_level))
 +                      continue;
 +
 +              ret = i915_vma_unbind(vma);
 +              if (ret)
 +                      return ret;
 +
 +              /* As unbinding may affect other elements in the
 +               * obj->vma_list (due to side-effects from retiring
 +               * an active vma), play safe and restart the iterator.
 +               */
 +              goto restart;
        }
  
        /* We can reuse the existing drm_mm nodes but need to change the
         * rewrite the PTE in the belief that doing so tramples upon less
         * state and so involves less work.
         */
 -      if (bound) {
 +      if (obj->bind_count) {
                /* Before we change the PTE, the GPU must not be accessing it.
                 * If we wait upon the object, we know that all the bound
                 * VMA are no longer active.
                if (ret)
                        return ret;
  
 -              if (!HAS_LLC(dev) && cache_level != I915_CACHE_NONE) {
 +              if (!HAS_LLC(obj->base.dev) && cache_level != I915_CACHE_NONE) {
                        /* Access to snoopable pages through the GTT is
                         * incoherent and on some machines causes a hard
                        * lockup. Relinquish the CPU mmapping to force
                         * dropped the fence as all snoopable access is
                         * supposed to be linear.
                         */
 -                      ret = i915_gem_object_put_fence(obj);
 -                      if (ret)
 -                              return ret;
 +                      list_for_each_entry(vma, &obj->vma_list, obj_link) {
 +                              ret = i915_vma_put_fence(vma);
 +                              if (ret)
 +                                      return ret;
 +                      }
                } else {
                        /* We either have incoherent backing store and
                         * so no GTT access or the architecture is fully
@@@ -3412,8 -4221,8 +3415,8 @@@ int i915_gem_get_caching_ioctl(struct d
        struct drm_i915_gem_caching *args = data;
        struct drm_i915_gem_object *obj;
  
 -      obj = to_intel_bo(drm_gem_object_lookup(file, args->handle));
 -      if (&obj->base == NULL)
 +      obj = i915_gem_object_lookup(file, args->handle);
 +      if (!obj)
                return -ENOENT;
  
        switch (obj->cache_level) {
                break;
        }
  
 -      drm_gem_object_unreference_unlocked(&obj->base);
 +      i915_gem_object_put_unlocked(obj);
        return 0;
  }
  
@@@ -3473,15 -4282,15 +3476,15 @@@ int i915_gem_set_caching_ioctl(struct d
        if (ret)
                goto rpm_put;
  
 -      obj = to_intel_bo(drm_gem_object_lookup(file, args->handle));
 -      if (&obj->base == NULL) {
 +      obj = i915_gem_object_lookup(file, args->handle);
 +      if (!obj) {
                ret = -ENOENT;
                goto unlock;
        }
  
        ret = i915_gem_object_set_cache_level(obj, level);
  
 -      drm_gem_object_unreference(&obj->base);
 +      i915_gem_object_put(obj);
  unlock:
        mutex_unlock(&dev->struct_mutex);
  rpm_put:
   * Can be called from an uninterruptible phase (modesetting) and allows
   * any flushes to be pipelined (for pageflips).
   */
 -int
 +struct i915_vma *
  i915_gem_object_pin_to_display_plane(struct drm_i915_gem_object *obj,
                                     u32 alignment,
                                     const struct i915_ggtt_view *view)
  {
 +      struct i915_vma *vma;
        u32 old_read_domains, old_write_domain;
        int ret;
  
         */
        ret = i915_gem_object_set_cache_level(obj,
                                              HAS_WT(obj->base.dev) ? I915_CACHE_WT : I915_CACHE_NONE);
 -      if (ret)
 +      if (ret) {
 +              vma = ERR_PTR(ret);
                goto err_unpin_display;
 +      }
  
        /* As the user may map the buffer once pinned in the display plane
         * (e.g. libkms for the bootup splash), we have to ensure that we
 -       * always use map_and_fenceable for all scanout buffers.
 +       * always use map_and_fenceable for all scanout buffers. However,
 +       * it may simply be too big to fit into mappable, in which case
 +       * put it anyway and hope that userspace can cope (but always first
 +       * try to preserve the existing ABI).
         */
 -      ret = i915_gem_object_ggtt_pin(obj, view, alignment,
 -                                     view->type == I915_GGTT_VIEW_NORMAL ?
 -                                     PIN_MAPPABLE : 0);
 -      if (ret)
 +      vma = ERR_PTR(-ENOSPC);
 +      if (view->type == I915_GGTT_VIEW_NORMAL)
 +              vma = i915_gem_object_ggtt_pin(obj, view, 0, alignment,
 +                                             PIN_MAPPABLE | PIN_NONBLOCK);
 +      if (IS_ERR(vma))
 +              vma = i915_gem_object_ggtt_pin(obj, view, 0, alignment, 0);
 +      if (IS_ERR(vma))
                goto err_unpin_display;
  
 +      vma->display_alignment = max_t(u64, vma->display_alignment, alignment);
 +
 +      WARN_ON(obj->pin_display > i915_vma_pin_count(vma));
 +
        i915_gem_object_flush_cpu_write_domain(obj);
  
        old_write_domain = obj->base.write_domain;
                                            old_read_domains,
                                            old_write_domain);
  
 -      return 0;
 +      return vma;
  
  err_unpin_display:
        obj->pin_display--;
 -      return ret;
 +      return vma;
  }
  
  void
 -i915_gem_object_unpin_from_display_plane(struct drm_i915_gem_object *obj,
 -                                       const struct i915_ggtt_view *view)
 +i915_gem_object_unpin_from_display_plane(struct i915_vma *vma)
  {
 -      if (WARN_ON(obj->pin_display == 0))
 +      if (WARN_ON(vma->obj->pin_display == 0))
                return;
  
 -      i915_gem_object_ggtt_unpin_view(obj, view);
 +      if (--vma->obj->pin_display == 0)
 +              vma->display_alignment = 0;
  
 -      obj->pin_display--;
 +      /* Bump the LRU to try and avoid premature eviction whilst flipping  */
 +      if (!i915_vma_is_active(vma))
 +              list_move_tail(&vma->vm_link, &vma->vm->inactive_list);
 +
 +      i915_vma_unpin(vma);
 +      WARN_ON(vma->obj->pin_display > i915_vma_pin_count(vma));
  }
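
For orientation, a minimal caller sketch of the contract above (hypothetical, not part of this commit; the real user is intel_pin_and_fence_fb_obj() further down in this diff): pinning to the display plane now hands back an i915_vma or an ERR_PTR instead of an int, and the unpin side takes that vma directly.

/* Hypothetical caller sketch mirroring intel_pin_and_fence_fb_obj();
 * illustrative only, not part of this commit. */
struct i915_vma *vma;

vma = i915_gem_object_pin_to_display_plane(obj, alignment, &view);
if (IS_ERR(vma))
	return PTR_ERR(vma);

/* ... program scanout from i915_ggtt_offset(vma) ... */

i915_gem_object_unpin_from_display_plane(vma);
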
  
  /**
@@@ -3598,13 -4389,13 +3601,13 @@@ i915_gem_object_set_to_cpu_domain(struc
        uint32_t old_write_domain, old_read_domains;
        int ret;
  
 -      if (obj->base.write_domain == I915_GEM_DOMAIN_CPU)
 -              return 0;
 -
        ret = i915_gem_object_wait_rendering(obj, !write);
        if (ret)
                return ret;
  
 +      if (obj->base.write_domain == I915_GEM_DOMAIN_CPU)
 +              return 0;
 +
        i915_gem_object_flush_gtt_write_domain(obj);
  
        old_write_domain = obj->base.write_domain;
@@@ -3679,31 -4470,28 +3682,31 @@@ i915_gem_ring_throttle(struct drm_devic
                target = request;
        }
        if (target)
 -              i915_gem_request_reference(target);
 +              i915_gem_request_get(target);
        spin_unlock(&file_priv->mm.lock);
  
        if (target == NULL)
                return 0;
  
 -      ret = __i915_wait_request(target, true, NULL, NULL);
 -      i915_gem_request_unreference(target);
 +      ret = i915_wait_request(target, I915_WAIT_INTERRUPTIBLE, NULL, NULL);
 +      i915_gem_request_put(target);
  
        return ret;
  }
  
  static bool
 -i915_vma_misplaced(struct i915_vma *vma, uint32_t alignment, uint64_t flags)
 +i915_vma_misplaced(struct i915_vma *vma, u64 size, u64 alignment, u64 flags)
  {
 -      struct drm_i915_gem_object *obj = vma->obj;
 +      if (!drm_mm_node_allocated(&vma->node))
 +              return false;
 +
 +      if (vma->node.size < size)
 +              return true;
  
 -      if (alignment &&
 -          vma->node.start & (alignment - 1))
 +      if (alignment && vma->node.start & (alignment - 1))
                return true;
  
 -      if (flags & PIN_MAPPABLE && !obj->map_and_fenceable)
 +      if (flags & PIN_MAPPABLE && !i915_vma_is_map_and_fenceable(vma))
                return true;
  
        if (flags & PIN_OFFSET_BIAS &&
  void __i915_vma_set_map_and_fenceable(struct i915_vma *vma)
  {
        struct drm_i915_gem_object *obj = vma->obj;
 +      struct drm_i915_private *dev_priv = to_i915(obj->base.dev);
        bool mappable, fenceable;
        u32 fence_size, fence_alignment;
  
 -      fence_size = i915_gem_get_gtt_size(obj->base.dev,
 -                                         obj->base.size,
 -                                         obj->tiling_mode);
 -      fence_alignment = i915_gem_get_gtt_alignment(obj->base.dev,
 -                                                   obj->base.size,
 -                                                   obj->tiling_mode,
 -                                                   true);
 +      fence_size = i915_gem_get_ggtt_size(dev_priv,
 +                                          vma->size,
 +                                          i915_gem_object_get_tiling(obj));
 +      fence_alignment = i915_gem_get_ggtt_alignment(dev_priv,
 +                                                    vma->size,
 +                                                    i915_gem_object_get_tiling(obj),
 +                                                    true);
  
        fenceable = (vma->node.size == fence_size &&
                     (vma->node.start & (fence_alignment - 1)) == 0);
  
        mappable = (vma->node.start + fence_size <=
 -                  to_i915(obj->base.dev)->ggtt.mappable_end);
 +                  dev_priv->ggtt.mappable_end);
  
 -      obj->map_and_fenceable = mappable && fenceable;
 +      if (mappable && fenceable)
 +              vma->flags |= I915_VMA_CAN_FENCE;
 +      else
 +              vma->flags &= ~I915_VMA_CAN_FENCE;
  }
  
 -static int
 -i915_gem_object_do_pin(struct drm_i915_gem_object *obj,
 -                     struct i915_address_space *vm,
 -                     const struct i915_ggtt_view *ggtt_view,
 -                     uint32_t alignment,
 -                     uint64_t flags)
 +int __i915_vma_do_pin(struct i915_vma *vma,
 +                    u64 size, u64 alignment, u64 flags)
  {
 -      struct drm_i915_private *dev_priv = to_i915(obj->base.dev);
 -      struct i915_vma *vma;
 -      unsigned bound;
 +      unsigned int bound = vma->flags;
        int ret;
  
 -      if (WARN_ON(vm == &dev_priv->mm.aliasing_ppgtt->base))
 -              return -ENODEV;
 -
 -      if (WARN_ON(flags & (PIN_GLOBAL | PIN_MAPPABLE) && !i915_is_ggtt(vm)))
 -              return -EINVAL;
 -
 -      if (WARN_ON((flags & (PIN_MAPPABLE | PIN_GLOBAL)) == PIN_MAPPABLE))
 -              return -EINVAL;
 -
 -      if (WARN_ON(i915_is_ggtt(vm) != !!ggtt_view))
 -              return -EINVAL;
 -
 -      vma = ggtt_view ? i915_gem_obj_to_ggtt_view(obj, ggtt_view) :
 -                        i915_gem_obj_to_vma(obj, vm);
 -
 -      if (vma) {
 -              if (WARN_ON(vma->pin_count == DRM_I915_GEM_OBJECT_MAX_PIN_COUNT))
 -                      return -EBUSY;
 +      GEM_BUG_ON((flags & (PIN_GLOBAL | PIN_USER)) == 0);
 +      GEM_BUG_ON((flags & PIN_GLOBAL) && !i915_vma_is_ggtt(vma));
  
 -              if (i915_vma_misplaced(vma, alignment, flags)) {
 -                      WARN(vma->pin_count,
 -                           "bo is already pinned in %s with incorrect alignment:"
 -                           " offset=%08x %08x, req.alignment=%x, req.map_and_fenceable=%d,"
 -                           " obj->map_and_fenceable=%d\n",
 -                           ggtt_view ? "ggtt" : "ppgtt",
 -                           upper_32_bits(vma->node.start),
 -                           lower_32_bits(vma->node.start),
 -                           alignment,
 -                           !!(flags & PIN_MAPPABLE),
 -                           obj->map_and_fenceable);
 -                      ret = i915_vma_unbind(vma);
 -                      if (ret)
 -                              return ret;
 -
 -                      vma = NULL;
 -              }
 +      if (WARN_ON(bound & I915_VMA_PIN_OVERFLOW)) {
 +              ret = -EBUSY;
 +              goto err;
        }
  
 -      bound = vma ? vma->bound : 0;
 -      if (vma == NULL || !drm_mm_node_allocated(&vma->node)) {
 -              vma = i915_gem_object_bind_to_vm(obj, vm, ggtt_view, alignment,
 -                                               flags);
 -              if (IS_ERR(vma))
 -                      return PTR_ERR(vma);
 -      } else {
 -              ret = i915_vma_bind(vma, obj->cache_level, flags);
 +      if ((bound & I915_VMA_BIND_MASK) == 0) {
 +              ret = i915_vma_insert(vma, size, alignment, flags);
                if (ret)
 -                      return ret;
 +                      goto err;
        }
  
 -      if (ggtt_view && ggtt_view->type == I915_GGTT_VIEW_NORMAL &&
 -          (bound ^ vma->bound) & GLOBAL_BIND) {
 +      ret = i915_vma_bind(vma, vma->obj->cache_level, flags);
 +      if (ret)
 +              goto err;
 +
 +      if ((bound ^ vma->flags) & I915_VMA_GLOBAL_BIND)
                __i915_vma_set_map_and_fenceable(vma);
 -              WARN_ON(flags & PIN_MAPPABLE && !obj->map_and_fenceable);
 -      }
  
 -      vma->pin_count++;
 +      GEM_BUG_ON(i915_vma_misplaced(vma, size, alignment, flags));
        return 0;
 -}
  
 -int
 -i915_gem_object_pin(struct drm_i915_gem_object *obj,
 -                  struct i915_address_space *vm,
 -                  uint32_t alignment,
 -                  uint64_t flags)
 -{
 -      return i915_gem_object_do_pin(obj, vm,
 -                                    i915_is_ggtt(vm) ? &i915_ggtt_view_normal : NULL,
 -                                    alignment, flags);
 +err:
 +      __i915_vma_unpin(vma);
 +      return ret;
  }
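
A hedged sketch of how the pin path above is consumed: i915_vma_pin() (used below in i915_gem_object_ggtt_pin()) is assumed to funnel into __i915_vma_do_pin() when the vma is not already bound, and every successful pin has to be balanced by i915_vma_unpin(), as i915_gem_object_unpin_from_display_plane() does earlier in this diff.

/* Illustrative pairing only; the names follow the calls visible in this
 * diff, the surrounding code is assumed. */
int ret;

ret = i915_vma_pin(vma, size, alignment, flags | PIN_GLOBAL);
if (ret)
	return ret;

/* ... use the binding (vma->node.start is valid while pinned) ... */

i915_vma_unpin(vma);
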
  
 -int
 +struct i915_vma *
  i915_gem_object_ggtt_pin(struct drm_i915_gem_object *obj,
                         const struct i915_ggtt_view *view,
 -                       uint32_t alignment,
 -                       uint64_t flags)
 +                       u64 size,
 +                       u64 alignment,
 +                       u64 flags)
  {
 -      struct drm_device *dev = obj->base.dev;
 -      struct drm_i915_private *dev_priv = to_i915(dev);
 -      struct i915_ggtt *ggtt = &dev_priv->ggtt;
 +      struct i915_address_space *vm = &to_i915(obj->base.dev)->ggtt.base;
 +      struct i915_vma *vma;
 +      int ret;
 +
 +      vma = i915_gem_obj_lookup_or_create_vma(obj, vm, view);
 +      if (IS_ERR(vma))
 +              return vma;
 +
 +      if (i915_vma_misplaced(vma, size, alignment, flags)) {
 +              if (flags & PIN_NONBLOCK &&
 +                  (i915_vma_is_pinned(vma) || i915_vma_is_active(vma)))
 +                      return ERR_PTR(-ENOSPC);
 +
 +              WARN(i915_vma_is_pinned(vma),
 +                   "bo is already pinned in ggtt with incorrect alignment:"
 +                   " offset=%08x, req.alignment=%llx,"
 +                   " req.map_and_fenceable=%d, vma->map_and_fenceable=%d\n",
 +                   i915_ggtt_offset(vma), alignment,
 +                   !!(flags & PIN_MAPPABLE),
 +                   i915_vma_is_map_and_fenceable(vma));
 +              ret = i915_vma_unbind(vma);
 +              if (ret)
 +                      return ERR_PTR(ret);
 +      }
 +
 +      ret = i915_vma_pin(vma, size, alignment, flags | PIN_GLOBAL);
 +      if (ret)
 +              return ERR_PTR(ret);
  
 -      BUG_ON(!view);
 +      return vma;
 +}
  
 -      return i915_gem_object_do_pin(obj, &ggtt->base, view,
 -                                    alignment, flags | PIN_GLOBAL);
 +static __always_inline unsigned int __busy_read_flag(unsigned int id)
 +{
 +      /* Note that we could alias engines in the execbuf API, but
 +       * that would be very unwise as it prevents userspace from exercising
 +       * fine control over engine selection. Ahem.
 +       *
 +       * This should be something like EXEC_MAX_ENGINE instead of
 +       * I915_NUM_ENGINES.
 +       */
 +      BUILD_BUG_ON(I915_NUM_ENGINES > 16);
 +      return 0x10000 << id;
  }
  
 -void
 -i915_gem_object_ggtt_unpin_view(struct drm_i915_gem_object *obj,
 -                              const struct i915_ggtt_view *view)
 +static __always_inline unsigned int __busy_write_id(unsigned int id)
  {
 -      struct i915_vma *vma = i915_gem_obj_to_ggtt_view(obj, view);
 +      /* The uABI guarantees an active writer is also amongst the read
 +       * engines. This would be true if we accessed the activity tracking
 +       * under the lock, but as we perform the lookup of the object and
 +       * its activity locklessly we can not guarantee that the last_write
 +       * being active implies that we have set the same engine flag from
 +       * last_read - hence we always set both read and write busy for
 +       * last_write.
 +       */
 +      return id | __busy_read_flag(id);
 +}
 +
 +static __always_inline unsigned int
 +__busy_set_if_active(const struct i915_gem_active *active,
 +                   unsigned int (*flag)(unsigned int id))
 +{
 +      struct drm_i915_gem_request *request;
 +
 +      request = rcu_dereference(active->request);
 +      if (!request || i915_gem_request_completed(request))
 +              return 0;
 +
 +      /* This is racy. See __i915_gem_active_get_rcu() for a detailed
 +       * discussion of how to handle the race correctly, but for reporting
 +       * the busy state we err on the side of potentially reporting the
 +       * wrong engine as being busy (but we guarantee that the result
 +       * is at least self-consistent).
 +       *
 +       * As we use SLAB_DESTROY_BY_RCU, the request may be reallocated
 +       * whilst we are inspecting it, even under the RCU read lock as we are.
 +       * This means that there is a small window for the engine and/or the
 +       * seqno to have been overwritten. The seqno will always be in the
 +       * future compared to the intended, and so we know that if that
 +       * seqno is idle (on whatever engine) our request is idle and the
 +       * return 0 above is correct.
 +       *
 +       * The issue is that if the engine is switched, it is just as likely
 +       * to report that it is busy (but since the switch happened, we know
 +       * the request should be idle). So there is a small chance that a busy
 +       * result is actually the wrong engine.
 +       *
 +       * So why don't we care?
 +       *
 +       * For starters, the busy ioctl is a heuristic that is by definition
 +       * racy. Even with perfect serialisation in the driver, the hardware
 +       * state is constantly advancing - the state we report to the user
 +       * is stale.
 +       *
 +       * The critical information for the busy-ioctl is whether the object
 +       * is idle as userspace relies on that to detect whether its next
 +       * access will stall, or if it has missed submitting commands to
 +       * the hardware allowing the GPU to stall. We never generate a
 +       * false-positive for idleness, thus busy-ioctl is reliable at the
 +       * most fundamental level, and we maintain the guarantee that a
 +       * busy object left to itself will eventually become idle (and stay
 +       * idle!).
 +       *
 +       * We allow ourselves the leeway of potentially misreporting the busy
 +       * state because that is an optimisation heuristic that is constantly
 +       * in flux. Being quickly able to detect the busy/idle state is much
 +       * more important than accurate logging of exactly which engines were
 +       * busy.
 +       *
 +       * For accuracy in reporting the engine, we could use
 +       *
 +       *      result = 0;
 +       *      request = __i915_gem_active_get_rcu(active);
 +       *      if (request) {
 +       *              if (!i915_gem_request_completed(request))
 +       *                      result = flag(request->engine->exec_id);
 +       *              i915_gem_request_put(request);
 +       *      }
 +       *
 +       * but that still remains susceptible to both hardware and userspace
 +       * races. So we accept making the result of that race slightly worse,
 +       * given the rarity of the race and its low impact on the result.
 +       */
 +      return flag(READ_ONCE(request->engine->exec_id));
 +}
  
 -      WARN_ON(vma->pin_count == 0);
 -      WARN_ON(!i915_gem_obj_ggtt_bound_view(obj, view));
 +static __always_inline unsigned int
 +busy_check_reader(const struct i915_gem_active *active)
 +{
 +      return __busy_set_if_active(active, __busy_read_flag);
 +}
  
 -      --vma->pin_count;
 +static __always_inline unsigned int
 +busy_check_writer(const struct i915_gem_active *active)
 +{
 +      return __busy_set_if_active(active, __busy_write_id);
  }
  
  int
@@@ -3930,64 -4645,47 +3933,64 @@@ i915_gem_busy_ioctl(struct drm_device *
  {
        struct drm_i915_gem_busy *args = data;
        struct drm_i915_gem_object *obj;
 -      int ret;
 +      unsigned long active;
  
 -      ret = i915_mutex_lock_interruptible(dev);
 -      if (ret)
 -              return ret;
 +      obj = i915_gem_object_lookup(file, args->handle);
 +      if (!obj)
 +              return -ENOENT;
  
 -      obj = to_intel_bo(drm_gem_object_lookup(file, args->handle));
 -      if (&obj->base == NULL) {
 -              ret = -ENOENT;
 -              goto unlock;
 -      }
 +      args->busy = 0;
 +      active = __I915_BO_ACTIVE(obj);
 +      if (active) {
 +              int idx;
  
 -      /* Count all active objects as busy, even if they are currently not used
 -       * by the gpu. Users of this interface expect objects to eventually
 -       * become non-busy without any further actions, therefore emit any
 -       * necessary flushes here.
 -       */
 -      ret = i915_gem_object_flush_active(obj);
 -      if (ret)
 -              goto unref;
 +              /* Yes, the lookups are intentionally racy.
 +               *
 +               * First, we cannot simply rely on __I915_BO_ACTIVE. We have
 +               * to regard the value as stale and as our ABI guarantees
 +               * forward progress, we confirm the status of each active
 +               * request with the hardware.
 +               *
 +               * Even though we guard the pointer lookup by RCU, that only
 +               * guarantees that the pointer and its contents remain
 +               * dereferenceable and does *not* mean that the request we
 +               * have is the same as the one being tracked by the object.
 +               *
 +               * Consider that we lookup the request just as it is being
 +               * retired and freed. We take a local copy of the pointer,
 +               * but before we add its engine into the busy set, the other
 +               * thread reallocates it and assigns it to a task on another
 +               * engine with a fresh and incomplete seqno. Guarding against
 +               * that requires careful serialisation and reference counting,
 +               * i.e. using __i915_gem_active_get_request_rcu(). We don't,
 +               * instead we expect that if the result is busy, which engines
 +               * are busy is not completely reliable - we only guarantee
 +               * that the object was busy.
 +               */
 +              rcu_read_lock();
  
 -      args->busy = 0;
 -      if (obj->active) {
 -              int i;
 +              for_each_active(active, idx)
 +                      args->busy |= busy_check_reader(&obj->last_read[idx]);
  
 -              for (i = 0; i < I915_NUM_ENGINES; i++) {
 -                      struct drm_i915_gem_request *req;
 +              /* For ABI sanity, we only care that the write engine is in
 +               * the set of read engines. This should be ensured by the
 +               * ordering of setting last_read/last_write in
 +               * i915_vma_move_to_active(), and then in reverse in retire.
 +               * However, for good measure, we always report the last_write
 +               * request as a busy read as well as being a busy write.
 +               *
 +               * We don't care that the set of active read/write engines
 +               * may change during construction of the result, as it is
 +               * equally liable to change before userspace can inspect
 +               * the result.
 +               */
 +              args->busy |= busy_check_writer(&obj->last_write);
  
 -                      req = obj->last_read_req[i];
 -                      if (req)
 -                              args->busy |= 1 << (16 + req->engine->exec_id);
 -              }
 -              if (obj->last_write_req)
 -                      args->busy |= obj->last_write_req->engine->exec_id;
 +              rcu_read_unlock();
        }
  
 -unref:
 -      drm_gem_object_unreference(&obj->base);
 -unlock:
 -      mutex_unlock(&dev->struct_mutex);
 -      return ret;
 +      i915_gem_object_put_unlocked(obj);
 +      return 0;
  }
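
To make the encoding used by busy_check_reader()/busy_check_writer() concrete: the upper 16 bits of args->busy carry one busy-for-read flag per engine exec_id (0x10000 << id), while the lower 16 bits carry the exec_id of the last writer, which is also mirrored into the read mask. A small standalone demo of just that bit layout (engine ids are made up for illustration):

/* Standalone illustration of the args->busy bit layout above; builds with
 * any C compiler, engine ids are illustrative only. */
#include <stdio.h>

static unsigned int busy_read_flag(unsigned int id) { return 0x10000u << id; }
static unsigned int busy_write_id(unsigned int id)  { return id | busy_read_flag(id); }

int main(void)
{
	unsigned int busy = 0;

	busy |= busy_read_flag(0);	/* engine 0 busy for read */
	busy |= busy_write_id(2);	/* engine 2 is the last writer */

	printf("busy=0x%08x readers=0x%04x writer=%u\n",
	       busy, busy >> 16, busy & 0xffff);	/* 0x00050002 0x0005 2 */
	return 0;
}
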
  
  int
@@@ -4018,14 -4716,19 +4021,14 @@@ i915_gem_madvise_ioctl(struct drm_devic
        if (ret)
                return ret;
  
 -      obj = to_intel_bo(drm_gem_object_lookup(file_priv, args->handle));
 -      if (&obj->base == NULL) {
 +      obj = i915_gem_object_lookup(file_priv, args->handle);
 +      if (!obj) {
                ret = -ENOENT;
                goto unlock;
        }
  
 -      if (i915_gem_obj_is_pinned(obj)) {
 -              ret = -EINVAL;
 -              goto out;
 -      }
 -
        if (obj->pages &&
 -          obj->tiling_mode != I915_TILING_NONE &&
 +          i915_gem_object_is_tiled(obj) &&
            dev_priv->quirks & QUIRK_PIN_SWIZZLED_PAGES) {
                if (obj->madv == I915_MADV_WILLNEED)
                        i915_gem_object_unpin_pages(obj);
  
        args->retained = obj->madv != __I915_MADV_PURGED;
  
 -out:
 -      drm_gem_object_unreference(&obj->base);
 +      i915_gem_object_put(obj);
  unlock:
        mutex_unlock(&dev->struct_mutex);
        return ret;
@@@ -4055,17 -4759,14 +4058,17 @@@ void i915_gem_object_init(struct drm_i9
  
        INIT_LIST_HEAD(&obj->global_list);
        for (i = 0; i < I915_NUM_ENGINES; i++)
 -              INIT_LIST_HEAD(&obj->engine_list[i]);
 +              init_request_active(&obj->last_read[i],
 +                                  i915_gem_object_retire__read);
 +      init_request_active(&obj->last_write,
 +                          i915_gem_object_retire__write);
        INIT_LIST_HEAD(&obj->obj_exec_link);
        INIT_LIST_HEAD(&obj->vma_list);
        INIT_LIST_HEAD(&obj->batch_pool_link);
  
        obj->ops = ops;
  
 -      obj->fence_reg = I915_FENCE_REG_NONE;
 +      obj->frontbuffer_ggtt_origin = ORIGIN_GTT;
        obj->madv = I915_MADV_WILLNEED;
  
        i915_gem_info_add_obj(to_i915(obj->base.dev), obj->base.size);
@@@ -4170,31 -4871,33 +4173,31 @@@ void i915_gem_free_object(struct drm_ge
  
        trace_i915_gem_object_destroy(obj);
  
 +      /* All file-owned VMA should have been released by this point through
 +       * i915_gem_close_object(), or earlier by i915_gem_context_close().
 +       * However, the object may also be bound into the global GTT (e.g.
 +       * older GPUs without per-process support, or for direct access through
 +       * the GTT either for the user or for scanout). Those VMA still need to
 +       * be unbound now.
 +       */
        list_for_each_entry_safe(vma, next, &obj->vma_list, obj_link) {
 -              int ret;
 -
 -              vma->pin_count = 0;
 -              ret = i915_vma_unbind(vma);
 -              if (WARN_ON(ret == -ERESTARTSYS)) {
 -                      bool was_interruptible;
 -
 -                      was_interruptible = dev_priv->mm.interruptible;
 -                      dev_priv->mm.interruptible = false;
 -
 -                      WARN_ON(i915_vma_unbind(vma));
 -
 -                      dev_priv->mm.interruptible = was_interruptible;
 -              }
 +              GEM_BUG_ON(!i915_vma_is_ggtt(vma));
 +              GEM_BUG_ON(i915_vma_is_active(vma));
 +              vma->flags &= ~I915_VMA_PIN_MASK;
 +              i915_vma_close(vma);
        }
 +      GEM_BUG_ON(obj->bind_count);
  
        /* Stolen objects don't hold a ref, but do hold pin count. Fix that up
         * before progressing. */
        if (obj->stolen)
                i915_gem_object_unpin_pages(obj);
  
 -      WARN_ON(obj->frontbuffer_bits);
 +      WARN_ON(atomic_read(&obj->frontbuffer_bits));
  
        if (obj->pages && obj->madv == I915_MADV_WILLNEED &&
            dev_priv->quirks & QUIRK_PIN_SWIZZLED_PAGES &&
 -          obj->tiling_mode != I915_TILING_NONE)
 +          i915_gem_object_is_tiled(obj))
                i915_gem_object_unpin_pages(obj);
  
        if (WARN_ON(obj->pages_pin_count))
        if (discard_backing_storage(obj))
                obj->madv = I915_MADV_DONTNEED;
        i915_gem_object_put_pages(obj);
 -      i915_gem_object_free_mmap_offset(obj);
  
        BUG_ON(obj->pages);
  
        intel_runtime_pm_put(dev_priv);
  }
  
 -struct i915_vma *i915_gem_obj_to_vma(struct drm_i915_gem_object *obj,
 -                                   struct i915_address_space *vm)
 -{
 -      struct i915_vma *vma;
 -      list_for_each_entry(vma, &obj->vma_list, obj_link) {
 -              if (vma->ggtt_view.type == I915_GGTT_VIEW_NORMAL &&
 -                  vma->vm == vm)
 -                      return vma;
 -      }
 -      return NULL;
 -}
 -
 -struct i915_vma *i915_gem_obj_to_ggtt_view(struct drm_i915_gem_object *obj,
 -                                         const struct i915_ggtt_view *view)
 -{
 -      struct i915_vma *vma;
 -
 -      GEM_BUG_ON(!view);
 -
 -      list_for_each_entry(vma, &obj->vma_list, obj_link)
 -              if (vma->is_ggtt && i915_ggtt_view_equal(&vma->ggtt_view, view))
 -                      return vma;
 -      return NULL;
 -}
 -
 -void i915_gem_vma_destroy(struct i915_vma *vma)
 -{
 -      WARN_ON(vma->node.allocated);
 -
 -      /* Keep the vma as a placeholder in the execbuffer reservation lists */
 -      if (!list_empty(&vma->exec_list))
 -              return;
 -
 -      if (!vma->is_ggtt)
 -              i915_ppgtt_put(i915_vm_to_ppgtt(vma->vm));
 -
 -      list_del(&vma->obj_link);
 -
 -      kmem_cache_free(to_i915(vma->obj->base.dev)->vmas, vma);
 -}
 -
 -static void
 -i915_gem_stop_engines(struct drm_device *dev)
 +int i915_gem_suspend(struct drm_device *dev)
  {
        struct drm_i915_private *dev_priv = to_i915(dev);
 -      struct intel_engine_cs *engine;
 -
 -      for_each_engine(engine, dev_priv)
 -              dev_priv->gt.stop_engine(engine);
 -}
 +      int ret;
  
 -int
 -i915_gem_suspend(struct drm_device *dev)
 -{
 -      struct drm_i915_private *dev_priv = to_i915(dev);
 -      int ret = 0;
 +      intel_suspend_gt_powersave(dev_priv);
  
        mutex_lock(&dev->struct_mutex);
 -      ret = i915_gem_wait_for_idle(dev_priv);
 +
 +      /* We have to flush all the executing contexts to main memory so
 +       * that they can be saved in the hibernation image. To ensure the last
 +       * context image is coherent, we have to switch away from it. That
 +       * leaves the dev_priv->kernel_context still active when
 +       * we actually suspend, and its image in memory may not match the GPU
 +       * state. Fortunately, the kernel_context is disposable and we do
 +       * not rely on its state.
 +       */
 +      ret = i915_gem_switch_to_kernel_context(dev_priv);
 +      if (ret)
 +              goto err;
 +
 +      ret = i915_gem_wait_for_idle(dev_priv,
 +                                   I915_WAIT_INTERRUPTIBLE |
 +                                   I915_WAIT_LOCKED);
        if (ret)
                goto err;
  
        i915_gem_retire_requests(dev_priv);
  
 -      i915_gem_stop_engines(dev);
        i915_gem_context_lost(dev_priv);
        mutex_unlock(&dev->struct_mutex);
  
        return ret;
  }
  
 +void i915_gem_resume(struct drm_device *dev)
 +{
 +      struct drm_i915_private *dev_priv = to_i915(dev);
 +
 +      mutex_lock(&dev->struct_mutex);
 +      i915_gem_restore_gtt_mappings(dev);
 +
 +      /* As we didn't flush the kernel context before suspend, we cannot
 +       * guarantee that the context image is complete. So let's just reset
 +       * it and start again.
 +       */
 +      dev_priv->gt.resume(dev_priv);
 +
 +      mutex_unlock(&dev->struct_mutex);
 +}
 +
  void i915_gem_init_swizzling(struct drm_device *dev)
  {
        struct drm_i915_private *dev_priv = to_i915(dev);
@@@ -4336,6 -5060,53 +4339,6 @@@ static void init_unused_rings(struct dr
        }
  }
  
 -int i915_gem_init_engines(struct drm_device *dev)
 -{
 -      struct drm_i915_private *dev_priv = to_i915(dev);
 -      int ret;
 -
 -      ret = intel_init_render_ring_buffer(dev);
 -      if (ret)
 -              return ret;
 -
 -      if (HAS_BSD(dev)) {
 -              ret = intel_init_bsd_ring_buffer(dev);
 -              if (ret)
 -                      goto cleanup_render_ring;
 -      }
 -
 -      if (HAS_BLT(dev)) {
 -              ret = intel_init_blt_ring_buffer(dev);
 -              if (ret)
 -                      goto cleanup_bsd_ring;
 -      }
 -
 -      if (HAS_VEBOX(dev)) {
 -              ret = intel_init_vebox_ring_buffer(dev);
 -              if (ret)
 -                      goto cleanup_blt_ring;
 -      }
 -
 -      if (HAS_BSD2(dev)) {
 -              ret = intel_init_bsd2_ring_buffer(dev);
 -              if (ret)
 -                      goto cleanup_vebox_ring;
 -      }
 -
 -      return 0;
 -
 -cleanup_vebox_ring:
 -      intel_cleanup_engine(&dev_priv->engine[VECS]);
 -cleanup_blt_ring:
 -      intel_cleanup_engine(&dev_priv->engine[BCS]);
 -cleanup_bsd_ring:
 -      intel_cleanup_engine(&dev_priv->engine[VCS]);
 -cleanup_render_ring:
 -      intel_cleanup_engine(&dev_priv->engine[RCS]);
 -
 -      return ret;
 -}
 -
  int
  i915_gem_init_hw(struct drm_device *dev)
  {
        return ret;
  }
  
 +bool intel_sanitize_semaphores(struct drm_i915_private *dev_priv, int value)
 +{
 +      if (INTEL_INFO(dev_priv)->gen < 6)
 +              return false;
 +
 +      /* TODO: make semaphores and Execlists play nicely together */
 +      if (i915.enable_execlists)
 +              return false;
 +
 +      if (value >= 0)
 +              return value;
 +
 +#ifdef CONFIG_INTEL_IOMMU
 +      /* Enable semaphores on SNB when IO remapping is off */
 +      if (INTEL_INFO(dev_priv)->gen == 6 && intel_iommu_gfx_mapped)
 +              return false;
 +#endif
 +
 +      return true;
 +}
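
A usage note, hedged: the helper above presumably gets wired up once at driver load to fold the tri-state module parameter into a single driver-wide flag, along the lines of the sketch below. The dev_priv->semaphore field and the i915.semaphores parameter name are assumptions and do not appear in this diff.

/* Assumed caller, not part of this commit: resolve the tri-state module
 * parameter (-1 = auto, 0 = off, 1 = on) once at init time. */
dev_priv->semaphore = intel_sanitize_semaphores(dev_priv, i915.semaphores);
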
 +
  int i915_gem_init(struct drm_device *dev)
  {
        struct drm_i915_private *dev_priv = to_i915(dev);
        mutex_lock(&dev->struct_mutex);
  
        if (!i915.enable_execlists) {
 -              dev_priv->gt.execbuf_submit = i915_gem_ringbuffer_submission;
 -              dev_priv->gt.init_engines = i915_gem_init_engines;
 -              dev_priv->gt.cleanup_engine = intel_cleanup_engine;
 -              dev_priv->gt.stop_engine = intel_stop_engine;
 +              dev_priv->gt.resume = intel_legacy_submission_resume;
 +              dev_priv->gt.cleanup_engine = intel_engine_cleanup;
        } else {
 -              dev_priv->gt.execbuf_submit = intel_execlists_submission;
 -              dev_priv->gt.init_engines = intel_logical_rings_init;
 +              dev_priv->gt.resume = intel_lr_context_resume;
                dev_priv->gt.cleanup_engine = intel_logical_ring_cleanup;
 -              dev_priv->gt.stop_engine = intel_logical_ring_stop;
        }
  
        /* This is just a security blanket to placate dragons.
        intel_uncore_forcewake_get(dev_priv, FORCEWAKE_ALL);
  
        i915_gem_init_userptr(dev_priv);
 -      i915_gem_init_ggtt(dev);
 +
 +      ret = i915_gem_init_ggtt(dev_priv);
 +      if (ret)
 +              goto out_unlock;
  
        ret = i915_gem_context_init(dev);
        if (ret)
                goto out_unlock;
  
 -      ret = dev_priv->gt.init_engines(dev);
 +      ret = intel_engines_init(dev);
        if (ret)
                goto out_unlock;
  
        ret = i915_gem_init_hw(dev);
        if (ret == -EIO) {
 -              /* Allow ring initialisation to fail by marking the GPU as
 +              /* Allow engine initialisation to fail by marking the GPU as
                 * wedged. But we only want to do this where the GPU is angry,
                 * for all other failure, such as an allocation failure, bail.
                 */
                DRM_ERROR("Failed to initialize GPU, declaring it wedged\n");
 -              atomic_or(I915_WEDGED, &dev_priv->gpu_error.reset_counter);
 +              i915_gem_set_wedged(dev_priv);
                ret = 0;
        }
  
@@@ -4491,6 -5242,7 +4494,6 @@@ i915_gem_cleanup_engines(struct drm_dev
  static void
  init_engine_lists(struct intel_engine_cs *engine)
  {
 -      INIT_LIST_HEAD(&engine->active_list);
        INIT_LIST_HEAD(&engine->request_list);
  }
  
@@@ -4498,7 -5250,6 +4501,7 @@@ voi
  i915_gem_load_init_fences(struct drm_i915_private *dev_priv)
  {
        struct drm_device *dev = &dev_priv->drm;
 +      int i;
  
        if (INTEL_INFO(dev_priv)->gen >= 7 && !IS_VALLEYVIEW(dev_priv) &&
            !IS_CHERRYVIEW(dev_priv))
                                I915_READ(vgtif_reg(avail_rs.fence_num));
  
        /* Initialize fence registers to zero */
 +      for (i = 0; i < dev_priv->num_fence_regs; i++) {
 +              struct drm_i915_fence_reg *fence = &dev_priv->fence_regs[i];
 +
 +              fence->i915 = dev_priv;
 +              fence->id = i;
 +              list_add_tail(&fence->link, &dev_priv->mm.fence_list);
 +      }
        i915_gem_restore_fences(dev);
  
        i915_gem_detect_bit_6_swizzle(dev);
@@@ -4545,17 -5289,18 +4548,17 @@@ i915_gem_load_init(struct drm_device *d
        dev_priv->requests =
                kmem_cache_create("i915_gem_request",
                                  sizeof(struct drm_i915_gem_request), 0,
 -                                SLAB_HWCACHE_ALIGN,
 +                                SLAB_HWCACHE_ALIGN |
 +                                SLAB_RECLAIM_ACCOUNT |
 +                                SLAB_DESTROY_BY_RCU,
                                  NULL);
  
 -      INIT_LIST_HEAD(&dev_priv->vm_list);
        INIT_LIST_HEAD(&dev_priv->context_list);
        INIT_LIST_HEAD(&dev_priv->mm.unbound_list);
        INIT_LIST_HEAD(&dev_priv->mm.bound_list);
        INIT_LIST_HEAD(&dev_priv->mm.fence_list);
        for (i = 0; i < I915_NUM_ENGINES; i++)
                init_engine_lists(&dev_priv->engine[i]);
 -      for (i = 0; i < I915_MAX_NUM_FENCES; i++)
 -              INIT_LIST_HEAD(&dev_priv->fence_regs[i].lru_list);
        INIT_DELAYED_WORK(&dev_priv->gt.retire_work,
                          i915_gem_retire_work_handler);
        INIT_DELAYED_WORK(&dev_priv->gt.idle_work,
  
        dev_priv->relative_constants_mode = I915_EXEC_CONSTANTS_REL_GENERAL;
  
 -      INIT_LIST_HEAD(&dev_priv->mm.fence_list);
 -
        init_waitqueue_head(&dev_priv->pending_flip_queue);
  
        dev_priv->mm.interruptible = true;
  
 -      mutex_init(&dev_priv->fb_tracking.lock);
 +      atomic_set(&dev_priv->mm.bsd_engine_dispatch_index, 0);
 +
 +      spin_lock_init(&dev_priv->fb_tracking.lock);
  }
  
  void i915_gem_load_cleanup(struct drm_device *dev)
        kmem_cache_destroy(dev_priv->requests);
        kmem_cache_destroy(dev_priv->vmas);
        kmem_cache_destroy(dev_priv->objects);
 +
 +      /* And ensure that our DESTROY_BY_RCU slabs are truly destroyed */
 +      rcu_barrier();
  }
  
  int i915_gem_freeze_late(struct drm_i915_private *dev_priv)
  {
        struct drm_i915_gem_object *obj;
 +      struct list_head *phases[] = {
 +              &dev_priv->mm.unbound_list,
 +              &dev_priv->mm.bound_list,
 +              NULL
 +      }, **p;
  
        /* Called just before we write the hibernation image.
         *
         *
         * To make sure the hibernation image contains the latest state,
         * we update that state just before writing out the image.
 +       *
 +       * To try and reduce the hibernation image, we manually shrink
 +       * the objects as well.
         */
  
 -      list_for_each_entry(obj, &dev_priv->mm.unbound_list, global_list) {
 -              obj->base.read_domains = I915_GEM_DOMAIN_CPU;
 -              obj->base.write_domain = I915_GEM_DOMAIN_CPU;
 -      }
 +      i915_gem_shrink_all(dev_priv);
  
 -      list_for_each_entry(obj, &dev_priv->mm.bound_list, global_list) {
 -              obj->base.read_domains = I915_GEM_DOMAIN_CPU;
 -              obj->base.write_domain = I915_GEM_DOMAIN_CPU;
 +      for (p = phases; *p; p++) {
 +              list_for_each_entry(obj, *p, global_list) {
 +                      obj->base.read_domains = I915_GEM_DOMAIN_CPU;
 +                      obj->base.write_domain = I915_GEM_DOMAIN_CPU;
 +              }
        }
  
        return 0;
  void i915_gem_release(struct drm_device *dev, struct drm_file *file)
  {
        struct drm_i915_file_private *file_priv = file->driver_priv;
 +      struct drm_i915_gem_request *request;
  
        /* Clean up our request list when the client is going away, so that
         * later retire_requests won't dereference our soon-to-be-gone
         * file_priv.
         */
        spin_lock(&file_priv->mm.lock);
 -      while (!list_empty(&file_priv->mm.request_list)) {
 -              struct drm_i915_gem_request *request;
 -
 -              request = list_first_entry(&file_priv->mm.request_list,
 -                                         struct drm_i915_gem_request,
 -                                         client_list);
 -              list_del(&request->client_list);
 +      list_for_each_entry(request, &file_priv->mm.request_list, client_list)
                request->file_priv = NULL;
 -      }
        spin_unlock(&file_priv->mm.lock);
  
        if (!list_empty(&file_priv->rps.link)) {
@@@ -4661,7 -5402,7 +4664,7 @@@ int i915_gem_open(struct drm_device *de
        spin_lock_init(&file_priv->mm.lock);
        INIT_LIST_HEAD(&file_priv->mm.request_list);
  
 -      file_priv->bsd_ring = -1;
 +      file_priv->bsd_engine = -1;
  
        ret = i915_gem_context_open(dev, file);
        if (ret)
@@@ -4683,24 -5424,118 +4686,24 @@@ void i915_gem_track_fb(struct drm_i915_
                       struct drm_i915_gem_object *new,
                       unsigned frontbuffer_bits)
  {
 +      /* Control of individual bits within the mask is guarded by
 +       * the owning plane->mutex, i.e. we can never see concurrent
 +       * manipulation of individual bits. But since the bitfield as a whole
 +       * is updated using RMW, we need to use atomics in order to update
 +       * the bits.
 +       */
 +      BUILD_BUG_ON(INTEL_FRONTBUFFER_BITS_PER_PIPE * I915_MAX_PIPES >
 +                   sizeof(atomic_t) * BITS_PER_BYTE);
 +
        if (old) {
 -              WARN_ON(!mutex_is_locked(&old->base.dev->struct_mutex));
 -              WARN_ON(!(old->frontbuffer_bits & frontbuffer_bits));
 -              old->frontbuffer_bits &= ~frontbuffer_bits;
 +              WARN_ON(!(atomic_read(&old->frontbuffer_bits) & frontbuffer_bits));
 +              atomic_andnot(frontbuffer_bits, &old->frontbuffer_bits);
        }
  
        if (new) {
 -              WARN_ON(!mutex_is_locked(&new->base.dev->struct_mutex));
 -              WARN_ON(new->frontbuffer_bits & frontbuffer_bits);
 -              new->frontbuffer_bits |= frontbuffer_bits;
 -      }
 -}
 -
 -/* All the new VM stuff */
 -u64 i915_gem_obj_offset(struct drm_i915_gem_object *o,
 -                      struct i915_address_space *vm)
 -{
 -      struct drm_i915_private *dev_priv = to_i915(o->base.dev);
 -      struct i915_vma *vma;
 -
 -      WARN_ON(vm == &dev_priv->mm.aliasing_ppgtt->base);
 -
 -      list_for_each_entry(vma, &o->vma_list, obj_link) {
 -              if (vma->is_ggtt &&
 -                  vma->ggtt_view.type != I915_GGTT_VIEW_NORMAL)
 -                      continue;
 -              if (vma->vm == vm)
 -                      return vma->node.start;
 -      }
 -
 -      WARN(1, "%s vma for this object not found.\n",
 -           i915_is_ggtt(vm) ? "global" : "ppgtt");
 -      return -1;
 -}
 -
 -u64 i915_gem_obj_ggtt_offset_view(struct drm_i915_gem_object *o,
 -                                const struct i915_ggtt_view *view)
 -{
 -      struct i915_vma *vma;
 -
 -      list_for_each_entry(vma, &o->vma_list, obj_link)
 -              if (vma->is_ggtt && i915_ggtt_view_equal(&vma->ggtt_view, view))
 -                      return vma->node.start;
 -
 -      WARN(1, "global vma for this object not found. (view=%u)\n", view->type);
 -      return -1;
 -}
 -
 -bool i915_gem_obj_bound(struct drm_i915_gem_object *o,
 -                      struct i915_address_space *vm)
 -{
 -      struct i915_vma *vma;
 -
 -      list_for_each_entry(vma, &o->vma_list, obj_link) {
 -              if (vma->is_ggtt &&
 -                  vma->ggtt_view.type != I915_GGTT_VIEW_NORMAL)
 -                      continue;
 -              if (vma->vm == vm && drm_mm_node_allocated(&vma->node))
 -                      return true;
 +              WARN_ON(atomic_read(&new->frontbuffer_bits) & frontbuffer_bits);
 +              atomic_or(frontbuffer_bits, &new->frontbuffer_bits);
        }
 -
 -      return false;
 -}
 -
 -bool i915_gem_obj_ggtt_bound_view(struct drm_i915_gem_object *o,
 -                                const struct i915_ggtt_view *view)
 -{
 -      struct i915_vma *vma;
 -
 -      list_for_each_entry(vma, &o->vma_list, obj_link)
 -              if (vma->is_ggtt &&
 -                  i915_ggtt_view_equal(&vma->ggtt_view, view) &&
 -                  drm_mm_node_allocated(&vma->node))
 -                      return true;
 -
 -      return false;
 -}
 -
 -bool i915_gem_obj_bound_any(struct drm_i915_gem_object *o)
 -{
 -      struct i915_vma *vma;
 -
 -      list_for_each_entry(vma, &o->vma_list, obj_link)
 -              if (drm_mm_node_allocated(&vma->node))
 -                      return true;
 -
 -      return false;
 -}
 -
 -unsigned long i915_gem_obj_ggtt_size(struct drm_i915_gem_object *o)
 -{
 -      struct i915_vma *vma;
 -
 -      GEM_BUG_ON(list_empty(&o->vma_list));
 -
 -      list_for_each_entry(vma, &o->vma_list, obj_link) {
 -              if (vma->is_ggtt &&
 -                  vma->ggtt_view.type == I915_GGTT_VIEW_NORMAL)
 -                      return vma->node.size;
 -      }
 -
 -      return 0;
 -}
 -
 -bool i915_gem_obj_is_pinned(struct drm_i915_gem_object *obj)
 -{
 -      struct i915_vma *vma;
 -      list_for_each_entry(vma, &obj->vma_list, obj_link)
 -              if (vma->pin_count > 0)
 -                      return true;
 -
 -      return false;
  }
  
  /* Like i915_gem_object_get_page(), but mark the returned page dirty */
@@@ -4755,6 -5590,6 +4758,6 @@@ i915_gem_object_create_from_data(struc
        return obj;
  
  fail:
 -      drm_gem_object_unreference(&obj->base);
 +      i915_gem_object_put(obj);
        return ERR_PTR(ret);
  }
diff --combined drivers/gpu/drm/i915/intel_display.c
index 497d99b8846883692ea0be1bb818b29319a5c15e,175595fc3e45d239ce528662696167bac4c73327..8d4c35d55b1bbd4e2848b5bf4d69a0abbdd05cbc
@@@ -34,7 -34,6 +34,7 @@@
  #include <drm/drm_edid.h>
  #include <drm/drmP.h>
  #include "intel_drv.h"
 +#include "intel_frontbuffer.h"
  #include <drm/i915_drm.h>
  #include "i915_drv.h"
  #include "i915_gem_dmabuf.h"
@@@ -1202,8 -1201,8 +1202,8 @@@ void assert_panel_unlocked(struct drm_i
        if (HAS_PCH_SPLIT(dev)) {
                u32 port_sel;
  
 -              pp_reg = PCH_PP_CONTROL;
 -              port_sel = I915_READ(PCH_PP_ON_DELAYS) & PANEL_PORT_SELECT_MASK;
 +              pp_reg = PP_CONTROL(0);
 +              port_sel = I915_READ(PP_ON_DELAYS(0)) & PANEL_PORT_SELECT_MASK;
  
                if (port_sel == PANEL_PORT_SELECT_LVDS &&
                    I915_READ(PCH_LVDS) & LVDS_PIPEB_SELECT)
                /* XXX: else fix for eDP */
        } else if (IS_VALLEYVIEW(dev) || IS_CHERRYVIEW(dev)) {
                /* presumably write lock depends on pipe, not port select */
 -              pp_reg = VLV_PIPE_PP_CONTROL(pipe);
 +              pp_reg = PP_CONTROL(pipe);
                panel_pipe = pipe;
        } else {
 -              pp_reg = PP_CONTROL;
 +              pp_reg = PP_CONTROL(0);
                if (I915_READ(LVDS) & LVDS_PIPEB_SELECT)
                        panel_pipe = PIPE_B;
        }
@@@ -1907,7 -1906,7 +1907,7 @@@ static void ironlake_disable_pch_transc
        }
  }
  
 -static void lpt_disable_pch_transcoder(struct drm_i915_private *dev_priv)
 +void lpt_disable_pch_transcoder(struct drm_i915_private *dev_priv)
  {
        u32 val;
  
@@@ -1959,12 -1958,12 +1959,12 @@@ static void intel_enable_pipe(struct in
         * a plane.  On ILK+ the pipe PLLs are integrated, so we don't
         * need the check.
         */
 -      if (HAS_GMCH_DISPLAY(dev_priv))
 +      if (HAS_GMCH_DISPLAY(dev_priv)) {
                if (intel_crtc_has_type(crtc->config, INTEL_OUTPUT_DSI))
                        assert_dsi_pll_enabled(dev_priv);
                else
                        assert_pll_enabled(dev_priv, pipe);
 -      else {
 +      } else {
                if (crtc->config->has_pch_encoder) {
                        /* if driving the PCH, we need FDI enabled */
                        assert_fdi_rx_pll_enabled(dev_priv, pch_transcoder);
@@@ -2147,6 -2146,33 +2147,6 @@@ intel_fill_fb_ggtt_view(struct i915_ggt
        }
  }
  
 -static void
 -intel_fill_fb_info(struct drm_i915_private *dev_priv,
 -                 struct drm_framebuffer *fb)
 -{
 -      struct intel_rotation_info *info = &to_intel_framebuffer(fb)->rot_info;
 -      unsigned int tile_size, tile_width, tile_height, cpp;
 -
 -      tile_size = intel_tile_size(dev_priv);
 -
 -      cpp = drm_format_plane_cpp(fb->pixel_format, 0);
 -      intel_tile_dims(dev_priv, &tile_width, &tile_height,
 -                      fb->modifier[0], cpp);
 -
 -      info->plane[0].width = DIV_ROUND_UP(fb->pitches[0], tile_width * cpp);
 -      info->plane[0].height = DIV_ROUND_UP(fb->height, tile_height);
 -
 -      if (info->pixel_format == DRM_FORMAT_NV12) {
 -              cpp = drm_format_plane_cpp(fb->pixel_format, 1);
 -              intel_tile_dims(dev_priv, &tile_width, &tile_height,
 -                              fb->modifier[1], cpp);
 -
 -              info->uv_offset = fb->offsets[1];
 -              info->plane[1].width = DIV_ROUND_UP(fb->pitches[1], tile_width * cpp);
 -              info->plane[1].height = DIV_ROUND_UP(fb->height / 2, tile_height);
 -      }
 -}
 -
  static unsigned int intel_linear_alignment(const struct drm_i915_private *dev_priv)
  {
        if (INTEL_INFO(dev_priv)->gen >= 9)
@@@ -2179,15 -2205,16 +2179,15 @@@ static unsigned int intel_surf_alignmen
        }
  }
  
 -int
 -intel_pin_and_fence_fb_obj(struct drm_framebuffer *fb,
 -                         unsigned int rotation)
 +struct i915_vma *
 +intel_pin_and_fence_fb_obj(struct drm_framebuffer *fb, unsigned int rotation)
  {
        struct drm_device *dev = fb->dev;
        struct drm_i915_private *dev_priv = to_i915(dev);
        struct drm_i915_gem_object *obj = intel_fb_obj(fb);
        struct i915_ggtt_view view;
 +      struct i915_vma *vma;
        u32 alignment;
 -      int ret;
  
        WARN_ON(!mutex_is_locked(&dev->struct_mutex));
  
         */
        intel_runtime_pm_get(dev_priv);
  
 -      ret = i915_gem_object_pin_to_display_plane(obj, alignment,
 -                                                 &view);
 -      if (ret)
 -              goto err_pm;
 -
 -      /* Install a fence for tiled scan-out. Pre-i965 always needs a
 -       * fence, whereas 965+ only requires a fence if using
 -       * framebuffer compression.  For simplicity, we always install
 -       * a fence as the cost is not that onerous.
 -       */
 -      if (view.type == I915_GGTT_VIEW_NORMAL) {
 -              ret = i915_gem_object_get_fence(obj);
 -              if (ret == -EDEADLK) {
 -                      /*
 -                       * -EDEADLK means there are no free fences
 -                       * no pending flips.
 -                       *
 -                       * This is propagated to atomic, but it uses
 -                       * -EDEADLK to force a locking recovery, so
 -                       * change the returned error to -EBUSY.
 -                       */
 -                      ret = -EBUSY;
 -                      goto err_unpin;
 -              } else if (ret)
 -                      goto err_unpin;
 +      vma = i915_gem_object_pin_to_display_plane(obj, alignment, &view);
 +      if (IS_ERR(vma))
 +              goto err;
  
 -              i915_gem_object_pin_fence(obj);
 +      if (i915_vma_is_map_and_fenceable(vma)) {
 +              /* Install a fence for tiled scan-out. Pre-i965 always needs a
 +               * fence, whereas 965+ only requires a fence if using
 +               * framebuffer compression.  For simplicity, we always, when
 +               * possible, install a fence as the cost is not that onerous.
 +               *
 +               * If we fail to fence the tiled scanout, then either the
 +               * modeset will reject the change (which is highly unlikely as
 +               * the affected systems, all but one, do not have unmappable
 +               * space) or we will not be able to enable full powersaving
 +               * techniques (also likely not to apply due to various limits
 +               * FBC and the like impose on the size of the buffer, which
 +               * presumably we violated anyway with this unmappable buffer).
 +               * Anyway, it is presumably better to stumble onwards with
 +               * something and try to run the system in a "less than optimal"
 +               * mode that matches the user configuration.
 +               */
 +              if (i915_vma_get_fence(vma) == 0)
 +                      i915_vma_pin_fence(vma);
        }
  
 +err:
        intel_runtime_pm_put(dev_priv);
 -      return 0;
 -
 -err_unpin:
 -      i915_gem_object_unpin_from_display_plane(obj, &view);
 -err_pm:
 -      intel_runtime_pm_put(dev_priv);
 -      return ret;
 +      return vma;
  }
  
  void intel_unpin_fb_obj(struct drm_framebuffer *fb, unsigned int rotation)
  {
        struct drm_i915_gem_object *obj = intel_fb_obj(fb);
        struct i915_ggtt_view view;
 +      struct i915_vma *vma;
  
        WARN_ON(!mutex_is_locked(&obj->base.dev->struct_mutex));
  
        intel_fill_fb_ggtt_view(&view, fb, rotation);
 +      vma = i915_gem_object_to_ggtt(obj, &view);
  
 -      if (view.type == I915_GGTT_VIEW_NORMAL)
 -              i915_gem_object_unpin_fence(obj);
 +      i915_vma_unpin_fence(vma);
 +      i915_gem_object_unpin_from_display_plane(vma);
 +}
  
 -      i915_gem_object_unpin_from_display_plane(obj, &view);
 +static int intel_fb_pitch(const struct drm_framebuffer *fb, int plane,
 +                        unsigned int rotation)
 +{
 +      if (intel_rotation_90_or_270(rotation))
 +              return to_intel_framebuffer(fb)->rotated[plane].pitch;
 +      else
 +              return fb->pitches[plane];
 +}
 +
 +/*
 + * Convert the x/y offsets into a linear offset.
 + * Only valid with 0/180 degree rotation, which is fine since linear
 + * offset is only used with linear buffers on pre-hsw and tiled buffers
 + * with gen2/3, and 90/270 degree rotations isn't supported on any of them.
 + */
 +u32 intel_fb_xy_to_linear(int x, int y,
 +                        const struct intel_plane_state *state,
 +                        int plane)
 +{
 +      const struct drm_framebuffer *fb = state->base.fb;
 +      unsigned int cpp = drm_format_plane_cpp(fb->pixel_format, plane);
 +      unsigned int pitch = fb->pitches[plane];
 +
 +      return y * pitch + x * cpp;
 +}
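
As a quick arithmetic check of intel_fb_xy_to_linear() above: with an assumed 4-byte-per-pixel format and an 8192-byte pitch, pixel (x=16, y=2) sits 2 * 8192 + 16 * 4 = 16448 bytes into the plane. A standalone version of the same computation (numbers are illustrative only):

#include <stdio.h>

/* Same formula as intel_fb_xy_to_linear(): linear = y * pitch + x * cpp. */
static unsigned int xy_to_linear(int x, int y, unsigned int pitch, unsigned int cpp)
{
	return y * pitch + x * cpp;
}

int main(void)
{
	printf("%u\n", xy_to_linear(16, 2, 8192, 4));	/* prints 16448 */
	return 0;
}
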
 +
 +/*
 + * Add the x/y offsets derived from fb->offsets[] to the user
 + * specified plane src x/y offsets. The resulting x/y offsets
 + * specify the start of scanout from the beginning of the gtt mapping.
 + */
 +void intel_add_fb_offsets(int *x, int *y,
 +                        const struct intel_plane_state *state,
 +                        int plane)
 +
 +{
 +      const struct intel_framebuffer *intel_fb = to_intel_framebuffer(state->base.fb);
 +      unsigned int rotation = state->base.rotation;
 +
 +      if (intel_rotation_90_or_270(rotation)) {
 +              *x += intel_fb->rotated[plane].x;
 +              *y += intel_fb->rotated[plane].y;
 +      } else {
 +              *x += intel_fb->normal[plane].x;
 +              *y += intel_fb->normal[plane].y;
 +      }
  }
  
  /*
 - * Adjust the tile offset by moving the difference into
 - * the x/y offsets.
 - *
   * Input tile dimensions and pitch must already be
   * rotated to match x and y, and in pixel units.
   */
 -static u32 intel_adjust_tile_offset(int *x, int *y,
 -                                  unsigned int tile_width,
 -                                  unsigned int tile_height,
 -                                  unsigned int tile_size,
 -                                  unsigned int pitch_tiles,
 -                                  u32 old_offset,
 -                                  u32 new_offset)
 -{
 +static u32 _intel_adjust_tile_offset(int *x, int *y,
 +                                   unsigned int tile_width,
 +                                   unsigned int tile_height,
 +                                   unsigned int tile_size,
 +                                   unsigned int pitch_tiles,
 +                                   u32 old_offset,
 +                                   u32 new_offset)
 +{
 +      unsigned int pitch_pixels = pitch_tiles * tile_width;
        unsigned int tiles;
  
        WARN_ON(old_offset & (tile_size - 1));
        *y += tiles / pitch_tiles * tile_height;
        *x += tiles % pitch_tiles * tile_width;
  
 +      /* minimize x in case it got needlessly big */
 +      *y += *x / pitch_pixels * tile_height;
 +      *x %= pitch_pixels;
 +
 +      return new_offset;
 +}
 +
 +/*
 + * Adjust the tile offset by moving the difference into
 + * the x/y offsets.
 + */
 +static u32 intel_adjust_tile_offset(int *x, int *y,
 +                                  const struct intel_plane_state *state, int plane,
 +                                  u32 old_offset, u32 new_offset)
 +{
 +      const struct drm_i915_private *dev_priv = to_i915(state->base.plane->dev);
 +      const struct drm_framebuffer *fb = state->base.fb;
 +      unsigned int cpp = drm_format_plane_cpp(fb->pixel_format, plane);
 +      unsigned int rotation = state->base.rotation;
 +      unsigned int pitch = intel_fb_pitch(fb, plane, rotation);
 +
 +      WARN_ON(new_offset > old_offset);
 +
 +      if (fb->modifier[plane] != DRM_FORMAT_MOD_NONE) {
 +              unsigned int tile_size, tile_width, tile_height;
 +              unsigned int pitch_tiles;
 +
 +              tile_size = intel_tile_size(dev_priv);
 +              intel_tile_dims(dev_priv, &tile_width, &tile_height,
 +                              fb->modifier[plane], cpp);
 +
 +              if (intel_rotation_90_or_270(rotation)) {
 +                      pitch_tiles = pitch / tile_height;
 +                      swap(tile_width, tile_height);
 +              } else {
 +                      pitch_tiles = pitch / (tile_width * cpp);
 +              }
 +
 +              _intel_adjust_tile_offset(x, y, tile_width, tile_height,
 +                                        tile_size, pitch_tiles,
 +                                        old_offset, new_offset);
 +      } else {
 +              old_offset += *y * pitch + *x * cpp;
 +
 +              *y = (old_offset - new_offset) / pitch;
 +              *x = ((old_offset - new_offset) - *y * pitch) / cpp;
 +      }
 +
        return new_offset;
  }
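
The linear (untiled) branch of intel_adjust_tile_offset() above can be sanity-checked with concrete numbers: take pitch = 8192, cpp = 4, a pixel at (x=4, y=1) relative to old_offset = 4096, and new_offset = 0. The absolute byte offset is 4096 + 1 * 8192 + 4 * 4 = 12304, which folds back into y = 12304 / 8192 = 1 and x = (12304 - 8192) / 4 = 1028 relative to the new origin. A standalone version of just that branch (values are illustrative):

#include <stdio.h>

/* Linear branch of the adjustment above: fold the offset delta into x/y. */
static void adjust_linear(int *x, int *y, unsigned int pitch, unsigned int cpp,
			  unsigned int old_offset, unsigned int new_offset)
{
	old_offset += *y * pitch + *x * cpp;

	*y = (old_offset - new_offset) / pitch;
	*x = ((old_offset - new_offset) - *y * pitch) / cpp;
}

int main(void)
{
	int x = 4, y = 1;

	adjust_linear(&x, &y, 8192, 4, 4096, 0);
	printf("x=%d y=%d\n", x, y);	/* x=1028 y=1: same byte, new origin */
	return 0;
}
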
  
   * In the 90/270 rotated case, x and y are assumed
   * to be already rotated to match the rotated GTT view, and
   * pitch is the tile_height aligned framebuffer height.
 + *
 + * This function is used when computing the derived information
 + * under intel_framebuffer, so using any of that information
 + * here is not allowed. Anything under drm_framebuffer can be
 + * used. This is why the user has to pass in the pitch since it
 + * is specified in the rotated orientation.
   */
 -u32 intel_compute_tile_offset(int *x, int *y,
 -                            const struct drm_framebuffer *fb, int plane,
 -                            unsigned int pitch,
 -                            unsigned int rotation)
 +static u32 _intel_compute_tile_offset(const struct drm_i915_private *dev_priv,
 +                                    int *x, int *y,
 +                                    const struct drm_framebuffer *fb, int plane,
 +                                    unsigned int pitch,
 +                                    unsigned int rotation,
 +                                    u32 alignment)
  {
 -      const struct drm_i915_private *dev_priv = to_i915(fb->dev);
        uint64_t fb_modifier = fb->modifier[plane];
        unsigned int cpp = drm_format_plane_cpp(fb->pixel_format, plane);
 -      u32 offset, offset_aligned, alignment;
 +      u32 offset, offset_aligned;
  
 -      alignment = intel_surf_alignment(dev_priv, fb_modifier);
        if (alignment)
                alignment--;
  
                offset = (tile_rows * pitch_tiles + tiles) * tile_size;
                offset_aligned = offset & ~alignment;
  
 -              intel_adjust_tile_offset(x, y, tile_width, tile_height,
 -                                       tile_size, pitch_tiles,
 -                                       offset, offset_aligned);
 +              _intel_adjust_tile_offset(x, y, tile_width, tile_height,
 +                                        tile_size, pitch_tiles,
 +                                        offset, offset_aligned);
        } else {
                offset = *y * pitch + *x * cpp;
                offset_aligned = offset & ~alignment;
        return offset_aligned;
  }
  
 +u32 intel_compute_tile_offset(int *x, int *y,
 +                            const struct intel_plane_state *state,
 +                            int plane)
 +{
 +      const struct drm_i915_private *dev_priv = to_i915(state->base.plane->dev);
 +      const struct drm_framebuffer *fb = state->base.fb;
 +      unsigned int rotation = state->base.rotation;
 +      int pitch = intel_fb_pitch(fb, plane, rotation);
 +      u32 alignment;
 +
 +      /* AUX_DIST needs only 4K alignment */
 +      if (fb->pixel_format == DRM_FORMAT_NV12 && plane == 1)
 +              alignment = 4096;
 +      else
 +              alignment = intel_surf_alignment(dev_priv, fb->modifier[plane]);
 +
 +      return _intel_compute_tile_offset(dev_priv, x, y, fb, plane, pitch,
 +                                        rotation, alignment);
 +}
 +
 +/* Convert the fb->offset[] linear offset into x/y offsets */
 +static void intel_fb_offset_to_xy(int *x, int *y,
 +                                const struct drm_framebuffer *fb, int plane)
 +{
 +      unsigned int cpp = drm_format_plane_cpp(fb->pixel_format, plane);
 +      unsigned int pitch = fb->pitches[plane];
 +      u32 linear_offset = fb->offsets[plane];
 +
 +      *y = linear_offset / pitch;
 +      *x = linear_offset % pitch / cpp;
 +}
 +
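For illustration only (not part of the commit): intel_fb_offset_to_xy() above simply inverts the linear-layout relation linear = y * pitch + x * cpp, the same relation the non-tiled branches of the offset helpers use. A minimal standalone sketch with invented pitch/cpp values (offset_to_xy is a hypothetical stand-in, not a kernel function):

#include <stdio.h>

/* Hypothetical helper mirroring intel_fb_offset_to_xy(): split a byte
 * offset into x/y pixel offsets for a linear layout. */
static void offset_to_xy(unsigned int linear, unsigned int pitch,
			 unsigned int cpp, int *x, int *y)
{
	*y = linear / pitch;		/* whole rows */
	*x = linear % pitch / cpp;	/* leftover bytes -> pixels */
}

int main(void)
{
	int x, y;

	/* pitch = 4096 bytes, cpp = 4 bytes/pixel, offset = 16400 bytes */
	offset_to_xy(16400, 4096, 4, &x, &y);
	printf("x=%d y=%d\n", x, y);	/* prints x=4 y=4 */
	return 0;
}
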
 +static unsigned int intel_fb_modifier_to_tiling(uint64_t fb_modifier)
 +{
 +      switch (fb_modifier) {
 +      case I915_FORMAT_MOD_X_TILED:
 +              return I915_TILING_X;
 +      case I915_FORMAT_MOD_Y_TILED:
 +              return I915_TILING_Y;
 +      default:
 +              return I915_TILING_NONE;
 +      }
 +}
 +
 +static int
 +intel_fill_fb_info(struct drm_i915_private *dev_priv,
 +                 struct drm_framebuffer *fb)
 +{
 +      struct intel_framebuffer *intel_fb = to_intel_framebuffer(fb);
 +      struct intel_rotation_info *rot_info = &intel_fb->rot_info;
 +      u32 gtt_offset_rotated = 0;
 +      unsigned int max_size = 0;
 +      uint32_t format = fb->pixel_format;
 +      int i, num_planes = drm_format_num_planes(format);
 +      unsigned int tile_size = intel_tile_size(dev_priv);
 +
 +      for (i = 0; i < num_planes; i++) {
 +              unsigned int width, height;
 +              unsigned int cpp, size;
 +              u32 offset;
 +              int x, y;
 +
 +              cpp = drm_format_plane_cpp(format, i);
 +              width = drm_format_plane_width(fb->width, format, i);
 +              height = drm_format_plane_height(fb->height, format, i);
 +
 +              intel_fb_offset_to_xy(&x, &y, fb, i);
 +
 +              /*
 +               * The fence (if used) is aligned to the start of the object
 +               * so having the framebuffer wrap around across the edge of the
 +               * fenced region doesn't really work. We have no API to configure
 +               * the fence start offset within the object (nor could we probably
 +               * on gen2/3). So it's easier if we just require that the
 +               * fb layout agrees with the fence layout. We already check that the
 +               * fb stride matches the fence stride elsewhere.
 +               */
 +              if (i915_gem_object_is_tiled(intel_fb->obj) &&
 +                  (x + width) * cpp > fb->pitches[i]) {
 +                      DRM_DEBUG("bad fb plane %d offset: 0x%x\n",
 +                                i, fb->offsets[i]);
 +                      return -EINVAL;
 +              }
 +
 +              /*
 +               * First pixel of the framebuffer from
 +               * the start of the normal gtt mapping.
 +               */
 +              intel_fb->normal[i].x = x;
 +              intel_fb->normal[i].y = y;
 +
 +              offset = _intel_compute_tile_offset(dev_priv, &x, &y,
 +                                                  fb, 0, fb->pitches[i],
 +                                                  DRM_ROTATE_0, tile_size);
 +              offset /= tile_size;
 +
 +              if (fb->modifier[i] != DRM_FORMAT_MOD_NONE) {
 +                      unsigned int tile_width, tile_height;
 +                      unsigned int pitch_tiles;
 +                      struct drm_rect r;
 +
 +                      intel_tile_dims(dev_priv, &tile_width, &tile_height,
 +                                      fb->modifier[i], cpp);
 +
 +                      rot_info->plane[i].offset = offset;
 +                      rot_info->plane[i].stride = DIV_ROUND_UP(fb->pitches[i], tile_width * cpp);
 +                      rot_info->plane[i].width = DIV_ROUND_UP(x + width, tile_width);
 +                      rot_info->plane[i].height = DIV_ROUND_UP(y + height, tile_height);
 +
 +                      intel_fb->rotated[i].pitch =
 +                              rot_info->plane[i].height * tile_height;
 +
 +                      /* how many tiles does this plane need */
 +                      size = rot_info->plane[i].stride * rot_info->plane[i].height;
 +                      /*
 +                       * If the plane isn't horizontally tile aligned,
 +                       * we need one more tile.
 +                       */
 +                      if (x != 0)
 +                              size++;
 +
 +                      /* rotate the x/y offsets to match the GTT view */
 +                      r.x1 = x;
 +                      r.y1 = y;
 +                      r.x2 = x + width;
 +                      r.y2 = y + height;
 +                      drm_rect_rotate(&r,
 +                                      rot_info->plane[i].width * tile_width,
 +                                      rot_info->plane[i].height * tile_height,
 +                                      DRM_ROTATE_270);
 +                      x = r.x1;
 +                      y = r.y1;
 +
 +                      /* rotate the tile dimensions to match the GTT view */
 +                      pitch_tiles = intel_fb->rotated[i].pitch / tile_height;
 +                      swap(tile_width, tile_height);
 +
 +                      /*
 +                       * We only keep the x/y offsets, so push all of the
 +                       * gtt offset into the x/y offsets.
 +                       */
 +                      _intel_adjust_tile_offset(&x, &y, tile_size,
 +                                                tile_width, tile_height, pitch_tiles,
 +                                                gtt_offset_rotated * tile_size, 0);
 +
 +                      gtt_offset_rotated += rot_info->plane[i].width * rot_info->plane[i].height;
 +
 +                      /*
 +                       * First pixel of the framebuffer from
 +                       * the start of the rotated gtt mapping.
 +                       */
 +                      intel_fb->rotated[i].x = x;
 +                      intel_fb->rotated[i].y = y;
 +              } else {
 +                      size = DIV_ROUND_UP((y + height) * fb->pitches[i] +
 +                                          x * cpp, tile_size);
 +              }
 +
 +              /* how many tiles in total needed in the bo */
 +              max_size = max(max_size, offset + size);
 +      }
 +
 +      if (max_size * tile_size > to_intel_framebuffer(fb)->obj->base.size) {
 +              DRM_DEBUG("fb too big for bo (need %u bytes, have %zu bytes)\n",
 +                        max_size * tile_size, to_intel_framebuffer(fb)->obj->base.size);
 +              return -EINVAL;
 +      }
 +
 +      return 0;
 +}
 +
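For illustration only (not part of the commit): for tiled objects, intel_fill_fb_info() above rejects an fb whose plane rows would wrap past the fence stride, i.e. it requires (x + width) * cpp <= fb->pitches[i]. A rough standalone sketch of that predicate with invented numbers (fb_plane_fits_stride is hypothetical):

#include <stdbool.h>
#include <stdio.h>

/* Hypothetical stand-in for the row-wrap check in intel_fill_fb_info():
 * a plane row starting at pixel x must not extend past the fb stride. */
static bool fb_plane_fits_stride(unsigned int x, unsigned int width,
				 unsigned int cpp, unsigned int pitch)
{
	return (x + width) * cpp <= pitch;
}

int main(void)
{
	/* 1920 pixels wide, 4 bytes/pixel, 8192 byte stride */
	printf("%d\n", fb_plane_fits_stride(128, 1920, 4, 8192));	/* 1: 2048 * 4 == 8192 */
	printf("%d\n", fb_plane_fits_stride(129, 1920, 4, 8192));	/* 0: 2049 * 4 >  8192 */
	return 0;
}
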
  static int i9xx_format_to_fourcc(int format)
  {
        switch (format) {
@@@ -2700,8 -2465,9 +2700,8 @@@ intel_alloc_initial_plane_obj(struct in
                return false;
        }
  
 -      obj->tiling_mode = plane_config->tiling;
 -      if (obj->tiling_mode == I915_TILING_X)
 -              obj->stride = fb->pitches[0];
 +      if (plane_config->tiling == I915_TILING_X)
 +              obj->tiling_and_stride = fb->pitches[0] | I915_TILING_X;
  
        mode_cmd.pixel_format = fb->pixel_format;
        mode_cmd.width = fb->width;
        return true;
  
  out_unref_obj:
 -      drm_gem_object_unreference(&obj->base);
 +      i915_gem_object_put(obj);
        mutex_unlock(&dev->struct_mutex);
        return false;
  }
@@@ -2786,7 -2552,7 +2786,7 @@@ intel_find_initial_plane_obj(struct int
                        continue;
  
                obj = intel_fb_obj(fb);
 -              if (i915_gem_obj_ggtt_offset(obj) == plane_config->base) {
 +              if (i915_gem_object_ggtt_offset(obj, NULL) == plane_config->base) {
                        drm_framebuffer_reference(fb);
                        goto valid_fb;
                }
         * simplest solution is to just disable the primary plane now and
         * pretend the BIOS never had it enabled.
         */
 -      to_intel_plane_state(plane_state)->visible = false;
 +      to_intel_plane_state(plane_state)->base.visible = false;
        crtc_state->plane_mask &= ~(1 << drm_plane_index(primary));
        intel_pre_disable_primary_noatomic(&intel_crtc->base);
        intel_plane->disable_plane(primary, &intel_crtc->base);
@@@ -2817,188 -2583,24 +2817,188 @@@ valid_fb
        plane_state->crtc_w = fb->width;
        plane_state->crtc_h = fb->height;
  
 -      intel_state->src.x1 = plane_state->src_x;
 -      intel_state->src.y1 = plane_state->src_y;
 -      intel_state->src.x2 = plane_state->src_x + plane_state->src_w;
 -      intel_state->src.y2 = plane_state->src_y + plane_state->src_h;
 -      intel_state->dst.x1 = plane_state->crtc_x;
 -      intel_state->dst.y1 = plane_state->crtc_y;
 -      intel_state->dst.x2 = plane_state->crtc_x + plane_state->crtc_w;
 -      intel_state->dst.y2 = plane_state->crtc_y + plane_state->crtc_h;
 +      intel_state->base.src.x1 = plane_state->src_x;
 +      intel_state->base.src.y1 = plane_state->src_y;
 +      intel_state->base.src.x2 = plane_state->src_x + plane_state->src_w;
 +      intel_state->base.src.y2 = plane_state->src_y + plane_state->src_h;
 +      intel_state->base.dst.x1 = plane_state->crtc_x;
 +      intel_state->base.dst.y1 = plane_state->crtc_y;
 +      intel_state->base.dst.x2 = plane_state->crtc_x + plane_state->crtc_w;
 +      intel_state->base.dst.y2 = plane_state->crtc_y + plane_state->crtc_h;
  
        obj = intel_fb_obj(fb);
 -      if (obj->tiling_mode != I915_TILING_NONE)
 +      if (i915_gem_object_is_tiled(obj))
                dev_priv->preserve_bios_swizzle = true;
  
        drm_framebuffer_reference(fb);
        primary->fb = primary->state->fb = fb;
        primary->crtc = primary->state->crtc = &intel_crtc->base;
        intel_crtc->base.state->plane_mask |= (1 << drm_plane_index(primary));
 -      obj->frontbuffer_bits |= to_intel_plane(primary)->frontbuffer_bit;
 +      atomic_or(to_intel_plane(primary)->frontbuffer_bit,
 +                &obj->frontbuffer_bits);
 +}
 +
 +static int skl_max_plane_width(const struct drm_framebuffer *fb, int plane,
 +                             unsigned int rotation)
 +{
 +      int cpp = drm_format_plane_cpp(fb->pixel_format, plane);
 +
 +      switch (fb->modifier[plane]) {
 +      case DRM_FORMAT_MOD_NONE:
 +      case I915_FORMAT_MOD_X_TILED:
 +              switch (cpp) {
 +              case 8:
 +                      return 4096;
 +              case 4:
 +              case 2:
 +              case 1:
 +                      return 8192;
 +              default:
 +                      MISSING_CASE(cpp);
 +                      break;
 +              }
 +              break;
 +      case I915_FORMAT_MOD_Y_TILED:
 +      case I915_FORMAT_MOD_Yf_TILED:
 +              switch (cpp) {
 +              case 8:
 +                      return 2048;
 +              case 4:
 +                      return 4096;
 +              case 2:
 +              case 1:
 +                      return 8192;
 +              default:
 +                      MISSING_CASE(cpp);
 +                      break;
 +              }
 +              break;
 +      default:
 +              MISSING_CASE(fb->modifier[plane]);
 +      }
 +
 +      return 2048;
 +}
 +
 +static int skl_check_main_surface(struct intel_plane_state *plane_state)
 +{
 +      const struct drm_i915_private *dev_priv = to_i915(plane_state->base.plane->dev);
 +      const struct drm_framebuffer *fb = plane_state->base.fb;
 +      unsigned int rotation = plane_state->base.rotation;
 +      int x = plane_state->base.src.x1 >> 16;
 +      int y = plane_state->base.src.y1 >> 16;
 +      int w = drm_rect_width(&plane_state->base.src) >> 16;
 +      int h = drm_rect_height(&plane_state->base.src) >> 16;
 +      int max_width = skl_max_plane_width(fb, 0, rotation);
 +      int max_height = 4096;
 +      u32 alignment, offset, aux_offset = plane_state->aux.offset;
 +
 +      if (w > max_width || h > max_height) {
 +              DRM_DEBUG_KMS("requested Y/RGB source size %dx%d too big (limit %dx%d)\n",
 +                            w, h, max_width, max_height);
 +              return -EINVAL;
 +      }
 +
 +      intel_add_fb_offsets(&x, &y, plane_state, 0);
 +      offset = intel_compute_tile_offset(&x, &y, plane_state, 0);
 +
 +      alignment = intel_surf_alignment(dev_priv, fb->modifier[0]);
 +
 +      /*
 +       * AUX surface offset is specified as the distance from the
 +       * main surface offset, and it must be non-negative. Make
 +       * sure that is what we will get.
 +       */
 +      if (offset > aux_offset)
 +              offset = intel_adjust_tile_offset(&x, &y, plane_state, 0,
 +                                                offset, aux_offset & ~(alignment - 1));
 +
 +      /*
 +       * When using an X-tiled surface, the plane blows up
 +       * if the x offset + width exceeds the stride.
 +       *
 +       * TODO: linear and Y-tiled seem fine, Yf untested.
 +       */
 +      if (fb->modifier[0] == I915_FORMAT_MOD_X_TILED) {
 +              int cpp = drm_format_plane_cpp(fb->pixel_format, 0);
 +
 +              while ((x + w) * cpp > fb->pitches[0]) {
 +                      if (offset == 0) {
 +                              DRM_DEBUG_KMS("Unable to find suitable display surface offset\n");
 +                              return -EINVAL;
 +                      }
 +
 +                      offset = intel_adjust_tile_offset(&x, &y, plane_state, 0,
 +                                                        offset, offset - alignment);
 +              }
 +      }
 +
 +      plane_state->main.offset = offset;
 +      plane_state->main.x = x;
 +      plane_state->main.y = y;
 +
 +      return 0;
 +}
 +
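For illustration only (not part of the commit): skl_check_main_surface() above keeps the AUX offset non-negative by pulling the main-surface offset down to the AUX offset rounded to the surface alignment; the real code also folds the difference back into x/y via intel_adjust_tile_offset(), which this sketch omits. With invented values:

#include <stdio.h>

int main(void)
{
	unsigned int alignment = 0x1000;	/* assumed 4 KiB surface alignment */
	unsigned int aux_offset = 0x23000;	/* CbCr surface offset within the bo */
	unsigned int offset = 0x24000;		/* main offset would make aux - main negative */

	if (offset > aux_offset)
		offset = aux_offset & ~(alignment - 1);

	/* the AUX offset is then programmed as the distance aux_offset - offset */
	printf("main=0x%x aux_dist=0x%x\n", offset, aux_offset - offset);
	return 0;
}
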
 +static int skl_check_nv12_aux_surface(struct intel_plane_state *plane_state)
 +{
 +      const struct drm_framebuffer *fb = plane_state->base.fb;
 +      unsigned int rotation = plane_state->base.rotation;
 +      int max_width = skl_max_plane_width(fb, 1, rotation);
 +      int max_height = 4096;
 +      int x = plane_state->base.src.x1 >> 17;
 +      int y = plane_state->base.src.y1 >> 17;
 +      int w = drm_rect_width(&plane_state->base.src) >> 17;
 +      int h = drm_rect_height(&plane_state->base.src) >> 17;
 +      u32 offset;
 +
 +      intel_add_fb_offsets(&x, &y, plane_state, 1);
 +      offset = intel_compute_tile_offset(&x, &y, plane_state, 1);
 +
 +      /* FIXME not quite sure how/if these apply to the chroma plane */
 +      if (w > max_width || h > max_height) {
 +              DRM_DEBUG_KMS("CbCr source size %dx%d too big (limit %dx%d)\n",
 +                            w, h, max_width, max_height);
 +              return -EINVAL;
 +      }
 +
 +      plane_state->aux.offset = offset;
 +      plane_state->aux.x = x;
 +      plane_state->aux.y = y;
 +
 +      return 0;
 +}
 +
 +int skl_check_plane_surface(struct intel_plane_state *plane_state)
 +{
 +      const struct drm_framebuffer *fb = plane_state->base.fb;
 +      unsigned int rotation = plane_state->base.rotation;
 +      int ret;
 +
 +      /* Rotate src coordinates to match rotated GTT view */
 +      if (intel_rotation_90_or_270(rotation))
 +              drm_rect_rotate(&plane_state->base.src,
 +                              fb->width, fb->height, DRM_ROTATE_270);
 +
 +      /*
 +       * Handle the AUX surface first since
 +       * the main surface setup depends on it.
 +       */
 +      if (fb->pixel_format == DRM_FORMAT_NV12) {
 +              ret = skl_check_nv12_aux_surface(plane_state);
 +              if (ret)
 +                      return ret;
 +      } else {
 +              plane_state->aux.offset = ~0xfff;
 +              plane_state->aux.x = 0;
 +              plane_state->aux.y = 0;
 +      }
 +
 +      ret = skl_check_main_surface(plane_state);
 +      if (ret)
 +              return ret;
 +
 +      return 0;
  }
  
  static void i9xx_update_primary_plane(struct drm_plane *primary,
        u32 dspcntr;
        i915_reg_t reg = DSPCNTR(plane);
        unsigned int rotation = plane_state->base.rotation;
 -      int cpp = drm_format_plane_cpp(fb->pixel_format, 0);
 -      int x = plane_state->src.x1 >> 16;
 -      int y = plane_state->src.y1 >> 16;
 +      int x = plane_state->base.src.x1 >> 16;
 +      int y = plane_state->base.src.y1 >> 16;
  
        dspcntr = DISPPLANE_GAMMA_ENABLE;
  
                BUG();
        }
  
 -      if (INTEL_INFO(dev)->gen >= 4 &&
 -          obj->tiling_mode != I915_TILING_NONE)
 +      if (INTEL_GEN(dev_priv) >= 4 &&
 +          fb->modifier[0] == I915_FORMAT_MOD_X_TILED)
                dspcntr |= DISPPLANE_TILED;
  
        if (IS_G4X(dev))
                dspcntr |= DISPPLANE_TRICKLE_FEED_DISABLE;
  
 -      linear_offset = y * fb->pitches[0] + x * cpp;
 +      intel_add_fb_offsets(&x, &y, plane_state, 0);
  
 -      if (INTEL_INFO(dev)->gen >= 4) {
 +      if (INTEL_INFO(dev)->gen >= 4)
                intel_crtc->dspaddr_offset =
 -                      intel_compute_tile_offset(&x, &y, fb, 0,
 -                                                fb->pitches[0], rotation);
 -              linear_offset -= intel_crtc->dspaddr_offset;
 -      } else {
 -              intel_crtc->dspaddr_offset = linear_offset;
 -      }
 +                      intel_compute_tile_offset(&x, &y, plane_state, 0);
  
 -      if (rotation == BIT(DRM_ROTATE_180)) {
 +      if (rotation == DRM_ROTATE_180) {
                dspcntr |= DISPPLANE_ROTATE_180;
  
                x += (crtc_state->pipe_src_w - 1);
                y += (crtc_state->pipe_src_h - 1);
 -
 -              /* Finding the last pixel of the last line of the display
 -              data and adding to linear_offset*/
 -              linear_offset +=
 -                      (crtc_state->pipe_src_h - 1) * fb->pitches[0] +
 -                      (crtc_state->pipe_src_w - 1) * cpp;
        }
  
 +      linear_offset = intel_fb_xy_to_linear(x, y, plane_state, 0);
 +
 +      if (INTEL_INFO(dev)->gen < 4)
 +              intel_crtc->dspaddr_offset = linear_offset;
 +
        intel_crtc->adjusted_x = x;
        intel_crtc->adjusted_y = y;
  
        I915_WRITE(DSPSTRIDE(plane), fb->pitches[0]);
        if (INTEL_INFO(dev)->gen >= 4) {
                I915_WRITE(DSPSURF(plane),
 -                         i915_gem_obj_ggtt_offset(obj) + intel_crtc->dspaddr_offset);
 +                         intel_fb_gtt_offset(fb, rotation) +
 +                         intel_crtc->dspaddr_offset);
                I915_WRITE(DSPTILEOFF(plane), (y << 16) | x);
                I915_WRITE(DSPLINOFF(plane), linear_offset);
        } else
 -              I915_WRITE(DSPADDR(plane), i915_gem_obj_ggtt_offset(obj) + linear_offset);
 +              I915_WRITE(DSPADDR(plane), i915_gem_object_ggtt_offset(obj, NULL) + linear_offset);
        POSTING_READ(reg);
  }
  
@@@ -3133,13 -2741,15 +3133,13 @@@ static void ironlake_update_primary_pla
        struct drm_i915_private *dev_priv = to_i915(dev);
        struct intel_crtc *intel_crtc = to_intel_crtc(crtc_state->base.crtc);
        struct drm_framebuffer *fb = plane_state->base.fb;
 -      struct drm_i915_gem_object *obj = intel_fb_obj(fb);
        int plane = intel_crtc->plane;
        u32 linear_offset;
        u32 dspcntr;
        i915_reg_t reg = DSPCNTR(plane);
        unsigned int rotation = plane_state->base.rotation;
 -      int cpp = drm_format_plane_cpp(fb->pixel_format, 0);
 -      int x = plane_state->src.x1 >> 16;
 -      int y = plane_state->src.y1 >> 16;
 +      int x = plane_state->base.src.x1 >> 16;
 +      int y = plane_state->base.src.y1 >> 16;
  
        dspcntr = DISPPLANE_GAMMA_ENABLE;
        dspcntr |= DISPLAY_PLANE_ENABLE;
                BUG();
        }
  
 -      if (obj->tiling_mode != I915_TILING_NONE)
 +      if (fb->modifier[0] == I915_FORMAT_MOD_X_TILED)
                dspcntr |= DISPPLANE_TILED;
  
        if (!IS_HASWELL(dev) && !IS_BROADWELL(dev))
                dspcntr |= DISPPLANE_TRICKLE_FEED_DISABLE;
  
 -      linear_offset = y * fb->pitches[0] + x * cpp;
 +      intel_add_fb_offsets(&x, &y, plane_state, 0);
 +
        intel_crtc->dspaddr_offset =
 -              intel_compute_tile_offset(&x, &y, fb, 0,
 -                                        fb->pitches[0], rotation);
 -      linear_offset -= intel_crtc->dspaddr_offset;
 -      if (rotation == BIT(DRM_ROTATE_180)) {
 +              intel_compute_tile_offset(&x, &y, plane_state, 0);
 +
 +      if (rotation == DRM_ROTATE_180) {
                dspcntr |= DISPPLANE_ROTATE_180;
  
                if (!IS_HASWELL(dev) && !IS_BROADWELL(dev)) {
                        x += (crtc_state->pipe_src_w - 1);
                        y += (crtc_state->pipe_src_h - 1);
 -
 -                      /* Finding the last pixel of the last line of the display
 -                      data and adding to linear_offset*/
 -                      linear_offset +=
 -                              (crtc_state->pipe_src_h - 1) * fb->pitches[0] +
 -                              (crtc_state->pipe_src_w - 1) * cpp;
                }
        }
  
 +      linear_offset = intel_fb_xy_to_linear(x, y, plane_state, 0);
 +
        intel_crtc->adjusted_x = x;
        intel_crtc->adjusted_y = y;
  
  
        I915_WRITE(DSPSTRIDE(plane), fb->pitches[0]);
        I915_WRITE(DSPSURF(plane),
 -                 i915_gem_obj_ggtt_offset(obj) + intel_crtc->dspaddr_offset);
 +                 intel_fb_gtt_offset(fb, rotation) +
 +                 intel_crtc->dspaddr_offset);
        if (IS_HASWELL(dev) || IS_BROADWELL(dev)) {
                I915_WRITE(DSPOFFSET(plane), (y << 16) | x);
        } else {
@@@ -3222,21 -2835,32 +3222,21 @@@ u32 intel_fb_stride_alignment(const str
        }
  }
  
 -u32 intel_plane_obj_offset(struct intel_plane *intel_plane,
 -                         struct drm_i915_gem_object *obj,
 -                         unsigned int plane)
 +u32 intel_fb_gtt_offset(struct drm_framebuffer *fb,
 +                      unsigned int rotation)
  {
 +      struct drm_i915_gem_object *obj = intel_fb_obj(fb);
        struct i915_ggtt_view view;
        struct i915_vma *vma;
 -      u64 offset;
  
 -      intel_fill_fb_ggtt_view(&view, intel_plane->base.state->fb,
 -                              intel_plane->base.state->rotation);
 +      intel_fill_fb_ggtt_view(&view, fb, rotation);
  
 -      vma = i915_gem_obj_to_ggtt_view(obj, &view);
 +      vma = i915_gem_object_to_ggtt(obj, &view);
        if (WARN(!vma, "ggtt vma for display object not found! (view=%u)\n",
 -              view.type))
 +               view.type))
                return -1;
  
 -      offset = vma->node.start;
 -
 -      if (plane == 1) {
 -              offset += vma->ggtt_view.params.rotated.uv_start_page *
 -                        PAGE_SIZE;
 -      }
 -
 -      WARN_ON(upper_32_bits(offset));
 -
 -      return lower_32_bits(offset);
 +      return i915_ggtt_offset(vma);
  }
  
  static void skl_detach_scaler(struct intel_crtc *intel_crtc, int id)
@@@ -3266,28 -2890,6 +3266,28 @@@ static void skl_detach_scalers(struct i
        }
  }
  
 +u32 skl_plane_stride(const struct drm_framebuffer *fb, int plane,
 +                   unsigned int rotation)
 +{
 +      const struct drm_i915_private *dev_priv = to_i915(fb->dev);
 +      u32 stride = intel_fb_pitch(fb, plane, rotation);
 +
 +      /*
 +       * The stride is expressed either as a multiple of 64-byte chunks for
 +       * linear buffers or as a number of tiles for tiled buffers.
 +       */
 +      if (intel_rotation_90_or_270(rotation)) {
 +              int cpp = drm_format_plane_cpp(fb->pixel_format, plane);
 +
 +              stride /= intel_tile_height(dev_priv, fb->modifier[0], cpp);
 +      } else {
 +              stride /= intel_fb_stride_alignment(dev_priv, fb->modifier[0],
 +                                                  fb->pixel_format);
 +      }
 +
 +      return stride;
 +}
 +
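For illustration only (not part of the commit): per the comment above, a linear framebuffer's stride is programmed in 64-byte chunks, so intel_fb_stride_alignment() is assumed to return 64 in the unrotated case sketched here:

#include <stdio.h>

int main(void)
{
	unsigned int pitch = 7680;	/* e.g. 1920 pixels * 4 bytes/pixel */
	unsigned int chunk = 64;	/* assumed linear stride unit */

	printf("PLANE_STRIDE = %u\n", pitch / chunk);	/* prints 120 */
	return 0;
}
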
  u32 skl_plane_ctl_format(uint32_t pixel_format)
  {
        switch (pixel_format) {
@@@ -3350,17 -2952,17 +3350,17 @@@ u32 skl_plane_ctl_tiling(uint64_t fb_mo
  u32 skl_plane_ctl_rotation(unsigned int rotation)
  {
        switch (rotation) {
 -      case BIT(DRM_ROTATE_0):
 +      case DRM_ROTATE_0:
                break;
        /*
         * DRM_ROTATE_ is counter-clockwise to stay compatible with Xrandr,
         * while i915 HW rotation is clockwise; that's why the values are swapped.
         */
 -      case BIT(DRM_ROTATE_90):
 +      case DRM_ROTATE_90:
                return PLANE_CTL_ROTATE_270;
 -      case BIT(DRM_ROTATE_180):
 +      case DRM_ROTATE_180:
                return PLANE_CTL_ROTATE_180;
 -      case BIT(DRM_ROTATE_270):
 +      case DRM_ROTATE_270:
                return PLANE_CTL_ROTATE_90;
        default:
                MISSING_CASE(rotation);
@@@ -3377,21 -2979,22 +3377,21 @@@ static void skylake_update_primary_plan
        struct drm_i915_private *dev_priv = to_i915(dev);
        struct intel_crtc *intel_crtc = to_intel_crtc(crtc_state->base.crtc);
        struct drm_framebuffer *fb = plane_state->base.fb;
 -      struct drm_i915_gem_object *obj = intel_fb_obj(fb);
 +      const struct skl_wm_values *wm = &dev_priv->wm.skl_results;
        int pipe = intel_crtc->pipe;
 -      u32 plane_ctl, stride_div, stride;
 -      u32 tile_height, plane_offset, plane_size;
 +      u32 plane_ctl;
        unsigned int rotation = plane_state->base.rotation;
 -      int x_offset, y_offset;
 -      u32 surf_addr;
 +      u32 stride = skl_plane_stride(fb, 0, rotation);
 +      u32 surf_addr = plane_state->main.offset;
        int scaler_id = plane_state->scaler_id;
 -      int src_x = plane_state->src.x1 >> 16;
 -      int src_y = plane_state->src.y1 >> 16;
 -      int src_w = drm_rect_width(&plane_state->src) >> 16;
 -      int src_h = drm_rect_height(&plane_state->src) >> 16;
 -      int dst_x = plane_state->dst.x1;
 -      int dst_y = plane_state->dst.y1;
 -      int dst_w = drm_rect_width(&plane_state->dst);
 -      int dst_h = drm_rect_height(&plane_state->dst);
 +      int src_x = plane_state->main.x;
 +      int src_y = plane_state->main.y;
 +      int src_w = drm_rect_width(&plane_state->base.src) >> 16;
 +      int src_h = drm_rect_height(&plane_state->base.src) >> 16;
 +      int dst_x = plane_state->base.dst.x1;
 +      int dst_y = plane_state->base.dst.y1;
 +      int dst_w = drm_rect_width(&plane_state->base.dst);
 +      int dst_h = drm_rect_height(&plane_state->base.dst);
  
        plane_ctl = PLANE_CTL_ENABLE |
                    PLANE_CTL_PIPE_GAMMA_ENABLE |
        plane_ctl |= PLANE_CTL_PLANE_GAMMA_DISABLE;
        plane_ctl |= skl_plane_ctl_rotation(rotation);
  
 -      stride_div = intel_fb_stride_alignment(dev_priv, fb->modifier[0],
 -                                             fb->pixel_format);
 -      surf_addr = intel_plane_obj_offset(to_intel_plane(plane), obj, 0);
 +      /* Sizes are 0 based */
 +      src_w--;
 +      src_h--;
 +      dst_w--;
 +      dst_h--;
  
 -      WARN_ON(drm_rect_width(&plane_state->src) == 0);
 +      intel_crtc->adjusted_x = src_x;
 +      intel_crtc->adjusted_y = src_y;
  
 -      if (intel_rotation_90_or_270(rotation)) {
 -              int cpp = drm_format_plane_cpp(fb->pixel_format, 0);
 -
 -              /* stride = Surface height in tiles */
 -              tile_height = intel_tile_height(dev_priv, fb->modifier[0], cpp);
 -              stride = DIV_ROUND_UP(fb->height, tile_height);
 -              x_offset = stride * tile_height - src_y - src_h;
 -              y_offset = src_x;
 -              plane_size = (src_w - 1) << 16 | (src_h - 1);
 -      } else {
 -              stride = fb->pitches[0] / stride_div;
 -              x_offset = src_x;
 -              y_offset = src_y;
 -              plane_size = (src_h - 1) << 16 | (src_w - 1);
 -      }
 -      plane_offset = y_offset << 16 | x_offset;
 -
 -      intel_crtc->adjusted_x = x_offset;
 -      intel_crtc->adjusted_y = y_offset;
 +      if (wm->dirty_pipes & drm_crtc_mask(&intel_crtc->base))
 +              skl_write_plane_wm(intel_crtc, wm, 0);
  
        I915_WRITE(PLANE_CTL(pipe, 0), plane_ctl);
 -      I915_WRITE(PLANE_OFFSET(pipe, 0), plane_offset);
 -      I915_WRITE(PLANE_SIZE(pipe, 0), plane_size);
 +      I915_WRITE(PLANE_OFFSET(pipe, 0), (src_y << 16) | src_x);
        I915_WRITE(PLANE_STRIDE(pipe, 0), stride);
 +      I915_WRITE(PLANE_SIZE(pipe, 0), (src_h << 16) | src_w);
  
        if (scaler_id >= 0) {
                uint32_t ps_ctrl = 0;
                I915_WRITE(PLANE_POS(pipe, 0), (dst_y << 16) | dst_x);
        }
  
 -      I915_WRITE(PLANE_SURF(pipe, 0), surf_addr);
 +      I915_WRITE(PLANE_SURF(pipe, 0),
 +                 intel_fb_gtt_offset(fb, rotation) + surf_addr);
  
        POSTING_READ(PLANE_SURF(pipe, 0));
  }
@@@ -3445,15 -3061,7 +3445,15 @@@ static void skylake_disable_primary_pla
  {
        struct drm_device *dev = crtc->dev;
        struct drm_i915_private *dev_priv = to_i915(dev);
 -      int pipe = to_intel_crtc(crtc)->pipe;
 +      struct intel_crtc *intel_crtc = to_intel_crtc(crtc);
 +      int pipe = intel_crtc->pipe;
 +
 +      /*
 +       * We only populate skl_results on watermark updates, and if the
 +       * plane's visibility isn't actually changing, neither are its watermarks.
 +       */
 +      if (!crtc->primary->state->visible)
 +              skl_write_plane_wm(intel_crtc, &dev_priv->wm.skl_results, 0);
  
        I915_WRITE(PLANE_CTL(pipe, 0), 0);
        I915_WRITE(PLANE_SURF(pipe, 0), 0);
@@@ -3488,7 -3096,7 +3488,7 @@@ static void intel_update_primary_planes
                struct intel_plane_state *plane_state =
                        to_intel_plane_state(plane->base.state);
  
 -              if (plane_state->visible)
 +              if (plane_state->base.visible)
                        plane->update_plane(&plane->base,
                                            to_intel_crtc_state(crtc->state),
                                            plane_state);
@@@ -3527,12 -3135,6 +3527,12 @@@ __intel_display_resume(struct drm_devic
        return ret;
  }
  
 +static bool gpu_reset_clobbers_display(struct drm_i915_private *dev_priv)
 +{
 +      return intel_has_gpu_reset(dev_priv) &&
 +              INTEL_GEN(dev_priv) < 5 && !IS_G4X(dev_priv);
 +}
 +
  void intel_prepare_reset(struct drm_i915_private *dev_priv)
  {
        struct drm_device *dev = &dev_priv->drm;
        struct drm_atomic_state *state;
        int ret;
  
 -      /* no reset support for gen2 */
 -      if (IS_GEN2(dev_priv))
 -              return;
 -
        /*
         * Need mode_config.mutex so that we don't
         * trample ongoing ->detect() and whatnot.
        }
  
        /* reset doesn't touch the display, but flips might get nuked anyway, */
 -      if (INTEL_GEN(dev_priv) >= 5 || IS_G4X(dev_priv))
 +      if (!i915.force_reset_modeset_test &&
 +          !gpu_reset_clobbers_display(dev_priv))
                return;
  
        /*
@@@ -3599,26 -3204,24 +3599,28 @@@ void intel_finish_reset(struct drm_i915
         */
        intel_complete_page_flips(dev_priv);
  
 -      /* no reset support for gen2 */
 -      if (IS_GEN2(dev_priv))
 -              return;
 +      dev_priv->modeset_restore_state = NULL;
  
        /* reset doesn't touch the display */
 -      if (INTEL_GEN(dev_priv) >= 5 || IS_G4X(dev_priv)) {
 -              /*
 -               * Flips in the rings have been nuked by the reset,
 -               * so update the base address of all primary
 -               * planes to the the last fb to make sure we're
 -               * showing the correct fb after a reset.
 -               *
 -               * FIXME: Atomic will make this obsolete since we won't schedule
 -               * CS-based flips (which might get lost in gpu resets) any more.
 -               */
 -              intel_update_primary_planes(dev);
 +      if (!gpu_reset_clobbers_display(dev_priv)) {
 +              if (!state) {
 +                      /*
 +                       * Flips in the rings have been nuked by the reset,
 +                       * so update the base address of all primary
 +                       * planes to the last fb to make sure we're
 +                       * showing the correct fb after a reset.
 +                       *
 +                       * FIXME: Atomic will make this obsolete since we won't schedule
 +                       * CS-based flips (which might get lost in gpu resets) any more.
 +                       */
 +                      intel_update_primary_planes(dev);
 +              } else {
 +                      ret = __intel_display_resume(dev, state);
 +                      if (ret)
 +                              DRM_ERROR("Restoring old state failed with %i\n", ret);
 +              }
        } else {
                /*
                 * The display has been reset as well,
        mutex_unlock(&dev->mode_config.mutex);
  }
  
 +static bool abort_flip_on_reset(struct intel_crtc *crtc)
 +{
 +      struct i915_gpu_error *error = &to_i915(crtc->base.dev)->gpu_error;
 +
 +      if (i915_reset_in_progress(error))
 +              return true;
 +
 +      if (crtc->reset_count != i915_reset_count(error))
 +              return true;
 +
 +      return false;
 +}
 +
  static bool intel_crtc_has_pending_flip(struct drm_crtc *crtc)
  {
        struct drm_device *dev = crtc->dev;
        struct intel_crtc *intel_crtc = to_intel_crtc(crtc);
 -      unsigned reset_counter;
        bool pending;
  
 -      reset_counter = i915_reset_counter(&to_i915(dev)->gpu_error);
 -      if (intel_crtc->reset_counter != reset_counter)
 +      if (abort_flip_on_reset(intel_crtc))
                return false;
  
        spin_lock_irq(&dev->event_lock);
@@@ -4308,7 -3900,7 +4310,7 @@@ static int intel_crtc_wait_for_pending_
        return 0;
  }
  
 -static void lpt_disable_iclkip(struct drm_i915_private *dev_priv)
 +void lpt_disable_iclkip(struct drm_i915_private *dev_priv)
  {
        u32 temp;
  
@@@ -4731,7 -4323,7 +4733,7 @@@ int skl_update_scaler_crtc(struct intel
                      intel_crtc->pipe, SKL_CRTC_INDEX);
  
        return skl_update_scaler(state, !state->base.active, SKL_CRTC_INDEX,
 -              &state->scaler_state.scaler_id, BIT(DRM_ROTATE_0),
 +              &state->scaler_state.scaler_id, DRM_ROTATE_0,
                state->pipe_src_w, state->pipe_src_h,
                adjusted_mode->crtc_hdisplay, adjusted_mode->crtc_vdisplay);
  }
@@@ -4756,7 -4348,7 +4758,7 @@@ static int skl_update_scaler_plane(stru
        struct drm_framebuffer *fb = plane_state->base.fb;
        int ret;
  
 -      bool force_detach = !fb || !plane_state->visible;
 +      bool force_detach = !fb || !plane_state->base.visible;
  
        DRM_DEBUG_KMS("Updating scaler for [PLANE:%d:%s] scaler_user index %u.%u\n",
                      intel_plane->base.base.id, intel_plane->base.name,
                                drm_plane_index(&intel_plane->base),
                                &plane_state->scaler_id,
                                plane_state->base.rotation,
 -                              drm_rect_width(&plane_state->src) >> 16,
 -                              drm_rect_height(&plane_state->src) >> 16,
 -                              drm_rect_width(&plane_state->dst),
 -                              drm_rect_height(&plane_state->dst));
 +                              drm_rect_width(&plane_state->base.src) >> 16,
 +                              drm_rect_height(&plane_state->base.src) >> 16,
 +                              drm_rect_width(&plane_state->base.dst),
 +                              drm_rect_height(&plane_state->base.dst));
  
        if (ret || plane_state->scaler_id < 0)
                return ret;
@@@ -5047,11 -4639,12 +5049,11 @@@ static void intel_post_plane_update(str
        struct drm_atomic_state *old_state = old_crtc_state->base.state;
        struct intel_crtc_state *pipe_config =
                to_intel_crtc_state(crtc->base.state);
 -      struct drm_device *dev = crtc->base.dev;
        struct drm_plane *primary = crtc->base.primary;
        struct drm_plane_state *old_pri_state =
                drm_atomic_get_existing_plane_state(old_state, primary);
  
 -      intel_frontbuffer_flip(dev, pipe_config->fb_bits);
 +      intel_frontbuffer_flip(to_i915(crtc->base.dev), pipe_config->fb_bits);
  
        crtc->wm.cxsr_allowed = true;
  
  
                intel_fbc_post_update(crtc);
  
 -              if (primary_state->visible &&
 +              if (primary_state->base.visible &&
                    (needs_modeset(&pipe_config->base) ||
 -                   !old_primary_state->visible))
 +                   !old_primary_state->base.visible))
                        intel_post_enable_primary(&crtc->base);
        }
  }
@@@ -5094,8 -4687,8 +5096,8 @@@ static void intel_pre_plane_update(stru
  
                intel_fbc_pre_update(crtc, pipe_config, primary_state);
  
 -              if (old_primary_state->visible &&
 -                  (modeset || !primary_state->visible))
 +              if (old_primary_state->base.visible &&
 +                  (modeset || !primary_state->base.visible))
                        intel_pre_disable_primary(&crtc->base);
        }
  
@@@ -5174,140 -4767,18 +5176,140 @@@ static void intel_crtc_disable_planes(s
         * to compute the mask of flip planes precisely. For the time being
         * consider this a flip to a NULL plane.
         */
 -      intel_frontbuffer_flip(dev, INTEL_FRONTBUFFER_ALL_MASK(pipe));
 +      intel_frontbuffer_flip(to_i915(dev), INTEL_FRONTBUFFER_ALL_MASK(pipe));
 +}
 +
 +static void intel_encoders_pre_pll_enable(struct drm_crtc *crtc,
 +                                        struct intel_crtc_state *crtc_state,
 +                                        struct drm_atomic_state *old_state)
 +{
 +      struct drm_connector_state *old_conn_state;
 +      struct drm_connector *conn;
 +      int i;
 +
 +      for_each_connector_in_state(old_state, conn, old_conn_state, i) {
 +              struct drm_connector_state *conn_state = conn->state;
 +              struct intel_encoder *encoder =
 +                      to_intel_encoder(conn_state->best_encoder);
 +
 +              if (conn_state->crtc != crtc)
 +                      continue;
 +
 +              if (encoder->pre_pll_enable)
 +                      encoder->pre_pll_enable(encoder, crtc_state, conn_state);
 +      }
 +}
 +
 +static void intel_encoders_pre_enable(struct drm_crtc *crtc,
 +                                    struct intel_crtc_state *crtc_state,
 +                                    struct drm_atomic_state *old_state)
 +{
 +      struct drm_connector_state *old_conn_state;
 +      struct drm_connector *conn;
 +      int i;
 +
 +      for_each_connector_in_state(old_state, conn, old_conn_state, i) {
 +              struct drm_connector_state *conn_state = conn->state;
 +              struct intel_encoder *encoder =
 +                      to_intel_encoder(conn_state->best_encoder);
 +
 +              if (conn_state->crtc != crtc)
 +                      continue;
 +
 +              if (encoder->pre_enable)
 +                      encoder->pre_enable(encoder, crtc_state, conn_state);
 +      }
 +}
 +
 +static void intel_encoders_enable(struct drm_crtc *crtc,
 +                                struct intel_crtc_state *crtc_state,
 +                                struct drm_atomic_state *old_state)
 +{
 +      struct drm_connector_state *old_conn_state;
 +      struct drm_connector *conn;
 +      int i;
 +
 +      for_each_connector_in_state(old_state, conn, old_conn_state, i) {
 +              struct drm_connector_state *conn_state = conn->state;
 +              struct intel_encoder *encoder =
 +                      to_intel_encoder(conn_state->best_encoder);
 +
 +              if (conn_state->crtc != crtc)
 +                      continue;
 +
 +              encoder->enable(encoder, crtc_state, conn_state);
 +              intel_opregion_notify_encoder(encoder, true);
 +      }
 +}
 +
 +static void intel_encoders_disable(struct drm_crtc *crtc,
 +                                 struct intel_crtc_state *old_crtc_state,
 +                                 struct drm_atomic_state *old_state)
 +{
 +      struct drm_connector_state *old_conn_state;
 +      struct drm_connector *conn;
 +      int i;
 +
 +      for_each_connector_in_state(old_state, conn, old_conn_state, i) {
 +              struct intel_encoder *encoder =
 +                      to_intel_encoder(old_conn_state->best_encoder);
 +
 +              if (old_conn_state->crtc != crtc)
 +                      continue;
 +
 +              intel_opregion_notify_encoder(encoder, false);
 +              encoder->disable(encoder, old_crtc_state, old_conn_state);
 +      }
 +}
 +
 +static void intel_encoders_post_disable(struct drm_crtc *crtc,
 +                                      struct intel_crtc_state *old_crtc_state,
 +                                      struct drm_atomic_state *old_state)
 +{
 +      struct drm_connector_state *old_conn_state;
 +      struct drm_connector *conn;
 +      int i;
 +
 +      for_each_connector_in_state(old_state, conn, old_conn_state, i) {
 +              struct intel_encoder *encoder =
 +                      to_intel_encoder(old_conn_state->best_encoder);
 +
 +              if (old_conn_state->crtc != crtc)
 +                      continue;
 +
 +              if (encoder->post_disable)
 +                      encoder->post_disable(encoder, old_crtc_state, old_conn_state);
 +      }
 +}
 +
 +static void intel_encoders_post_pll_disable(struct drm_crtc *crtc,
 +                                          struct intel_crtc_state *old_crtc_state,
 +                                          struct drm_atomic_state *old_state)
 +{
 +      struct drm_connector_state *old_conn_state;
 +      struct drm_connector *conn;
 +      int i;
 +
 +      for_each_connector_in_state(old_state, conn, old_conn_state, i) {
 +              struct intel_encoder *encoder =
 +                      to_intel_encoder(old_conn_state->best_encoder);
 +
 +              if (old_conn_state->crtc != crtc)
 +                      continue;
 +
 +              if (encoder->post_pll_disable)
 +                      encoder->post_pll_disable(encoder, old_crtc_state, old_conn_state);
 +      }
  }
  
 -static void ironlake_crtc_enable(struct drm_crtc *crtc)
 +static void ironlake_crtc_enable(struct intel_crtc_state *pipe_config,
 +                               struct drm_atomic_state *old_state)
  {
 +      struct drm_crtc *crtc = pipe_config->base.crtc;
        struct drm_device *dev = crtc->dev;
        struct drm_i915_private *dev_priv = to_i915(dev);
        struct intel_crtc *intel_crtc = to_intel_crtc(crtc);
 -      struct intel_encoder *encoder;
        int pipe = intel_crtc->pipe;
 -      struct intel_crtc_state *pipe_config =
 -              to_intel_crtc_state(crtc->state);
  
        if (WARN_ON(intel_crtc->active))
                return;
  
        intel_crtc->active = true;
  
 -      for_each_encoder_on_crtc(dev, crtc, encoder)
 -              if (encoder->pre_enable)
 -                      encoder->pre_enable(encoder);
 +      intel_encoders_pre_enable(crtc, pipe_config, old_state);
  
        if (intel_crtc->config->has_pch_encoder) {
                /* Note: FDI PLL enabling _must_ be done before we enable the
        assert_vblank_disabled(crtc);
        drm_crtc_vblank_on(crtc);
  
 -      for_each_encoder_on_crtc(dev, crtc, encoder)
 -              encoder->enable(encoder);
 +      intel_encoders_enable(crtc, pipe_config, old_state);
  
        if (HAS_PCH_CPT(dev))
                cpt_verify_modeset(dev, intel_crtc->pipe);
@@@ -5393,15 -4867,16 +5395,15 @@@ static bool hsw_crtc_supports_ips(struc
        return HAS_IPS(crtc->base.dev) && crtc->pipe == PIPE_A;
  }
  
 -static void haswell_crtc_enable(struct drm_crtc *crtc)
 +static void haswell_crtc_enable(struct intel_crtc_state *pipe_config,
 +                              struct drm_atomic_state *old_state)
  {
 +      struct drm_crtc *crtc = pipe_config->base.crtc;
        struct drm_device *dev = crtc->dev;
        struct drm_i915_private *dev_priv = to_i915(dev);
        struct intel_crtc *intel_crtc = to_intel_crtc(crtc);
 -      struct intel_encoder *encoder;
        int pipe = intel_crtc->pipe, hsw_workaround_pipe;
        enum transcoder cpu_transcoder = intel_crtc->config->cpu_transcoder;
 -      struct intel_crtc_state *pipe_config =
 -              to_intel_crtc_state(crtc->state);
  
        if (WARN_ON(intel_crtc->active))
                return;
                intel_set_pch_fifo_underrun_reporting(dev_priv, TRANSCODER_A,
                                                      false);
  
 -      for_each_encoder_on_crtc(dev, crtc, encoder)
 -              if (encoder->pre_pll_enable)
 -                      encoder->pre_pll_enable(encoder);
 +      intel_encoders_pre_pll_enable(crtc, pipe_config, old_state);
  
        if (intel_crtc->config->shared_dpll)
                intel_enable_shared_dpll(intel_crtc);
        else
                intel_set_cpu_fifo_underrun_reporting(dev_priv, pipe, true);
  
 -      for_each_encoder_on_crtc(dev, crtc, encoder) {
 -              if (encoder->pre_enable)
 -                      encoder->pre_enable(encoder);
 -      }
 +      intel_encoders_pre_enable(crtc, pipe_config, old_state);
  
        if (intel_crtc->config->has_pch_encoder)
                dev_priv->display.fdi_link_train(crtc);
        assert_vblank_disabled(crtc);
        drm_crtc_vblank_on(crtc);
  
 -      for_each_encoder_on_crtc(dev, crtc, encoder) {
 -              encoder->enable(encoder);
 -              intel_opregion_notify_encoder(encoder, true);
 -      }
 +      intel_encoders_enable(crtc, pipe_config, old_state);
  
        if (intel_crtc->config->has_pch_encoder) {
                intel_wait_for_vblank(dev, pipe);
@@@ -5523,13 -5006,12 +5525,13 @@@ static void ironlake_pfit_disable(struc
        }
  }
  
 -static void ironlake_crtc_disable(struct drm_crtc *crtc)
 +static void ironlake_crtc_disable(struct intel_crtc_state *old_crtc_state,
 +                                struct drm_atomic_state *old_state)
  {
 +      struct drm_crtc *crtc = old_crtc_state->base.crtc;
        struct drm_device *dev = crtc->dev;
        struct drm_i915_private *dev_priv = to_i915(dev);
        struct intel_crtc *intel_crtc = to_intel_crtc(crtc);
 -      struct intel_encoder *encoder;
        int pipe = intel_crtc->pipe;
  
        /*
                intel_set_pch_fifo_underrun_reporting(dev_priv, pipe, false);
        }
  
 -      for_each_encoder_on_crtc(dev, crtc, encoder)
 -              encoder->disable(encoder);
 +      intel_encoders_disable(crtc, old_crtc_state, old_state);
  
        drm_crtc_vblank_off(crtc);
        assert_vblank_disabled(crtc);
        if (intel_crtc->config->has_pch_encoder)
                ironlake_fdi_disable(crtc);
  
 -      for_each_encoder_on_crtc(dev, crtc, encoder)
 -              if (encoder->post_disable)
 -                      encoder->post_disable(encoder);
 +      intel_encoders_post_disable(crtc, old_crtc_state, old_state);
  
        if (intel_crtc->config->has_pch_encoder) {
                ironlake_disable_pch_transcoder(dev_priv, pipe);
        intel_set_pch_fifo_underrun_reporting(dev_priv, pipe, true);
  }
  
 -static void haswell_crtc_disable(struct drm_crtc *crtc)
 +static void haswell_crtc_disable(struct intel_crtc_state *old_crtc_state,
 +                               struct drm_atomic_state *old_state)
  {
 +      struct drm_crtc *crtc = old_crtc_state->base.crtc;
        struct drm_device *dev = crtc->dev;
        struct drm_i915_private *dev_priv = to_i915(dev);
        struct intel_crtc *intel_crtc = to_intel_crtc(crtc);
 -      struct intel_encoder *encoder;
        enum transcoder cpu_transcoder = intel_crtc->config->cpu_transcoder;
  
        if (intel_crtc->config->has_pch_encoder)
                intel_set_pch_fifo_underrun_reporting(dev_priv, TRANSCODER_A,
                                                      false);
  
 -      for_each_encoder_on_crtc(dev, crtc, encoder) {
 -              intel_opregion_notify_encoder(encoder, false);
 -              encoder->disable(encoder);
 -      }
 +      intel_encoders_disable(crtc, old_crtc_state, old_state);
  
        drm_crtc_vblank_off(crtc);
        assert_vblank_disabled(crtc);
        if (!transcoder_is_dsi(cpu_transcoder))
                intel_ddi_disable_pipe_clock(intel_crtc);
  
 -      for_each_encoder_on_crtc(dev, crtc, encoder)
 -              if (encoder->post_disable)
 -                      encoder->post_disable(encoder);
 -
 -      if (intel_crtc->config->has_pch_encoder) {
 -              lpt_disable_pch_transcoder(dev_priv);
 -              lpt_disable_iclkip(dev_priv);
 -              intel_ddi_fdi_disable(crtc);
 +      intel_encoders_post_disable(crtc, old_crtc_state, old_state);
  
 +      if (old_crtc_state->has_pch_encoder)
                intel_set_pch_fifo_underrun_reporting(dev_priv, TRANSCODER_A,
                                                      true);
 -      }
  }
  
  static void i9xx_pfit_enable(struct intel_crtc *crtc)
@@@ -6680,13 -6174,14 +6682,13 @@@ static void valleyview_modeset_commit_c
        intel_display_power_put(dev_priv, POWER_DOMAIN_PIPE_A);
  }
  
 -static void valleyview_crtc_enable(struct drm_crtc *crtc)
 +static void valleyview_crtc_enable(struct intel_crtc_state *pipe_config,
 +                                 struct drm_atomic_state *old_state)
  {
 +      struct drm_crtc *crtc = pipe_config->base.crtc;
        struct drm_device *dev = crtc->dev;
        struct drm_i915_private *dev_priv = to_i915(dev);
        struct intel_crtc *intel_crtc = to_intel_crtc(crtc);
 -      struct intel_encoder *encoder;
 -      struct intel_crtc_state *pipe_config =
 -              to_intel_crtc_state(crtc->state);
        int pipe = intel_crtc->pipe;
  
        if (WARN_ON(intel_crtc->active))
  
        intel_set_cpu_fifo_underrun_reporting(dev_priv, pipe, true);
  
 -      for_each_encoder_on_crtc(dev, crtc, encoder)
 -              if (encoder->pre_pll_enable)
 -                      encoder->pre_pll_enable(encoder);
 +      intel_encoders_pre_pll_enable(crtc, pipe_config, old_state);
  
        if (IS_CHERRYVIEW(dev)) {
                chv_prepare_pll(intel_crtc, intel_crtc->config);
                vlv_enable_pll(intel_crtc, intel_crtc->config);
        }
  
 -      for_each_encoder_on_crtc(dev, crtc, encoder)
 -              if (encoder->pre_enable)
 -                      encoder->pre_enable(encoder);
 +      intel_encoders_pre_enable(crtc, pipe_config, old_state);
  
        i9xx_pfit_enable(intel_crtc);
  
        assert_vblank_disabled(crtc);
        drm_crtc_vblank_on(crtc);
  
 -      for_each_encoder_on_crtc(dev, crtc, encoder)
 -              encoder->enable(encoder);
 +      intel_encoders_enable(crtc, pipe_config, old_state);
  }
  
  static void i9xx_set_pll_dividers(struct intel_crtc *crtc)
        I915_WRITE(FP1(crtc->pipe), crtc->config->dpll_hw_state.fp1);
  }
  
 -static void i9xx_crtc_enable(struct drm_crtc *crtc)
 +static void i9xx_crtc_enable(struct intel_crtc_state *pipe_config,
 +                           struct drm_atomic_state *old_state)
  {
 +      struct drm_crtc *crtc = pipe_config->base.crtc;
        struct drm_device *dev = crtc->dev;
        struct drm_i915_private *dev_priv = to_i915(dev);
        struct intel_crtc *intel_crtc = to_intel_crtc(crtc);
 -      struct intel_encoder *encoder;
 -      struct intel_crtc_state *pipe_config =
 -              to_intel_crtc_state(crtc->state);
        enum pipe pipe = intel_crtc->pipe;
  
        if (WARN_ON(intel_crtc->active))
        if (!IS_GEN2(dev))
                intel_set_cpu_fifo_underrun_reporting(dev_priv, pipe, true);
  
 -      for_each_encoder_on_crtc(dev, crtc, encoder)
 -              if (encoder->pre_enable)
 -                      encoder->pre_enable(encoder);
 +      intel_encoders_pre_enable(crtc, pipe_config, old_state);
  
        i9xx_enable_pll(intel_crtc);
  
        assert_vblank_disabled(crtc);
        drm_crtc_vblank_on(crtc);
  
 -      for_each_encoder_on_crtc(dev, crtc, encoder)
 -              encoder->enable(encoder);
 +      intel_encoders_enable(crtc, pipe_config, old_state);
  }
  
  static void i9xx_pfit_disable(struct intel_crtc *crtc)
        I915_WRITE(PFIT_CONTROL, 0);
  }
  
 -static void i9xx_crtc_disable(struct drm_crtc *crtc)
 +static void i9xx_crtc_disable(struct intel_crtc_state *old_crtc_state,
 +                            struct drm_atomic_state *old_state)
  {
 +      struct drm_crtc *crtc = old_crtc_state->base.crtc;
        struct drm_device *dev = crtc->dev;
        struct drm_i915_private *dev_priv = to_i915(dev);
        struct intel_crtc *intel_crtc = to_intel_crtc(crtc);
 -      struct intel_encoder *encoder;
        int pipe = intel_crtc->pipe;
  
        /*
        if (IS_GEN2(dev))
                intel_wait_for_vblank(dev, pipe);
  
 -      for_each_encoder_on_crtc(dev, crtc, encoder)
 -              encoder->disable(encoder);
 +      intel_encoders_disable(crtc, old_crtc_state, old_state);
  
        drm_crtc_vblank_off(crtc);
        assert_vblank_disabled(crtc);
  
        i9xx_pfit_disable(intel_crtc);
  
 -      for_each_encoder_on_crtc(dev, crtc, encoder)
 -              if (encoder->post_disable)
 -                      encoder->post_disable(encoder);
 +      intel_encoders_post_disable(crtc, old_crtc_state, old_state);
  
        if (!intel_crtc_has_type(intel_crtc->config, INTEL_OUTPUT_DSI)) {
                if (IS_CHERRYVIEW(dev))
                        i9xx_disable_pll(intel_crtc);
        }
  
 -      for_each_encoder_on_crtc(dev, crtc, encoder)
 -              if (encoder->post_pll_disable)
 -                      encoder->post_pll_disable(encoder);
 +      intel_encoders_post_pll_disable(crtc, old_crtc_state, old_state);
  
        if (!IS_GEN2(dev))
                intel_set_cpu_fifo_underrun_reporting(dev_priv, pipe, false);
@@@ -6853,34 -6361,20 +6855,34 @@@ static void intel_crtc_disable_noatomic
        struct drm_i915_private *dev_priv = to_i915(crtc->dev);
        enum intel_display_power_domain domain;
        unsigned long domains;
 +      struct drm_atomic_state *state;
 +      struct intel_crtc_state *crtc_state;
 +      int ret;
  
        if (!intel_crtc->active)
                return;
  
 -      if (to_intel_plane_state(crtc->primary->state)->visible) {
 +      if (to_intel_plane_state(crtc->primary->state)->base.visible) {
                WARN_ON(intel_crtc->flip_work);
  
                intel_pre_disable_primary_noatomic(crtc);
  
                intel_crtc_disable_planes(crtc, 1 << drm_plane_index(crtc->primary));
 -              to_intel_plane_state(crtc->primary->state)->visible = false;
 +              to_intel_plane_state(crtc->primary->state)->base.visible = false;
        }
  
 -      dev_priv->display.crtc_disable(crtc);
 +      state = drm_atomic_state_alloc(crtc->dev);
 +      state->acquire_ctx = crtc->dev->mode_config.acquire_ctx;
 +
 +      /* Everything's already locked, -EDEADLK can't happen. */
 +      crtc_state = intel_atomic_get_crtc_state(state, intel_crtc);
 +      ret = drm_atomic_add_affected_connectors(state, crtc);
 +
 +      WARN_ON(IS_ERR(crtc_state) || ret);
 +
 +      dev_priv->display.crtc_disable(crtc_state, state);
 +
 +      drm_atomic_state_free(state);
  
        DRM_DEBUG_KMS("[CRTC:%d:%s] hw state adjusted, was enabled, now disabled\n",
                      crtc->base.id, crtc->name);
@@@ -7395,10 -6889,9 +7397,10 @@@ static int i9xx_misc_get_display_clock_
  
  static int pnv_get_display_clock_speed(struct drm_device *dev)
  {
 +      struct pci_dev *pdev = dev->pdev;
        u16 gcfgc = 0;
  
 -      pci_read_config_word(dev->pdev, GCFGC, &gcfgc);
 +      pci_read_config_word(pdev, GCFGC, &gcfgc);
  
        switch (gcfgc & GC_DISPLAY_CLOCK_MASK) {
        case GC_DISPLAY_CLOCK_267_MHZ_PNV:
  
  static int i915gm_get_display_clock_speed(struct drm_device *dev)
  {
 +      struct pci_dev *pdev = dev->pdev;
        u16 gcfgc = 0;
  
 -      pci_read_config_word(dev->pdev, GCFGC, &gcfgc);
 +      pci_read_config_word(pdev, GCFGC, &gcfgc);
  
        if (gcfgc & GC_LOW_FREQUENCY_ENABLE)
                return 133333;
@@@ -7445,7 -6937,6 +7447,7 @@@ static int i865_get_display_clock_speed
  
  static int i85x_get_display_clock_speed(struct drm_device *dev)
  {
 +      struct pci_dev *pdev = dev->pdev;
        u16 hpllcc = 0;
  
        /*
         * encoding is different :(
         * FIXME is this the right way to detect 852GM/852GMV?
         */
 -      if (dev->pdev->revision == 0x1)
 +      if (pdev->revision == 0x1)
                return 133333;
  
 -      pci_bus_read_config_word(dev->pdev->bus,
 +      pci_bus_read_config_word(pdev->bus,
                                 PCI_DEVFN(0, 3), HPLLCC, &hpllcc);
  
        /* Assume that the hardware is in the high speed state.  This
@@@ -7557,11 -7048,10 +7559,11 @@@ static unsigned int intel_hpll_vco(stru
  
  static int gm45_get_display_clock_speed(struct drm_device *dev)
  {
 +      struct pci_dev *pdev = dev->pdev;
        unsigned int cdclk_sel, vco = intel_hpll_vco(dev);
        uint16_t tmp = 0;
  
 -      pci_read_config_word(dev->pdev, GCFGC, &tmp);
 +      pci_read_config_word(pdev, GCFGC, &tmp);
  
        cdclk_sel = (tmp >> 12) & 0x1;
  
  
  static int i965gm_get_display_clock_speed(struct drm_device *dev)
  {
 +      struct pci_dev *pdev = dev->pdev;
        static const uint8_t div_3200[] = { 16, 10,  8 };
        static const uint8_t div_4000[] = { 20, 12, 10 };
        static const uint8_t div_5333[] = { 24, 16, 14 };
        unsigned int cdclk_sel, vco = intel_hpll_vco(dev);
        uint16_t tmp = 0;
  
 -      pci_read_config_word(dev->pdev, GCFGC, &tmp);
 +      pci_read_config_word(pdev, GCFGC, &tmp);
  
        cdclk_sel = ((tmp >> 8) & 0x1f) - 1;
  
@@@ -7618,7 -7107,6 +7620,7 @@@ fail
  
  static int g33_get_display_clock_speed(struct drm_device *dev)
  {
 +      struct pci_dev *pdev = dev->pdev;
        static const uint8_t div_3200[] = { 12, 10,  8,  7, 5, 16 };
        static const uint8_t div_4000[] = { 14, 12, 10,  8, 6, 20 };
        static const uint8_t div_4800[] = { 20, 14, 12, 10, 8, 24 };
        unsigned int cdclk_sel, vco = intel_hpll_vco(dev);
        uint16_t tmp = 0;
  
 -      pci_read_config_word(dev->pdev, GCFGC, &tmp);
 +      pci_read_config_word(pdev, GCFGC, &tmp);
  
        cdclk_sel = (tmp >> 4) & 0x7;
  
@@@ -9793,7 -9281,7 +9795,7 @@@ skylake_get_initial_plane_config(struc
        return;
  
  error:
 -      kfree(fb);
 +      kfree(intel_fb);
  }
  
  static void ironlake_get_pfit_config(struct intel_crtc *crtc,
@@@ -9999,7 -9487,7 +10001,7 @@@ static void assert_can_disable_lcpll(st
        I915_STATE_WARN(I915_READ(SPLL_CTL) & SPLL_PLL_ENABLE, "SPLL enabled\n");
        I915_STATE_WARN(I915_READ(WRPLL_CTL(0)) & WRPLL_PLL_ENABLE, "WRPLL1 enabled\n");
        I915_STATE_WARN(I915_READ(WRPLL_CTL(1)) & WRPLL_PLL_ENABLE, "WRPLL2 enabled\n");
 -      I915_STATE_WARN(I915_READ(PCH_PP_STATUS) & PP_ON, "Panel power on\n");
 +      I915_STATE_WARN(I915_READ(PP_STATUS(0)) & PP_ON, "Panel power on\n");
        I915_STATE_WARN(I915_READ(BLC_PWM_CPU_CTL2) & BLM_PWM_ENABLE,
             "CPU PWM1 enabled\n");
        if (IS_HASWELL(dev))
@@@ -10038,7 -9526,7 +10040,7 @@@ static void hsw_write_dcomp(struct drm_
                mutex_lock(&dev_priv->rps.hw_lock);
                if (sandybridge_pcode_write(dev_priv, GEN6_PCODE_WRITE_D_COMP,
                                            val))
 -                      DRM_ERROR("Failed to write to D_COMP\n");
 +                      DRM_DEBUG_KMS("Failed to write to D_COMP\n");
                mutex_unlock(&dev_priv->rps.hw_lock);
        } else {
                I915_WRITE(D_COMP_BDW, val);
@@@ -10446,12 -9934,15 +10448,12 @@@ static void bxt_get_ddi_pll(struct drm_
  
        switch (port) {
        case PORT_A:
 -              pipe_config->ddi_pll_sel = SKL_DPLL0;
                id = DPLL_ID_SKL_DPLL0;
                break;
        case PORT_B:
 -              pipe_config->ddi_pll_sel = SKL_DPLL1;
                id = DPLL_ID_SKL_DPLL1;
                break;
        case PORT_C:
 -              pipe_config->ddi_pll_sel = SKL_DPLL2;
                id = DPLL_ID_SKL_DPLL2;
                break;
        default:
@@@ -10470,10 -9961,25 +10472,10 @@@ static void skylake_get_ddi_pll(struct 
        u32 temp;
  
        temp = I915_READ(DPLL_CTRL2) & DPLL_CTRL2_DDI_CLK_SEL_MASK(port);
 -      pipe_config->ddi_pll_sel = temp >> (port * 3 + 1);
 +      id = temp >> (port * 3 + 1);
  
 -      switch (pipe_config->ddi_pll_sel) {
 -      case SKL_DPLL0:
 -              id = DPLL_ID_SKL_DPLL0;
 -              break;
 -      case SKL_DPLL1:
 -              id = DPLL_ID_SKL_DPLL1;
 -              break;
 -      case SKL_DPLL2:
 -              id = DPLL_ID_SKL_DPLL2;
 -              break;
 -      case SKL_DPLL3:
 -              id = DPLL_ID_SKL_DPLL3;
 -              break;
 -      default:
 -              MISSING_CASE(pipe_config->ddi_pll_sel);
 +      if (WARN_ON(id < SKL_DPLL0 || id > SKL_DPLL3))
                return;
 -      }
  
        pipe_config->shared_dpll = intel_get_shared_dpll_by_id(dev_priv, id);
  }
@@@ -10483,9 -9989,10 +10485,9 @@@ static void haswell_get_ddi_pll(struct 
                                struct intel_crtc_state *pipe_config)
  {
        enum intel_dpll_id id;
 +      uint32_t ddi_pll_sel = I915_READ(PORT_CLK_SEL(port));
  
 -      pipe_config->ddi_pll_sel = I915_READ(PORT_CLK_SEL(port));
 -
 -      switch (pipe_config->ddi_pll_sel) {
 +      switch (ddi_pll_sel) {
        case PORT_CLK_SEL_WRPLL1:
                id = DPLL_ID_WRPLL1;
                break;
                id = DPLL_ID_LCPLL_2700;
                break;
        default:
 -              MISSING_CASE(pipe_config->ddi_pll_sel);
 +              MISSING_CASE(ddi_pll_sel);
                /* fall through */
        case PORT_CLK_SEL_NONE:
                return;
@@@ -10738,7 -10245,7 +10740,7 @@@ static void i845_update_cursor(struct d
        struct intel_crtc *intel_crtc = to_intel_crtc(crtc);
        uint32_t cntl = 0, size = 0;
  
 -      if (plane_state && plane_state->visible) {
 +      if (plane_state && plane_state->base.visible) {
                unsigned int width = plane_state->base.crtc_w;
                unsigned int height = plane_state->base.crtc_h;
                unsigned int stride = roundup_pow_of_two(width) * 4;
@@@ -10799,14 -10306,10 +10801,14 @@@ static void i9xx_update_cursor(struct d
        struct drm_device *dev = crtc->dev;
        struct drm_i915_private *dev_priv = to_i915(dev);
        struct intel_crtc *intel_crtc = to_intel_crtc(crtc);
 +      const struct skl_wm_values *wm = &dev_priv->wm.skl_results;
        int pipe = intel_crtc->pipe;
        uint32_t cntl = 0;
  
 -      if (plane_state && plane_state->visible) {
 +      if (INTEL_GEN(dev_priv) >= 9 && wm->dirty_pipes & drm_crtc_mask(crtc))
 +              skl_write_cursor_wm(intel_crtc, wm);
 +
 +      if (plane_state && plane_state->base.visible) {
                cntl = MCURSOR_GAMMA_ENABLE;
                switch (plane_state->base.crtc_w) {
                        case 64:
                if (HAS_DDI(dev))
                        cntl |= CURSOR_PIPE_CSC_ENABLE;
  
 -              if (plane_state->base.rotation == BIT(DRM_ROTATE_180))
 +              if (plane_state->base.rotation == DRM_ROTATE_180)
                        cntl |= CURSOR_ROTATE_180;
        }
  
@@@ -10873,7 -10376,7 +10875,7 @@@ static void intel_crtc_update_cursor(st
  
                /* ILK+ do this automagically */
                if (HAS_GMCH_DISPLAY(dev) &&
 -                  plane_state->base.rotation == BIT(DRM_ROTATE_180)) {
 +                  plane_state->base.rotation == DRM_ROTATE_180) {
                        base += (plane_state->base.crtc_h *
                                 plane_state->base.crtc_w - 1) * 4;
                }
@@@ -11006,7 -10509,7 +11008,7 @@@ intel_framebuffer_create_for_mode(struc
  
        fb = intel_framebuffer_create(dev, &mode_cmd, obj);
        if (IS_ERR(fb))
 -              drm_gem_object_unreference_unlocked(&obj->base);
 +              i915_gem_object_put_unlocked(obj);
  
        return fb;
  }
@@@ -11517,13 -11020,13 +11519,13 @@@ static void intel_unpin_work_fn(struct 
  
        mutex_lock(&dev->struct_mutex);
        intel_unpin_fb_obj(work->old_fb, primary->state->rotation);
 -      drm_gem_object_unreference(&work->pending_flip_obj->base);
 -
 -      if (work->flip_queued_req)
 -              i915_gem_request_assign(&work->flip_queued_req, NULL);
 +      i915_gem_object_put(work->pending_flip_obj);
        mutex_unlock(&dev->struct_mutex);
  
 -      intel_frontbuffer_flip_complete(dev, to_intel_plane(primary)->frontbuffer_bit);
 +      i915_gem_request_put(work->flip_queued_req);
 +
 +      intel_frontbuffer_flip_complete(to_i915(dev),
 +                                      to_intel_plane(primary)->frontbuffer_bit);
        intel_fbc_post_update(crtc);
        drm_framebuffer_unreference(work->old_fb);
  
@@@ -11544,8 -11047,10 +11546,8 @@@ static bool __pageflip_finished_cs(stru
  {
        struct drm_device *dev = crtc->base.dev;
        struct drm_i915_private *dev_priv = to_i915(dev);
 -      unsigned reset_counter;
  
 -      reset_counter = i915_reset_counter(&dev_priv->gpu_error);
 -      if (crtc->reset_counter != reset_counter)
 +      if (abort_flip_on_reset(crtc))
                return true;
  
        /*
@@@ -11686,7 -11191,7 +11688,7 @@@ static int intel_gen2_queue_flip(struc
                                 struct drm_i915_gem_request *req,
                                 uint32_t flags)
  {
 -      struct intel_engine_cs *engine = req->engine;
 +      struct intel_ring *ring = req->ring;
        struct intel_crtc *intel_crtc = to_intel_crtc(crtc);
        u32 flip_mask;
        int ret;
                flip_mask = MI_WAIT_FOR_PLANE_B_FLIP;
        else
                flip_mask = MI_WAIT_FOR_PLANE_A_FLIP;
 -      intel_ring_emit(engine, MI_WAIT_FOR_EVENT | flip_mask);
 -      intel_ring_emit(engine, MI_NOOP);
 -      intel_ring_emit(engine, MI_DISPLAY_FLIP |
 +      intel_ring_emit(ring, MI_WAIT_FOR_EVENT | flip_mask);
 +      intel_ring_emit(ring, MI_NOOP);
 +      intel_ring_emit(ring, MI_DISPLAY_FLIP |
                        MI_DISPLAY_FLIP_PLANE(intel_crtc->plane));
 -      intel_ring_emit(engine, fb->pitches[0]);
 -      intel_ring_emit(engine, intel_crtc->flip_work->gtt_offset);
 -      intel_ring_emit(engine, 0); /* aux display base address, unused */
 +      intel_ring_emit(ring, fb->pitches[0]);
 +      intel_ring_emit(ring, intel_crtc->flip_work->gtt_offset);
 +      intel_ring_emit(ring, 0); /* aux display base address, unused */
  
        return 0;
  }
@@@ -11720,7 -11225,7 +11722,7 @@@ static int intel_gen3_queue_flip(struc
                                 struct drm_i915_gem_request *req,
                                 uint32_t flags)
  {
 -      struct intel_engine_cs *engine = req->engine;
 +      struct intel_ring *ring = req->ring;
        struct intel_crtc *intel_crtc = to_intel_crtc(crtc);
        u32 flip_mask;
        int ret;
                flip_mask = MI_WAIT_FOR_PLANE_B_FLIP;
        else
                flip_mask = MI_WAIT_FOR_PLANE_A_FLIP;
 -      intel_ring_emit(engine, MI_WAIT_FOR_EVENT | flip_mask);
 -      intel_ring_emit(engine, MI_NOOP);
 -      intel_ring_emit(engine, MI_DISPLAY_FLIP_I915 |
 +      intel_ring_emit(ring, MI_WAIT_FOR_EVENT | flip_mask);
 +      intel_ring_emit(ring, MI_NOOP);
 +      intel_ring_emit(ring, MI_DISPLAY_FLIP_I915 |
                        MI_DISPLAY_FLIP_PLANE(intel_crtc->plane));
 -      intel_ring_emit(engine, fb->pitches[0]);
 -      intel_ring_emit(engine, intel_crtc->flip_work->gtt_offset);
 -      intel_ring_emit(engine, MI_NOOP);
 +      intel_ring_emit(ring, fb->pitches[0]);
 +      intel_ring_emit(ring, intel_crtc->flip_work->gtt_offset);
 +      intel_ring_emit(ring, MI_NOOP);
  
        return 0;
  }
@@@ -11751,7 -11256,7 +11753,7 @@@ static int intel_gen4_queue_flip(struc
                                 struct drm_i915_gem_request *req,
                                 uint32_t flags)
  {
 -      struct intel_engine_cs *engine = req->engine;
 +      struct intel_ring *ring = req->ring;
        struct drm_i915_private *dev_priv = to_i915(dev);
        struct intel_crtc *intel_crtc = to_intel_crtc(crtc);
        uint32_t pf, pipesrc;
         * Display Registers (which do not change across a page-flip)
         * so we need only reprogram the base address.
         */
 -      intel_ring_emit(engine, MI_DISPLAY_FLIP |
 +      intel_ring_emit(ring, MI_DISPLAY_FLIP |
                        MI_DISPLAY_FLIP_PLANE(intel_crtc->plane));
 -      intel_ring_emit(engine, fb->pitches[0]);
 -      intel_ring_emit(engine, intel_crtc->flip_work->gtt_offset |
 -                      obj->tiling_mode);
 +      intel_ring_emit(ring, fb->pitches[0]);
 +      intel_ring_emit(ring, intel_crtc->flip_work->gtt_offset |
 +                      intel_fb_modifier_to_tiling(fb->modifier[0]));
  
        /* XXX Enabling the panel-fitter across page-flip is so far
         * untested on non-native modes, so ignore it for now.
         */
        pf = 0;
        pipesrc = I915_READ(PIPESRC(intel_crtc->pipe)) & 0x0fff0fff;
 -      intel_ring_emit(engine, pf | pipesrc);
 +      intel_ring_emit(ring, pf | pipesrc);
  
        return 0;
  }
@@@ -11789,7 -11294,7 +11791,7 @@@ static int intel_gen6_queue_flip(struc
                                 struct drm_i915_gem_request *req,
                                 uint32_t flags)
  {
 -      struct intel_engine_cs *engine = req->engine;
 +      struct intel_ring *ring = req->ring;
        struct drm_i915_private *dev_priv = to_i915(dev);
        struct intel_crtc *intel_crtc = to_intel_crtc(crtc);
        uint32_t pf, pipesrc;
        if (ret)
                return ret;
  
 -      intel_ring_emit(engine, MI_DISPLAY_FLIP |
 +      intel_ring_emit(ring, MI_DISPLAY_FLIP |
                        MI_DISPLAY_FLIP_PLANE(intel_crtc->plane));
 -      intel_ring_emit(engine, fb->pitches[0] | obj->tiling_mode);
 -      intel_ring_emit(engine, intel_crtc->flip_work->gtt_offset);
 +      intel_ring_emit(ring, fb->pitches[0] |
 +                      intel_fb_modifier_to_tiling(fb->modifier[0]));
 +      intel_ring_emit(ring, intel_crtc->flip_work->gtt_offset);
  
        /* Contrary to the suggestions in the documentation,
         * "Enable Panel Fitter" does not seem to be required when page
         */
        pf = 0;
        pipesrc = I915_READ(PIPESRC(intel_crtc->pipe)) & 0x0fff0fff;
 -      intel_ring_emit(engine, pf | pipesrc);
 +      intel_ring_emit(ring, pf | pipesrc);
  
        return 0;
  }
@@@ -11825,7 -11329,7 +11827,7 @@@ static int intel_gen7_queue_flip(struc
                                 struct drm_i915_gem_request *req,
                                 uint32_t flags)
  {
 -      struct intel_engine_cs *engine = req->engine;
 +      struct intel_ring *ring = req->ring;
        struct intel_crtc *intel_crtc = to_intel_crtc(crtc);
        uint32_t plane_bit = 0;
        int len, ret;
        }
  
        len = 4;
 -      if (engine->id == RCS) {
 +      if (req->engine->id == RCS) {
                len += 6;
                /*
                 * On Gen 8, SRM is now taking an extra dword to accommodate
         * for the RCS also doesn't appear to drop events. Setting the DERRMR
         * to zero does lead to lockups within MI_DISPLAY_FLIP.
         */
 -      if (engine->id == RCS) {
 -              intel_ring_emit(engine, MI_LOAD_REGISTER_IMM(1));
 -              intel_ring_emit_reg(engine, DERRMR);
 -              intel_ring_emit(engine, ~(DERRMR_PIPEA_PRI_FLIP_DONE |
 +      if (req->engine->id == RCS) {
 +              intel_ring_emit(ring, MI_LOAD_REGISTER_IMM(1));
 +              intel_ring_emit_reg(ring, DERRMR);
 +              intel_ring_emit(ring, ~(DERRMR_PIPEA_PRI_FLIP_DONE |
                                          DERRMR_PIPEB_PRI_FLIP_DONE |
                                          DERRMR_PIPEC_PRI_FLIP_DONE));
                if (IS_GEN8(dev))
 -                      intel_ring_emit(engine, MI_STORE_REGISTER_MEM_GEN8 |
 +                      intel_ring_emit(ring, MI_STORE_REGISTER_MEM_GEN8 |
                                              MI_SRM_LRM_GLOBAL_GTT);
                else
 -                      intel_ring_emit(engine, MI_STORE_REGISTER_MEM |
 +                      intel_ring_emit(ring, MI_STORE_REGISTER_MEM |
                                              MI_SRM_LRM_GLOBAL_GTT);
 -              intel_ring_emit_reg(engine, DERRMR);
 -              intel_ring_emit(engine, engine->scratch.gtt_offset + 256);
 +              intel_ring_emit_reg(ring, DERRMR);
 +              intel_ring_emit(ring,
 +                              i915_ggtt_offset(req->engine->scratch) + 256);
                if (IS_GEN8(dev)) {
 -                      intel_ring_emit(engine, 0);
 -                      intel_ring_emit(engine, MI_NOOP);
 +                      intel_ring_emit(ring, 0);
 +                      intel_ring_emit(ring, MI_NOOP);
                }
        }
  
 -      intel_ring_emit(engine, MI_DISPLAY_FLIP_I915 | plane_bit);
 -      intel_ring_emit(engine, (fb->pitches[0] | obj->tiling_mode));
 -      intel_ring_emit(engine, intel_crtc->flip_work->gtt_offset);
 -      intel_ring_emit(engine, (MI_NOOP));
 +      intel_ring_emit(ring, MI_DISPLAY_FLIP_I915 | plane_bit);
 +      intel_ring_emit(ring, fb->pitches[0] |
 +                      intel_fb_modifier_to_tiling(fb->modifier[0]));
 +      intel_ring_emit(ring, intel_crtc->flip_work->gtt_offset);
 +      intel_ring_emit(ring, (MI_NOOP));
  
        return 0;
  }
@@@ -11944,8 -11446,7 +11946,8 @@@ static bool use_mmio_flip(struct intel_
        if (resv && !reservation_object_test_signaled_rcu(resv, false))
                return true;
  
 -      return engine != i915_gem_request_get_engine(obj->last_write_req);
 +      return engine != i915_gem_active_get_engine(&obj->last_write,
 +                                                  &obj->base.dev->struct_mutex);
  }
  
  static void skl_do_mmio_flip(struct intel_crtc *intel_crtc,
        struct drm_i915_private *dev_priv = to_i915(dev);
        struct drm_framebuffer *fb = intel_crtc->base.primary->fb;
        const enum pipe pipe = intel_crtc->pipe;
 -      u32 ctl, stride, tile_height;
 +      u32 ctl, stride = skl_plane_stride(fb, 0, rotation);
  
        ctl = I915_READ(PLANE_CTL(pipe, 0));
        ctl &= ~PLANE_CTL_TILED_MASK;
                MISSING_CASE(fb->modifier[0]);
        }
  
 -      /*
 -       * The stride is either expressed as a multiple of 64 bytes chunks for
 -       * linear buffers or in number of tiles for tiled buffers.
 -       */
 -      if (intel_rotation_90_or_270(rotation)) {
 -              /* stride = Surface height in tiles */
 -              tile_height = intel_tile_height(dev_priv, fb->modifier[0], 0);
 -              stride = DIV_ROUND_UP(fb->height, tile_height);
 -      } else {
 -              stride = fb->pitches[0] /
 -                      intel_fb_stride_alignment(dev_priv, fb->modifier[0],
 -                                                fb->pixel_format);
 -      }
 -
        /*
         * Both PLANE_CTL and PLANE_STRIDE are not updated on vblank but on
         * PLANE_SURF updates, the update is then guaranteed to be atomic.
@@@ -11992,13 -11507,15 +11994,13 @@@ static void ilk_do_mmio_flip(struct int
  {
        struct drm_device *dev = intel_crtc->base.dev;
        struct drm_i915_private *dev_priv = to_i915(dev);
 -      struct intel_framebuffer *intel_fb =
 -              to_intel_framebuffer(intel_crtc->base.primary->fb);
 -      struct drm_i915_gem_object *obj = intel_fb->obj;
 +      struct drm_framebuffer *fb = intel_crtc->base.primary->fb;
        i915_reg_t reg = DSPCNTR(intel_crtc->plane);
        u32 dspcntr;
  
        dspcntr = I915_READ(reg);
  
 -      if (obj->tiling_mode != I915_TILING_NONE)
 +      if (fb->modifier[0] == I915_FORMAT_MOD_X_TILED)
                dspcntr |= DISPPLANE_TILED;
        else
                dspcntr &= ~DISPPLANE_TILED;
@@@ -12021,8 -11538,9 +12023,8 @@@ static void intel_mmio_flip_work_func(s
        struct reservation_object *resv;
  
        if (work->flip_queued_req)
 -              WARN_ON(__i915_wait_request(work->flip_queued_req,
 -                                          false, NULL,
 -                                          &dev_priv->rps.mmioflips));
 +              WARN_ON(i915_wait_request(work->flip_queued_req,
 +                                        0, NULL, NO_WAITBOOST));
  
        /* For framebuffer backed by dmabuf, wait for fence */
        resv = i915_gem_object_get_dmabuf_resv(obj);
@@@ -12133,8 -11651,7 +12135,8 @@@ static int intel_crtc_page_flip(struct 
        struct intel_flip_work *work;
        struct intel_engine_cs *engine;
        bool mmio_flip;
 -      struct drm_i915_gem_request *request = NULL;
 +      struct drm_i915_gem_request *request;
 +      struct i915_vma *vma;
        int ret;
  
        /*
  
        /* Reference the objects for the scheduled work. */
        drm_framebuffer_reference(work->old_fb);
 -      drm_gem_object_reference(&obj->base);
  
        crtc->primary->fb = fb;
        update_state_fb(crtc->primary);
  
 -      intel_fbc_pre_update(intel_crtc, intel_crtc->config,
 -                           to_intel_plane_state(primary->state));
 -
 -      work->pending_flip_obj = obj;
 +      work->pending_flip_obj = i915_gem_object_get(obj);
  
        ret = i915_mutex_lock_interruptible(dev);
        if (ret)
                goto cleanup;
  
 -      intel_crtc->reset_counter = i915_reset_counter(&dev_priv->gpu_error);
 -      if (__i915_reset_in_progress_or_wedged(intel_crtc->reset_counter)) {
 +      intel_crtc->reset_count = i915_reset_count(&dev_priv->gpu_error);
 +      if (i915_reset_in_progress_or_wedged(&dev_priv->gpu_error)) {
                ret = -EIO;
                goto cleanup;
        }
  
        if (IS_VALLEYVIEW(dev) || IS_CHERRYVIEW(dev)) {
                engine = &dev_priv->engine[BCS];
 -              if (obj->tiling_mode != intel_fb_obj(work->old_fb)->tiling_mode)
 +              if (fb->modifier[0] != old_fb->modifier[0])
                        /* vlv: DISPLAY_FLIP fails to change tiling */
                        engine = NULL;
        } else if (IS_IVYBRIDGE(dev) || IS_HASWELL(dev)) {
                engine = &dev_priv->engine[BCS];
        } else if (INTEL_INFO(dev)->gen >= 7) {
 -              engine = i915_gem_request_get_engine(obj->last_write_req);
 +              engine = i915_gem_active_get_engine(&obj->last_write,
 +                                                  &obj->base.dev->struct_mutex);
                if (engine == NULL || engine->id != RCS)
                        engine = &dev_priv->engine[BCS];
        } else {
  
        mmio_flip = use_mmio_flip(engine, obj);
  
 -      /* When using CS flips, we want to emit semaphores between rings.
 -       * However, when using mmio flips we will create a task to do the
 -       * synchronisation, so all we want here is to pin the framebuffer
 -       * into the display plane and skip any waits.
 -       */
 -      if (!mmio_flip) {
 -              ret = i915_gem_object_sync(obj, engine, &request);
 -              if (!ret && !request) {
 -                      request = i915_gem_request_alloc(engine, NULL);
 -                      ret = PTR_ERR_OR_ZERO(request);
 -              }
 -
 -              if (ret)
 -                      goto cleanup_pending;
 -      }
 -
 -      ret = intel_pin_and_fence_fb_obj(fb, primary->state->rotation);
 -      if (ret)
 +      vma = intel_pin_and_fence_fb_obj(fb, primary->state->rotation);
 +      if (IS_ERR(vma)) {
 +              ret = PTR_ERR(vma);
                goto cleanup_pending;
 +      }
  
 -      work->gtt_offset = intel_plane_obj_offset(to_intel_plane(primary),
 -                                                obj, 0);
 +      work->gtt_offset = intel_fb_gtt_offset(fb, primary->state->rotation);
        work->gtt_offset += intel_crtc->dspaddr_offset;
        work->rotation = crtc->primary->state->rotation;
  
 +      /*
 +       * There's the potential that the next frame will not be compatible with
 +       * FBC, so we want to call pre_update() before the actual page flip.
 +       * The problem is that pre_update() caches some information about the fb
 +       * object, so we want to do this only after the object is pinned. Let's
 +       * be on the safe side and do this immediately before scheduling the
 +       * flip.
 +       */
 +      intel_fbc_pre_update(intel_crtc, intel_crtc->config,
 +                           to_intel_plane_state(primary->state));
 +
        if (mmio_flip) {
                INIT_WORK(&work->mmio_work, intel_mmio_flip_work_func);
  
 -              i915_gem_request_assign(&work->flip_queued_req,
 -                                      obj->last_write_req);
 -
 +              work->flip_queued_req = i915_gem_active_get(&obj->last_write,
 +                                                          &obj->base.dev->struct_mutex);
                schedule_work(&work->mmio_work);
        } else {
 -              i915_gem_request_assign(&work->flip_queued_req, request);
 +              request = i915_gem_request_alloc(engine, engine->last_context);
 +              if (IS_ERR(request)) {
 +                      ret = PTR_ERR(request);
 +                      goto cleanup_unpin;
 +              }
 +
 +              ret = i915_gem_request_await_object(request, obj, false);
 +              if (ret)
 +                      goto cleanup_request;
 +
                ret = dev_priv->display.queue_flip(dev, crtc, fb, obj, request,
                                                   page_flip_flags);
                if (ret)
 -                      goto cleanup_unpin;
 +                      goto cleanup_request;
  
                intel_mark_page_flip_active(intel_crtc, work);
  
 +              work->flip_queued_req = i915_gem_request_get(request);
                i915_add_request_no_flush(request);
        }
  
                          to_intel_plane(primary)->frontbuffer_bit);
        mutex_unlock(&dev->struct_mutex);
  
 -      intel_frontbuffer_flip_prepare(dev,
 +      intel_frontbuffer_flip_prepare(to_i915(dev),
                                       to_intel_plane(primary)->frontbuffer_bit);
  
        trace_i915_flip_request(intel_crtc->plane, obj);
  
        return 0;
  
 +cleanup_request:
 +      i915_add_request_no_flush(request);
  cleanup_unpin:
        intel_unpin_fb_obj(fb, crtc->primary->state->rotation);
  cleanup_pending:
 -      if (!IS_ERR_OR_NULL(request))
 -              i915_add_request_no_flush(request);
        atomic_dec(&intel_crtc->unpin_work_count);
        mutex_unlock(&dev->struct_mutex);
  cleanup:
        crtc->primary->fb = old_fb;
        update_state_fb(crtc->primary);
  
 -      drm_gem_object_unreference_unlocked(&obj->base);
 +      i915_gem_object_put_unlocked(obj);
        drm_framebuffer_unreference(work->old_fb);
  
        spin_lock_irq(&dev->event_lock);
@@@ -12378,7 -11893,7 +12380,7 @@@ static bool intel_wm_need_update(struc
        struct intel_plane_state *cur = to_intel_plane_state(plane->state);
  
        /* Update watermarks on tiling or size changes. */
 -      if (new->visible != cur->visible)
 +      if (new->base.visible != cur->base.visible)
                return true;
  
        if (!cur->base.fb || !new->base.fb)
  
        if (cur->base.fb->modifier[0] != new->base.fb->modifier[0] ||
            cur->base.rotation != new->base.rotation ||
 -          drm_rect_width(&new->src) != drm_rect_width(&cur->src) ||
 -          drm_rect_height(&new->src) != drm_rect_height(&cur->src) ||
 -          drm_rect_width(&new->dst) != drm_rect_width(&cur->dst) ||
 -          drm_rect_height(&new->dst) != drm_rect_height(&cur->dst))
 +          drm_rect_width(&new->base.src) != drm_rect_width(&cur->base.src) ||
 +          drm_rect_height(&new->base.src) != drm_rect_height(&cur->base.src) ||
 +          drm_rect_width(&new->base.dst) != drm_rect_width(&cur->base.dst) ||
 +          drm_rect_height(&new->base.dst) != drm_rect_height(&cur->base.dst))
                return true;
  
        return false;
  
  static bool needs_scaling(struct intel_plane_state *state)
  {
 -      int src_w = drm_rect_width(&state->src) >> 16;
 -      int src_h = drm_rect_height(&state->src) >> 16;
 -      int dst_w = drm_rect_width(&state->dst);
 -      int dst_h = drm_rect_height(&state->dst);
 +      int src_w = drm_rect_width(&state->base.src) >> 16;
 +      int src_h = drm_rect_height(&state->base.src) >> 16;
 +      int dst_w = drm_rect_width(&state->base.dst);
 +      int dst_h = drm_rect_height(&state->base.dst);
  
        return (src_w != dst_w || src_h != dst_h);
  }
@@@ -12431,8 -11946,8 +12433,8 @@@ int intel_plane_atomic_calc_changes(str
                        return ret;
        }
  
 -      was_visible = old_plane_state->visible;
 -      visible = to_intel_plane_state(plane_state)->visible;
 +      was_visible = old_plane_state->base.visible;
 +      visible = to_intel_plane_state(plane_state)->base.visible;
  
        if (!was_crtc_enabled && WARN_ON(was_visible))
                was_visible = false;
         * only combine the results from all planes in the current place?
         */
        if (!is_crtc_enabled)
 -              to_intel_plane_state(plane_state)->visible = visible = false;
 +              to_intel_plane_state(plane_state)->base.visible = visible = false;
  
        if (!was_visible && !visible)
                return 0;
@@@ -12786,9 -12301,10 +12788,9 @@@ static void intel_dump_pipe_config(stru
        DRM_DEBUG_KMS("double wide: %i\n", pipe_config->double_wide);
  
        if (IS_BROXTON(dev)) {
 -              DRM_DEBUG_KMS("ddi_pll_sel: %u; dpll_hw_state: ebb0: 0x%x, ebb4: 0x%x,"
 +              DRM_DEBUG_KMS("dpll_hw_state: ebb0: 0x%x, ebb4: 0x%x,"
                              "pll0: 0x%x, pll1: 0x%x, pll2: 0x%x, pll3: 0x%x, "
                              "pll6: 0x%x, pll8: 0x%x, pll9: 0x%x, pll10: 0x%x, pcsdw12: 0x%x\n",
 -                            pipe_config->ddi_pll_sel,
                              pipe_config->dpll_hw_state.ebb0,
                              pipe_config->dpll_hw_state.ebb4,
                              pipe_config->dpll_hw_state.pll0,
                              pipe_config->dpll_hw_state.pll10,
                              pipe_config->dpll_hw_state.pcsdw12);
        } else if (IS_SKYLAKE(dev) || IS_KABYLAKE(dev)) {
 -              DRM_DEBUG_KMS("ddi_pll_sel: %u; dpll_hw_state: "
 +              DRM_DEBUG_KMS("dpll_hw_state: "
                              "ctrl1: 0x%x, cfgcr1: 0x%x, cfgcr2: 0x%x\n",
 -                            pipe_config->ddi_pll_sel,
                              pipe_config->dpll_hw_state.ctrl1,
                              pipe_config->dpll_hw_state.cfgcr1,
                              pipe_config->dpll_hw_state.cfgcr2);
        } else if (HAS_DDI(dev)) {
 -              DRM_DEBUG_KMS("ddi_pll_sel: 0x%x; dpll_hw_state: wrpll: 0x%x spll: 0x%x\n",
 -                            pipe_config->ddi_pll_sel,
 +              DRM_DEBUG_KMS("dpll_hw_state: wrpll: 0x%x spll: 0x%x\n",
                              pipe_config->dpll_hw_state.wrpll,
                              pipe_config->dpll_hw_state.spll);
        } else {
  
        DRM_DEBUG_KMS("planes on this crtc\n");
        list_for_each_entry(plane, &dev->mode_config.plane_list, head) {
 +              char *format_name;
                intel_plane = to_intel_plane(plane);
                if (intel_plane->pipe != crtc->pipe)
                        continue;
                        continue;
                }
  
 +              format_name = drm_get_format_name(fb->pixel_format);
 +
                DRM_DEBUG_KMS("[PLANE:%d:%s] enabled",
                              plane->base.id, plane->name);
                DRM_DEBUG_KMS("\tFB:%d, fb = %ux%u format = %s",
 -                            fb->base.id, fb->width, fb->height,
 -                            drm_get_format_name(fb->pixel_format));
 +                            fb->base.id, fb->width, fb->height, format_name);
                DRM_DEBUG_KMS("\tscaler:%d src %dx%d+%d+%d dst %dx%d+%d+%d\n",
                              state->scaler_id,
 -                            state->src.x1 >> 16, state->src.y1 >> 16,
 -                            drm_rect_width(&state->src) >> 16,
 -                            drm_rect_height(&state->src) >> 16,
 -                            state->dst.x1, state->dst.y1,
 -                            drm_rect_width(&state->dst),
 -                            drm_rect_height(&state->dst));
 +                            state->base.src.x1 >> 16,
 +                            state->base.src.y1 >> 16,
 +                            drm_rect_width(&state->base.src) >> 16,
 +                            drm_rect_height(&state->base.src) >> 16,
 +                            state->base.dst.x1, state->base.dst.y1,
 +                            drm_rect_width(&state->base.dst),
 +                            drm_rect_height(&state->base.dst));
 +
 +              kfree(format_name);
        }
  }
  
@@@ -12859,7 -12372,6 +12861,7 @@@ static bool check_digital_port_conflict
        struct drm_device *dev = state->dev;
        struct drm_connector *connector;
        unsigned int used_ports = 0;
 +      unsigned int used_mst_ports = 0;
  
        /*
         * Walk the connector list instead of the encoder
                                return false;
  
                        used_ports |= port_mask;
 +                      break;
 +              case INTEL_OUTPUT_DP_MST:
 +                      used_mst_ports |=
 +                              1 << enc_to_mst(&encoder->base)->primary->port;
 +                      break;
                default:
                        break;
                }
        }
  
 +      /* can't mix MST and SST/HDMI on the same port */
 +      if (used_ports & used_mst_ports)
 +              return false;
 +
        return true;
  }
  
@@@ -12920,6 -12423,7 +12922,6 @@@ clear_intel_crtc_state(struct intel_crt
        struct intel_crtc_scaler_state scaler_state;
        struct intel_dpll_hw_state dpll_hw_state;
        struct intel_shared_dpll *shared_dpll;
 -      uint32_t ddi_pll_sel;
        bool force_thru;
  
        /* FIXME: before the switch to atomic started, a new pipe_config was
        scaler_state = crtc_state->scaler_state;
        shared_dpll = crtc_state->shared_dpll;
        dpll_hw_state = crtc_state->dpll_hw_state;
 -      ddi_pll_sel = crtc_state->ddi_pll_sel;
        force_thru = crtc_state->pch_pfit.force_thru;
  
        memset(crtc_state, 0, sizeof *crtc_state);
        crtc_state->scaler_state = scaler_state;
        crtc_state->shared_dpll = shared_dpll;
        crtc_state->dpll_hw_state = dpll_hw_state;
 -      crtc_state->ddi_pll_sel = ddi_pll_sel;
        crtc_state->pch_pfit.force_thru = force_thru;
  }
  
@@@ -13026,7 -12532,7 +13028,7 @@@ encoder_retry
  
                encoder = to_intel_encoder(connector_state->best_encoder);
  
 -              if (!(encoder->compute_config(encoder, pipe_config))) {
 +              if (!(encoder->compute_config(encoder, pipe_config, connector_state))) {
                        DRM_DEBUG_KMS("Encoder config failure\n");
                        goto fail;
                }
@@@ -13114,6 -12620,12 +13116,6 @@@ static bool intel_fuzzy_clock_check(in
        return false;
  }
  
 -#define for_each_intel_crtc_masked(dev, mask, intel_crtc) \
 -      list_for_each_entry((intel_crtc), \
 -                          &(dev)->mode_config.crtc_list, \
 -                          base.head) \
 -              for_each_if (mask & (1 <<(intel_crtc)->pipe))
 -
  static bool
  intel_compare_m_n(unsigned int m, unsigned int n,
                  unsigned int m2, unsigned int n2,
@@@ -13361,6 -12873,8 +13363,6 @@@ intel_pipe_config_compare(struct drm_de
  
        PIPE_CONF_CHECK_I(double_wide);
  
 -      PIPE_CONF_CHECK_X(ddi_pll_sel);
 -
        PIPE_CONF_CHECK_P(shared_dpll);
        PIPE_CONF_CHECK_X(dpll_hw_state.dpll);
        PIPE_CONF_CHECK_X(dpll_hw_state.dpll_md);
@@@ -13442,23 -12956,16 +13444,23 @@@ static void verify_wm_state(struct drm_
                          hw_entry->start, hw_entry->end);
        }
  
 -      /* cursor */
 -      hw_entry = &hw_ddb.plane[pipe][PLANE_CURSOR];
 -      sw_entry = &sw_ddb->plane[pipe][PLANE_CURSOR];
 -
 -      if (!skl_ddb_entry_equal(hw_entry, sw_entry)) {
 -              DRM_ERROR("mismatch in DDB state pipe %c cursor "
 -                        "(expected (%u,%u), found (%u,%u))\n",
 -                        pipe_name(pipe),
 -                        sw_entry->start, sw_entry->end,
 -                        hw_entry->start, hw_entry->end);
 +      /*
 +       * cursor
 +       * If the cursor plane isn't active, we may not have updated its ddb
 +       * allocation. In that case since the ddb allocation will be updated
 +       * once the plane becomes visible, we can skip this check
 +       */
 +      if (intel_crtc->cursor_addr) {
 +              hw_entry = &hw_ddb.plane[pipe][PLANE_CURSOR];
 +              sw_entry = &sw_ddb->plane[pipe][PLANE_CURSOR];
 +
 +              if (!skl_ddb_entry_equal(hw_entry, sw_entry)) {
 +                      DRM_ERROR("mismatch in DDB state pipe %c cursor "
 +                                "(expected (%u,%u), found (%u,%u))\n",
 +                                pipe_name(pipe),
 +                                sw_entry->start, sw_entry->end,
 +                                hw_entry->start, hw_entry->end);
 +              }
        }
  }
  
@@@ -14073,9 -13580,8 +14075,9 @@@ static int intel_atomic_prepare_commit(
                        if (!intel_plane_state->wait_req)
                                continue;
  
 -                      ret = __i915_wait_request(intel_plane_state->wait_req,
 -                                                true, NULL, NULL);
 +                      ret = i915_wait_request(intel_plane_state->wait_req,
 +                                              I915_WAIT_INTERRUPTIBLE,
 +                                              NULL, NULL);
                        if (ret) {
                                /* Any hang should be swallowed by the wait */
                                WARN_ON(ret == -EIO);
@@@ -14165,111 -13671,6 +14167,111 @@@ static bool needs_vblank_wait(struct in
        return false;
  }
  
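 +/* Enable a freshly modeset pipe or prepare a running one, then commit its plane updates. */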
 +static void intel_update_crtc(struct drm_crtc *crtc,
 +                            struct drm_atomic_state *state,
 +                            struct drm_crtc_state *old_crtc_state,
 +                            unsigned int *crtc_vblank_mask)
 +{
 +      struct drm_device *dev = crtc->dev;
 +      struct drm_i915_private *dev_priv = to_i915(dev);
 +      struct intel_crtc *intel_crtc = to_intel_crtc(crtc);
 +      struct intel_crtc_state *pipe_config = to_intel_crtc_state(crtc->state);
 +      bool modeset = needs_modeset(crtc->state);
 +
 +      if (modeset) {
 +              update_scanline_offset(intel_crtc);
 +              dev_priv->display.crtc_enable(pipe_config, state);
 +      } else {
 +              intel_pre_plane_update(to_intel_crtc_state(old_crtc_state));
 +      }
 +
 +      if (drm_atomic_get_existing_plane_state(state, crtc->primary)) {
 +              intel_fbc_enable(
 +                  intel_crtc, pipe_config,
 +                  to_intel_plane_state(crtc->primary->state));
 +      }
 +
 +      drm_atomic_helper_commit_planes_on_crtc(old_crtc_state);
 +
 +      if (needs_vblank_wait(pipe_config))
 +              *crtc_vblank_mask |= drm_crtc_mask(crtc);
 +}
 +
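 +/* Default path: update every active CRTC in the order it appears in the atomic state. */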
 +static void intel_update_crtcs(struct drm_atomic_state *state,
 +                             unsigned int *crtc_vblank_mask)
 +{
 +      struct drm_crtc *crtc;
 +      struct drm_crtc_state *old_crtc_state;
 +      int i;
 +
 +      for_each_crtc_in_state(state, crtc, old_crtc_state, i) {
 +              if (!crtc->state->active)
 +                      continue;
 +
 +              intel_update_crtc(crtc, state, old_crtc_state,
 +                                crtc_vblank_mask);
 +      }
 +}
 +
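 +/* SKL+ path: update CRTCs in an order that keeps their DDB allocations from overlapping between steps. */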
 +static void skl_update_crtcs(struct drm_atomic_state *state,
 +                           unsigned int *crtc_vblank_mask)
 +{
 +      struct drm_device *dev = state->dev;
 +      struct drm_i915_private *dev_priv = to_i915(dev);
 +      struct intel_atomic_state *intel_state = to_intel_atomic_state(state);
 +      struct drm_crtc *crtc;
 +      struct drm_crtc_state *old_crtc_state;
 +      struct skl_ddb_allocation *new_ddb = &intel_state->wm_results.ddb;
 +      struct skl_ddb_allocation *cur_ddb = &dev_priv->wm.skl_hw.ddb;
 +      unsigned int updated = 0;
 +      bool progress;
 +      enum pipe pipe;
 +
 +      /*
 +       * Whenever the number of active pipes changes, we need to make sure we
 +       * update the pipes in the right order so that their ddb allocations
 +       * never overlap with each other in between CRTC updates. Otherwise we'll
 +       * cause pipe underruns and other bad stuff.
 +       */
 +      do {
 +              int i;
 +              progress = false;
 +
 +              for_each_crtc_in_state(state, crtc, old_crtc_state, i) {
 +                      bool vbl_wait = false;
 +                      unsigned int cmask = drm_crtc_mask(crtc);
 +                      pipe = to_intel_crtc(crtc)->pipe;
 +
 +                      if (updated & cmask || !crtc->state->active)
 +                              continue;
 +                      if (skl_ddb_allocation_overlaps(state, cur_ddb, new_ddb,
 +                                                      pipe))
 +                              continue;
 +
 +                      updated |= cmask;
 +
 +                      /*
 +                       * If this is an already active pipe, its DDB changed,
 +                       * and this isn't the last pipe that needs updating
 +                       * then we need to wait for a vblank to pass for the
 +                       * new ddb allocation to take effect.
 +                       */
 +                      if (!skl_ddb_allocation_equals(cur_ddb, new_ddb, pipe) &&
 +                          !crtc->state->active_changed &&
 +                          intel_state->wm_results.dirty_pipes != updated)
 +                              vbl_wait = true;
 +
 +                      intel_update_crtc(crtc, state, old_crtc_state,
 +                                        crtc_vblank_mask);
 +
 +                      if (vbl_wait)
 +                              intel_wait_for_vblank(dev, pipe);
 +
 +                      progress = true;
 +              }
 +      } while (progress);
 +}
 +
  static void intel_atomic_commit_tail(struct drm_atomic_state *state)
  {
        struct drm_device *dev = state->dev;
                if (!intel_plane_state->wait_req)
                        continue;
  
 -              ret = __i915_wait_request(intel_plane_state->wait_req,
 -                                        true, NULL, NULL);
 +              ret = i915_wait_request(intel_plane_state->wait_req,
 +                                      0, NULL, NULL);
                /* EIO should be eaten, and we can't get interrupted in the
                 * worker, and blocking commits have waited already. */
                WARN_ON(ret);
  
                if (old_crtc_state->active) {
                        intel_crtc_disable_planes(crtc, old_crtc_state->plane_mask);
 -                      dev_priv->display.crtc_disable(crtc);
 +                      dev_priv->display.crtc_disable(to_intel_crtc_state(old_crtc_state), state);
                        intel_crtc->active = false;
                        intel_fbc_disable(intel_crtc);
                        intel_disable_shared_dpll(intel_crtc);
                intel_modeset_verify_disabled(dev);
        }
  
 -      /* Now enable the clocks, plane, pipe, and connectors that we set up. */
 +      /* Complete the events for pipes that have now been disabled */
        for_each_crtc_in_state(state, crtc, old_crtc_state, i) {
 -              struct intel_crtc *intel_crtc = to_intel_crtc(crtc);
                bool modeset = needs_modeset(crtc->state);
 -              struct intel_crtc_state *pipe_config =
 -                      to_intel_crtc_state(crtc->state);
 -
 -              if (modeset && crtc->state->active) {
 -                      update_scanline_offset(to_intel_crtc(crtc));
 -                      dev_priv->display.crtc_enable(crtc);
 -              }
  
                /* Complete events for now disabled pipes here. */
                if (modeset && !crtc->state->active && crtc->state->event) {
  
                        crtc->state->event = NULL;
                }
 -
 -              if (!modeset)
 -                      intel_pre_plane_update(to_intel_crtc_state(old_crtc_state));
 -
 -              if (crtc->state->active &&
 -                  drm_atomic_get_existing_plane_state(state, crtc->primary))
 -                      intel_fbc_enable(intel_crtc, pipe_config, to_intel_plane_state(crtc->primary->state));
 -
 -              if (crtc->state->active)
 -                      drm_atomic_helper_commit_planes_on_crtc(old_crtc_state);
 -
 -              if (pipe_config->base.active && needs_vblank_wait(pipe_config))
 -                      crtc_vblank_mask |= 1 << i;
        }
  
 +      /* Now enable the clocks, plane, pipe, and connectors that we set up. */
 +      dev_priv->display.update_crtcs(state, &crtc_vblank_mask);
 +
        /* FIXME: We should call drm_atomic_helper_commit_hw_done() here
         * already, but still need the state for the delayed optimization. To
         * fix this:
@@@ -14463,12 -13882,19 +14465,12 @@@ static void intel_atomic_track_fbs(stru
  {
        struct drm_plane_state *old_plane_state;
        struct drm_plane *plane;
 -      struct drm_i915_gem_object *obj, *old_obj;
 -      struct intel_plane *intel_plane;
        int i;
  
 -      mutex_lock(&state->dev->struct_mutex);
 -      for_each_plane_in_state(state, plane, old_plane_state, i) {
 -              obj = intel_fb_obj(plane->state->fb);
 -              old_obj = intel_fb_obj(old_plane_state->fb);
 -              intel_plane = to_intel_plane(plane);
 -
 -              i915_gem_track_fb(old_obj, obj, intel_plane->frontbuffer_bit);
 -      }
 -      mutex_unlock(&state->dev->struct_mutex);
 +      for_each_plane_in_state(state, plane, old_plane_state, i)
 +              i915_gem_track_fb(intel_fb_obj(old_plane_state->fb),
 +                                intel_fb_obj(plane->state->fb),
 +                                to_intel_plane(plane)->frontbuffer_bit);
  }
  
  /**
                drm_atomic_state_free(state);
  }
  
 -#undef for_each_intel_crtc_masked
 -
  /*
   * FIXME: Remove this once i915 is fully DRIVER_ATOMIC by calling
   *        drm_atomic_helper_legacy_gamma_set() directly.
@@@ -14632,7 -14060,7 +14634,7 @@@ static const struct drm_crtc_funcs inte
   */
  int
  intel_prepare_plane_fb(struct drm_plane *plane,
 -                     const struct drm_plane_state *new_state)
 +                     struct drm_plane_state *new_state)
  {
        struct drm_device *dev = plane->dev;
        struct drm_framebuffer *fb = new_state->fb;
                if (ret)
                        DRM_DEBUG_KMS("failed to attach phys object\n");
        } else {
 -              ret = intel_pin_and_fence_fb_obj(fb, new_state->rotation);
 +              struct i915_vma *vma;
 +
 +              vma = intel_pin_and_fence_fb_obj(fb, new_state->rotation);
 +              if (IS_ERR(vma))
 +                      ret = PTR_ERR(vma);
        }
  
        if (ret == 0) {
 -              struct intel_plane_state *plane_state =
 -                      to_intel_plane_state(new_state);
 -
 -              i915_gem_request_assign(&plane_state->wait_req,
 -                                      obj->last_write_req);
 +              to_intel_plane_state(new_state)->wait_req =
 +                      i915_gem_active_get(&obj->last_write,
 +                                          &obj->base.dev->struct_mutex);
        }
  
        return ret;
   */
  void
  intel_cleanup_plane_fb(struct drm_plane *plane,
 -                     const struct drm_plane_state *old_state)
 +                     struct drm_plane_state *old_state)
  {
        struct drm_device *dev = plane->dev;
        struct intel_plane_state *old_intel_state;
 +      struct intel_plane_state *intel_state = to_intel_plane_state(plane->state);
        struct drm_i915_gem_object *old_obj = intel_fb_obj(old_state->fb);
        struct drm_i915_gem_object *obj = intel_fb_obj(plane->state->fb);
  
            !INTEL_INFO(dev)->cursor_needs_physical))
                intel_unpin_fb_obj(old_state->fb, old_state->rotation);
  
 +      i915_gem_request_assign(&intel_state->wait_req, NULL);
        i915_gem_request_assign(&old_intel_state->wait_req, NULL);
  }
  
@@@ -14770,14 -14194,13 +14772,14 @@@ intel_check_primary_plane(struct drm_pl
                          struct intel_crtc_state *crtc_state,
                          struct intel_plane_state *state)
  {
 +      struct drm_i915_private *dev_priv = to_i915(plane->dev);
        struct drm_crtc *crtc = state->base.crtc;
 -      struct drm_framebuffer *fb = state->base.fb;
        int min_scale = DRM_PLANE_HELPER_NO_SCALING;
        int max_scale = DRM_PLANE_HELPER_NO_SCALING;
        bool can_position = false;
 +      int ret;
  
 -      if (INTEL_INFO(plane->dev)->gen >= 9) {
 +      if (INTEL_GEN(dev_priv) >= 9) {
                /* use scaler when colorkey is not required */
                if (state->ckey.flags == I915_SET_COLORKEY_NONE) {
                        min_scale = 1;
                can_position = true;
        }
  
 -      return drm_plane_helper_check_update(plane, crtc, fb, &state->src,
 -                                           &state->dst, &state->clip,
 -                                           state->base.rotation,
 -                                           min_scale, max_scale,
 -                                           can_position, true,
 -                                           &state->visible);
 +      ret = drm_plane_helper_check_state(&state->base,
 +                                         &state->clip,
 +                                         min_scale, max_scale,
 +                                         can_position, true);
 +      if (ret)
 +              return ret;
 +
 +      if (!state->base.fb)
 +              return 0;
 +
 +      if (INTEL_GEN(dev_priv) >= 9) {
 +              ret = skl_check_plane_surface(state);
 +              if (ret)
 +                      return ret;
 +      }
 +
 +      return 0;
  }
  
  static void intel_begin_crtc_commit(struct drm_crtc *crtc,
                                    struct drm_crtc_state *old_crtc_state)
  {
        struct drm_device *dev = crtc->dev;
 +      struct drm_i915_private *dev_priv = to_i915(dev);
        struct intel_crtc *intel_crtc = to_intel_crtc(crtc);
        struct intel_crtc_state *old_intel_state =
                to_intel_crtc_state(old_crtc_state);
        bool modeset = needs_modeset(crtc->state);
 +      enum pipe pipe = intel_crtc->pipe;
  
        /* Perform vblank evasion around commit operation */
        intel_pipe_update_start(intel_crtc);
  
        if (to_intel_crtc_state(crtc->state)->update_pipe)
                intel_update_pipe_config(intel_crtc, old_intel_state);
 -      else if (INTEL_INFO(dev)->gen >= 9)
 +      else if (INTEL_GEN(dev_priv) >= 9) {
                skl_detach_scalers(intel_crtc);
 +
 +              I915_WRITE(PIPE_WM_LINETIME(pipe),
 +                         dev_priv->wm.skl_hw.wm_linetime[pipe]);
 +      }
  }
  
  static void intel_finish_crtc_commit(struct drm_crtc *crtc,
  void intel_create_rotation_property(struct drm_device *dev, struct intel_plane *plane)
  {
        if (!dev->mode_config.rotation_property) {
 -              unsigned long flags = BIT(DRM_ROTATE_0) |
 -                      BIT(DRM_ROTATE_180);
 +              unsigned long flags = DRM_ROTATE_0 |
 +                      DRM_ROTATE_180;
  
                if (INTEL_INFO(dev)->gen >= 9)
 -                      flags |= BIT(DRM_ROTATE_90) | BIT(DRM_ROTATE_270);
 +                      flags |= DRM_ROTATE_90 | DRM_ROTATE_270;
  
                dev->mode_config.rotation_property =
                        drm_mode_create_rotation_property(dev, flags);
@@@ -14988,17 -14394,19 +14990,17 @@@ intel_check_cursor_plane(struct drm_pla
                         struct intel_crtc_state *crtc_state,
                         struct intel_plane_state *state)
  {
 -      struct drm_crtc *crtc = crtc_state->base.crtc;
        struct drm_framebuffer *fb = state->base.fb;
        struct drm_i915_gem_object *obj = intel_fb_obj(fb);
        enum pipe pipe = to_intel_plane(plane)->pipe;
        unsigned stride;
        int ret;
  
 -      ret = drm_plane_helper_check_update(plane, crtc, fb, &state->src,
 -                                          &state->dst, &state->clip,
 -                                          state->base.rotation,
 -                                          DRM_PLANE_HELPER_NO_SCALING,
 -                                          DRM_PLANE_HELPER_NO_SCALING,
 -                                          true, true, &state->visible);
 +      ret = drm_plane_helper_check_state(&state->base,
 +                                         &state->clip,
 +                                         DRM_PLANE_HELPER_NO_SCALING,
 +                                         DRM_PLANE_HELPER_NO_SCALING,
 +                                         true, true);
        if (ret)
                return ret;
  
         * Refuse to put the cursor into that compromised position.
         */
        if (IS_CHERRYVIEW(plane->dev) && pipe == PIPE_C &&
 -          state->visible && state->base.crtc_x < 0) {
 +          state->base.visible && state->base.crtc_x < 0) {
                DRM_DEBUG_KMS("CHV cursor C not allowed to straddle the left screen edge\n");
                return -EINVAL;
        }
@@@ -15067,7 -14475,7 +15069,7 @@@ intel_update_cursor_plane(struct drm_pl
        if (!obj)
                addr = 0;
        else if (!INTEL_INFO(dev)->cursor_needs_physical)
 -              addr = i915_gem_obj_ggtt_offset(obj);
 +              addr = i915_gem_object_ggtt_offset(obj, NULL);
        else
                addr = obj->phys_handle->busaddr;
  
@@@ -15113,8 -14521,8 +15115,8 @@@ static struct drm_plane *intel_cursor_p
                if (!dev->mode_config.rotation_property)
                        dev->mode_config.rotation_property =
                                drm_mode_create_rotation_property(dev,
 -                                                      BIT(DRM_ROTATE_0) |
 -                                                      BIT(DRM_ROTATE_180));
 +                                                      DRM_ROTATE_0 |
 +                                                      DRM_ROTATE_180);
                if (dev->mode_config.rotation_property)
                        drm_object_attach_property(&cursor->base.base,
                                dev->mode_config.rotation_property,
@@@ -15320,50 -14728,12 +15322,50 @@@ static bool intel_crt_present(struct dr
        return true;
  }
  
 +void intel_pps_unlock_regs_wa(struct drm_i915_private *dev_priv)
 +{
 +      int pps_num;
 +      int pps_idx;
 +
 +      if (HAS_DDI(dev_priv))
 +              return;
 +      /*
 +       * This w/a is needed at least on CPT/PPT, but to be sure apply it
 +       * everywhere where registers can be write protected.
 +       */
 +      if (IS_VALLEYVIEW(dev_priv) || IS_CHERRYVIEW(dev_priv))
 +              pps_num = 2;
 +      else
 +              pps_num = 1;
 +
 +      for (pps_idx = 0; pps_idx < pps_num; pps_idx++) {
 +              u32 val = I915_READ(PP_CONTROL(pps_idx));
 +
 +              val = (val & ~PANEL_UNLOCK_MASK) | PANEL_UNLOCK_REGS;
 +              I915_WRITE(PP_CONTROL(pps_idx), val);
 +      }
 +}
 +
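 +/* Select the PPS register block for this platform and apply the register unlock workaround. */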
 +static void intel_pps_init(struct drm_i915_private *dev_priv)
 +{
 +      if (HAS_PCH_SPLIT(dev_priv) || IS_BROXTON(dev_priv))
 +              dev_priv->pps_mmio_base = PCH_PPS_BASE;
 +      else if (IS_VALLEYVIEW(dev_priv) || IS_CHERRYVIEW(dev_priv))
 +              dev_priv->pps_mmio_base = VLV_PPS_BASE;
 +      else
 +              dev_priv->pps_mmio_base = PPS_BASE;
 +
 +      intel_pps_unlock_regs_wa(dev_priv);
 +}
 +
  static void intel_setup_outputs(struct drm_device *dev)
  {
        struct drm_i915_private *dev_priv = to_i915(dev);
        struct intel_encoder *encoder;
        bool dpd_is_edp = false;
  
 +      intel_pps_init(dev_priv);
 +
        /*
         * intel_edp_init_connector() depends on this completing first, to
         * prevent the registration of both eDP and LVDS and the incorrect
@@@ -15551,7 -14921,7 +15553,7 @@@ static void intel_user_framebuffer_dest
        drm_framebuffer_cleanup(fb);
        mutex_lock(&dev->struct_mutex);
        WARN_ON(!intel_fb->obj->framebuffer_references--);
 -      drm_gem_object_unreference(&intel_fb->obj->base);
 +      i915_gem_object_put(intel_fb->obj);
        mutex_unlock(&dev->struct_mutex);
        kfree(intel_fb);
  }
@@@ -15631,27 -15001,24 +15633,27 @@@ static int intel_framebuffer_init(struc
                                  struct drm_i915_gem_object *obj)
  {
        struct drm_i915_private *dev_priv = to_i915(dev);
 -      unsigned int aligned_height;
 +      unsigned int tiling = i915_gem_object_get_tiling(obj);
        int ret;
        u32 pitch_limit, stride_alignment;
 +      char *format_name;
  
        WARN_ON(!mutex_is_locked(&dev->struct_mutex));
  
        if (mode_cmd->flags & DRM_MODE_FB_MODIFIERS) {
 -              /* Enforce that fb modifier and tiling mode match, but only for
 -               * X-tiled. This is needed for FBC. */
 -              if (!!(obj->tiling_mode == I915_TILING_X) !=
 -                  !!(mode_cmd->modifier[0] == I915_FORMAT_MOD_X_TILED)) {
 +              /*
 +               * If there's a fence, enforce that
 +               * the fb modifier and tiling mode match.
 +               */
 +              if (tiling != I915_TILING_NONE &&
 +                  tiling != intel_fb_modifier_to_tiling(mode_cmd->modifier[0])) {
                        DRM_DEBUG("tiling_mode doesn't match fb modifier\n");
                        return -EINVAL;
                }
        } else {
 -              if (obj->tiling_mode == I915_TILING_X)
 +              if (tiling == I915_TILING_X) {
                        mode_cmd->modifier[0] = I915_FORMAT_MOD_X_TILED;
 -              else if (obj->tiling_mode == I915_TILING_Y) {
 +              } else if (tiling == I915_TILING_Y) {
                        DRM_DEBUG("No Y tiling for legacy addfb\n");
                        return -EINVAL;
                }
                return -EINVAL;
        }
  
 +      /*
 +       * gen2/3 display engine uses the fence if present,
 +       * so the tiling mode must match the fb modifier exactly.
 +       */
 +      if (INTEL_INFO(dev_priv)->gen < 4 &&
 +          tiling != intel_fb_modifier_to_tiling(mode_cmd->modifier[0])) {
 +              DRM_DEBUG("tiling_mode must match fb modifier exactly on gen2/3\n");
 +              return -EINVAL;
 +      }
 +
        stride_alignment = intel_fb_stride_alignment(dev_priv,
                                                     mode_cmd->modifier[0],
                                                     mode_cmd->pixel_format);
                return -EINVAL;
        }
  
 -      if (mode_cmd->modifier[0] == I915_FORMAT_MOD_X_TILED &&
 -          mode_cmd->pitches[0] != obj->stride) {
 +      /*
 +       * If there's a fence, enforce that
 +       * the fb pitch and fence stride match.
 +       */
 +      if (tiling != I915_TILING_NONE &&
 +          mode_cmd->pitches[0] != i915_gem_object_get_stride(obj)) {
                DRM_DEBUG("pitch (%d) must match tiling stride (%d)\n",
 -                        mode_cmd->pitches[0], obj->stride);
 +                        mode_cmd->pitches[0],
 +                        i915_gem_object_get_stride(obj));
                return -EINVAL;
        }
  
                break;
        case DRM_FORMAT_XRGB1555:
                if (INTEL_INFO(dev)->gen > 3) {
 -                      DRM_DEBUG("unsupported pixel format: %s\n",
 -                                drm_get_format_name(mode_cmd->pixel_format));
 +                      format_name = drm_get_format_name(mode_cmd->pixel_format);
 +                      DRM_DEBUG("unsupported pixel format: %s\n", format_name);
 +                      kfree(format_name);
                        return -EINVAL;
                }
                break;
        case DRM_FORMAT_ABGR8888:
                if (!IS_VALLEYVIEW(dev) && !IS_CHERRYVIEW(dev) &&
                    INTEL_INFO(dev)->gen < 9) {
 -                      DRM_DEBUG("unsupported pixel format: %s\n",
 -                                drm_get_format_name(mode_cmd->pixel_format));
 +                      format_name = drm_get_format_name(mode_cmd->pixel_format);
 +                      DRM_DEBUG("unsupported pixel format: %s\n", format_name);
 +                      kfree(format_name);
                        return -EINVAL;
                }
                break;
        case DRM_FORMAT_XRGB2101010:
        case DRM_FORMAT_XBGR2101010:
                if (INTEL_INFO(dev)->gen < 4) {
 -                      DRM_DEBUG("unsupported pixel format: %s\n",
 -                                drm_get_format_name(mode_cmd->pixel_format));
 +                      format_name = drm_get_format_name(mode_cmd->pixel_format);
 +                      DRM_DEBUG("unsupported pixel format: %s\n", format_name);
 +                      kfree(format_name);
                        return -EINVAL;
                }
                break;
        case DRM_FORMAT_ABGR2101010:
                if (!IS_VALLEYVIEW(dev) && !IS_CHERRYVIEW(dev)) {
 -                      DRM_DEBUG("unsupported pixel format: %s\n",
 -                                drm_get_format_name(mode_cmd->pixel_format));
 +                      format_name = drm_get_format_name(mode_cmd->pixel_format);
 +                      DRM_DEBUG("unsupported pixel format: %s\n", format_name);
 +                      kfree(format_name);
                        return -EINVAL;
                }
                break;
        case DRM_FORMAT_YVYU:
        case DRM_FORMAT_VYUY:
                if (INTEL_INFO(dev)->gen < 5) {
 -                      DRM_DEBUG("unsupported pixel format: %s\n",
 -                                drm_get_format_name(mode_cmd->pixel_format));
 +                      format_name = drm_get_format_name(mode_cmd->pixel_format);
 +                      DRM_DEBUG("unsupported pixel format: %s\n", format_name);
 +                      kfree(format_name);
                        return -EINVAL;
                }
                break;
        default:
 -              DRM_DEBUG("unsupported pixel format: %s\n",
 -                        drm_get_format_name(mode_cmd->pixel_format));
 +              format_name = drm_get_format_name(mode_cmd->pixel_format);
 +              DRM_DEBUG("unsupported pixel format: %s\n", format_name);
 +              kfree(format_name);
                return -EINVAL;
        }
  
        if (mode_cmd->offsets[0] != 0)
                return -EINVAL;
  
 -      aligned_height = intel_fb_align_height(dev, mode_cmd->height,
 -                                             mode_cmd->pixel_format,
 -                                             mode_cmd->modifier[0]);
 -      /* FIXME drm helper for size checks (especially planar formats)? */
 -      if (obj->base.size < aligned_height * mode_cmd->pitches[0])
 -              return -EINVAL;
 -
        drm_helper_mode_fill_fb_struct(&intel_fb->base, mode_cmd);
        intel_fb->obj = obj;
  
 -      intel_fill_fb_info(dev_priv, &intel_fb->base);
 +      ret = intel_fill_fb_info(dev_priv, &intel_fb->base);
 +      if (ret)
 +              return ret;
  
        ret = drm_framebuffer_init(dev, &intel_fb->base, &intel_fb_funcs);
        if (ret) {
@@@ -15807,13 -15158,13 +15809,13 @@@ intel_user_framebuffer_create(struct dr
        struct drm_i915_gem_object *obj;
        struct drm_mode_fb_cmd2 mode_cmd = *user_mode_cmd;
  
 -      obj = to_intel_bo(drm_gem_object_lookup(filp, mode_cmd.handles[0]));
 -      if (&obj->base == NULL)
 +      obj = i915_gem_object_lookup(filp, mode_cmd.handles[0]);
 +      if (!obj)
                return ERR_PTR(-ENOENT);
  
        fb = intel_framebuffer_create(dev, &mode_cmd, obj);
        if (IS_ERR(fb))
 -              drm_gem_object_unreference_unlocked(&obj->base);
 +              i915_gem_object_put_unlocked(obj);
  
        return fb;
  }
@@@ -15996,11 -15347,6 +15998,11 @@@ void intel_init_display_hooks(struct dr
                        skl_modeset_calc_cdclk;
        }
  
 +      if (dev_priv->info.gen >= 9)
 +              dev_priv->display.update_crtcs = skl_update_crtcs;
 +      else
 +              dev_priv->display.update_crtcs = intel_update_crtcs;
 +
        switch (INTEL_INFO(dev_priv)->gen) {
        case 2:
                dev_priv->display.queue_flip = intel_gen2_queue_flip;
@@@ -16202,16 -15548,15 +16204,16 @@@ static void intel_init_quirks(struct dr
  static void i915_disable_vga(struct drm_device *dev)
  {
        struct drm_i915_private *dev_priv = to_i915(dev);
 +      struct pci_dev *pdev = dev_priv->drm.pdev;
        u8 sr1;
        i915_reg_t vga_reg = i915_vgacntrl_reg(dev);
  
        /* WaEnableVGAAccessThroughIOPort:ctg,elk,ilk,snb,ivb,vlv,hsw */
 -      vga_get_uninterruptible(dev->pdev, VGA_RSRC_LEGACY_IO);
 +      vga_get_uninterruptible(pdev, VGA_RSRC_LEGACY_IO);
        outb(SR01, VGA_SR_INDEX);
        sr1 = inb(VGA_SR_DATA);
        outb(sr1 | 1<<5, VGA_SR_DATA);
 -      vga_put(dev->pdev, VGA_RSRC_LEGACY_IO);
 +      vga_put(pdev, VGA_RSRC_LEGACY_IO);
        udelay(300);
  
        I915_WRITE(vga_reg, VGA_DISP_DISABLE);
@@@ -16227,6 -15572,7 +16229,6 @@@ void intel_modeset_init_hw(struct drm_d
        dev_priv->atomic_cdclk_freq = dev_priv->cdclk_freq;
  
        intel_init_clock_gating(dev);
 -      intel_enable_gt_powersave(dev_priv);
  }
  
  /*
@@@ -16493,22 -15839,15 +16495,22 @@@ static bool intel_crtc_has_encoders(str
        return false;
  }
  
 -static bool intel_encoder_has_connectors(struct intel_encoder *encoder)
 +static struct intel_connector *intel_encoder_find_connector(struct intel_encoder *encoder)
  {
        struct drm_device *dev = encoder->base.dev;
        struct intel_connector *connector;
  
        for_each_connector_on_encoder(dev, &encoder->base, connector)
 -              return true;
 +              return connector;
  
 -      return false;
 +      return NULL;
 +}
 +
 +static bool has_pch_trancoder(struct drm_i915_private *dev_priv,
 +                            enum transcoder pch_transcoder)
 +{
 +      return HAS_PCH_IBX(dev_priv) || HAS_PCH_CPT(dev_priv) ||
 +              (HAS_PCH_LPT_H(dev_priv) && pch_transcoder == TRANSCODER_A);
  }
  
  static void intel_sanitize_crtc(struct intel_crtc *crtc)
                 * Temporarily change the plane mapping and disable everything
                 * ...  */
                plane = crtc->plane;
 -              to_intel_plane_state(crtc->base.primary->state)->visible = true;
 +              to_intel_plane_state(crtc->base.primary->state)->base.visible = true;
                crtc->plane = !plane;
                intel_crtc_disable_noatomic(&crtc->base);
                crtc->plane = plane;
                 * worst a fifo underrun happens which also sets this to false.
                 */
                crtc->cpu_fifo_underrun_disabled = true;
 -              crtc->pch_fifo_underrun_disabled = true;
 +              /*
 +               * We track the PCH transcoder underrun reporting state
 +               * within the crtc. With crtc for pipe A housing the underrun
 +               * reporting state for PCH transcoder A, crtc for pipe B housing
 +               * it for PCH transcoder B, etc. LPT-H has only PCH transcoder A,
 +               * and marking underrun reporting as disabled for the non-existing
 +               * PCH transcoders B and C would prevent enabling the south
 +               * error interrupt (see cpt_can_enable_serr_int()).
 +               */
 +              if (has_pch_trancoder(dev_priv, (enum transcoder)crtc->pipe))
 +                      crtc->pch_fifo_underrun_disabled = true;
        }
  }
  
  static void intel_sanitize_encoder(struct intel_encoder *encoder)
  {
        struct intel_connector *connector;
 -      struct drm_device *dev = encoder->base.dev;
  
        /* We need to check both for a crtc link (meaning that the
         * encoder is active and trying to read from a pipe) and the
        bool has_active_crtc = encoder->base.crtc &&
                to_intel_crtc(encoder->base.crtc)->active;
  
 -      if (intel_encoder_has_connectors(encoder) && !has_active_crtc) {
 +      connector = intel_encoder_find_connector(encoder);
 +      if (connector && !has_active_crtc) {
                DRM_DEBUG_KMS("[ENCODER:%d:%s] has active connectors but no active pipe!\n",
                              encoder->base.base.id,
                              encoder->base.name);
                 * fallout from our resume register restoring. Disable
                 * the encoder manually again. */
                if (encoder->base.crtc) {
 +                      struct drm_crtc_state *crtc_state = encoder->base.crtc->state;
 +
                        DRM_DEBUG_KMS("[ENCODER:%d:%s] manually disabled\n",
                                      encoder->base.base.id,
                                      encoder->base.name);
 -                      encoder->disable(encoder);
 +                      encoder->disable(encoder, to_intel_crtc_state(crtc_state), connector->base.state);
                        if (encoder->post_disable)
 -                              encoder->post_disable(encoder);
 +                              encoder->post_disable(encoder, to_intel_crtc_state(crtc_state), connector->base.state);
                }
                encoder->base.crtc = NULL;
  
                 * a bug in one of the get_hw_state functions. Or someplace else
                 * in our code, like the register restore mess on resume. Clamp
                 * things to off as a safer default. */
 -              for_each_intel_connector(dev, connector) {
 -                      if (connector->encoder != encoder)
 -                              continue;
 -                      connector->base.dpms = DRM_MODE_DPMS_OFF;
 -                      connector->base.encoder = NULL;
 -              }
 +
 +              connector->base.dpms = DRM_MODE_DPMS_OFF;
 +              connector->base.encoder = NULL;
        }
        /* Enabled encoders without active connectors will be fixed in
         * the crtc fixup. */
@@@ -16690,10 -16020,10 +16692,10 @@@ static void readout_plane_state(struct 
        struct intel_plane_state *plane_state =
                to_intel_plane_state(primary->state);
  
 -      plane_state->visible = crtc->active &&
 +      plane_state->base.visible = crtc->active &&
                primary_get_hw_state(to_intel_plane(primary));
  
 -      if (plane_state->visible)
 +      if (plane_state->base.visible)
                crtc->base.state->plane_mask |= 1 << drm_plane_index(primary);
  }
  
@@@ -16952,6 -16282,7 +16954,6 @@@ void intel_modeset_gem_init(struct drm_
        struct drm_i915_private *dev_priv = to_i915(dev);
        struct drm_crtc *c;
        struct drm_i915_gem_object *obj;
 -      int ret;
  
        intel_init_gt_powersave(dev_priv);
  
         * for this.
         */
        for_each_crtc(dev, c) {
 +              struct i915_vma *vma;
 +
                obj = intel_fb_obj(c->primary->fb);
                if (obj == NULL)
                        continue;
  
                mutex_lock(&dev->struct_mutex);
 -              ret = intel_pin_and_fence_fb_obj(c->primary->fb,
 +              vma = intel_pin_and_fence_fb_obj(c->primary->fb,
                                                 c->primary->state->rotation);
                mutex_unlock(&dev->struct_mutex);
 -              if (ret) {
 +              if (IS_ERR(vma)) {
                        DRM_ERROR("failed to pin boot fb on pipe %d\n",
                                  to_intel_crtc(c)->pipe);
                        drm_framebuffer_unreference(c->primary->fb);
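
The repeated format_name changes above follow from drm_get_format_name() in this tree returning a kmalloc()'ed string that the caller must free. A hypothetical convenience wrapper, not part of the patch, shown only to summarize the new calling convention:

	/* Hypothetical helper, not in the patch: log an unsupported pixel format
	 * and free the name buffer that drm_get_format_name() now allocates. */
	static void log_unsupported_format(u32 pixel_format)
	{
		char *format_name = drm_get_format_name(pixel_format);

		DRM_DEBUG("unsupported pixel format: %s\n", format_name);
		kfree(format_name);
	}
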
index 9df29f1cb16af029a7bcbdd93a56562be464a2ef,462056e4b9e48b80cd5bf30ed618b12c27ae6c97..4e1ae3fc462dc65591d2fa5b3f6dffe3ee8a4ad4
@@@ -21,6 -21,7 +21,6 @@@
  #include <drm/drm_atomic.h>
  #include <drm/drm_atomic_helper.h>
  #include <drm/drm_crtc_helper.h>
 -#include <linux/fb.h>
  #include <linux/clk.h>
  #include <linux/errno.h>
  #include <drm/drm_gem_cma_helper.h>
@@@ -60,8 -61,7 +60,8 @@@ static void ipu_crtc_enable(struct drm_
        ipu_di_enable(ipu_crtc->di);
  }
  
 -static void ipu_crtc_disable(struct drm_crtc *crtc)
 +static void ipu_crtc_atomic_disable(struct drm_crtc *crtc,
 +                                  struct drm_crtc_state *old_crtc_state)
  {
        struct ipu_crtc *ipu_crtc = to_ipu_crtc(crtc);
        struct ipu_soc *ipu = dev_get_drvdata(ipu_crtc->dev->parent);
        }
        spin_unlock_irq(&crtc->dev->event_lock);
  
 +      /* always disable planes on the CRTC */
 +      drm_atomic_helper_disable_planes_on_crtc(old_crtc_state, true);
++
+       drm_crtc_vblank_off(crtc);
  }
  
  static void imx_drm_crtc_reset(struct drm_crtc *crtc)
@@@ -124,14 -123,9 +126,14 @@@ static void imx_drm_crtc_destroy_state(
        kfree(to_imx_crtc_state(state));
  }
  
 +static void imx_drm_crtc_destroy(struct drm_crtc *crtc)
 +{
 +      imx_drm_remove_crtc(to_ipu_crtc(crtc)->imx_crtc);
 +}
 +
  static const struct drm_crtc_funcs ipu_crtc_funcs = {
        .set_config = drm_atomic_helper_set_config,
 -      .destroy = drm_crtc_cleanup,
 +      .destroy = imx_drm_crtc_destroy,
        .page_flip = drm_atomic_helper_page_flip,
        .reset = imx_drm_crtc_reset,
        .atomic_duplicate_state = imx_drm_crtc_duplicate_state,
@@@ -142,7 -136,7 +144,7 @@@ static irqreturn_t ipu_irq_handler(int 
  {
        struct ipu_crtc *ipu_crtc = dev_id;
  
 -      imx_drm_handle_vblank(ipu_crtc->imx_crtc);
 +      drm_crtc_handle_vblank(&ipu_crtc->base);
  
        return IRQ_HANDLED;
  }
@@@ -183,6 -177,8 +185,8 @@@ static int ipu_crtc_atomic_check(struc
  static void ipu_crtc_atomic_begin(struct drm_crtc *crtc,
                                  struct drm_crtc_state *old_crtc_state)
  {
+       drm_crtc_vblank_on(crtc);
        spin_lock_irq(&crtc->dev->event_lock);
        if (crtc->state->event) {
                WARN_ON(drm_crtc_vblank_get(crtc));
@@@ -250,7 -246,7 +254,7 @@@ static const struct drm_crtc_helper_fun
        .mode_set_nofb = ipu_crtc_mode_set_nofb,
        .atomic_check = ipu_crtc_atomic_check,
        .atomic_begin = ipu_crtc_atomic_begin,
 -      .disable = ipu_crtc_disable,
 +      .atomic_disable = ipu_crtc_atomic_disable,
        .enable = ipu_crtc_enable,
  };
  
@@@ -418,6 -414,8 +422,6 @@@ static void ipu_drm_unbind(struct devic
  {
        struct ipu_crtc *ipu_crtc = dev_get_drvdata(dev);
  
 -      imx_drm_remove_crtc(ipu_crtc->imx_crtc);
 -
        ipu_put_resources(ipu_crtc);
        if (ipu_crtc->plane[1])
                ipu_plane_put_resources(ipu_crtc->plane[1]);
index 0a9b5580b2e92ce8322ad795152c8dd3b1b393f9,85f3047e05aee05aa42a654f9be48cb899964acf..b6ac27e3192964cbf2fa1f5a4c139ab282deaa71
@@@ -196,11 -196,20 +196,20 @@@ int msm_gem_fault(struct vm_area_struc
  {
        struct drm_gem_object *obj = vma->vm_private_data;
        struct drm_device *dev = obj->dev;
+       struct msm_drm_private *priv = dev->dev_private;
        struct page **pages;
        unsigned long pfn;
        pgoff_t pgoff;
        int ret;
  
+       /* This should only happen if userspace tries to pass a mmap'd
+        * but unfaulted gem bo vaddr into submit ioctl, triggering
+        * a page fault while struct_mutex is already held.  This is
+        * not a valid use-case so just bail.
+        */
+       if (priv->struct_mutex_task == current)
+               return VM_FAULT_SIGBUS;
        /* Make sure we don't parallel update on a fault, nor move or remove
         * something from beneath our feet
         */
@@@ -584,16 -593,18 +593,16 @@@ int msm_gem_cpu_prep(struct drm_gem_obj
  {
        struct msm_gem_object *msm_obj = to_msm_bo(obj);
        bool write = !!(op & MSM_PREP_WRITE);
 -
 -      if (op & MSM_PREP_NOSYNC) {
 -              if (!reservation_object_test_signaled_rcu(msm_obj->resv, write))
 -                      return -EBUSY;
 -      } else {
 -              int ret;
 -
 -              ret = reservation_object_wait_timeout_rcu(msm_obj->resv, write,
 -                              true, timeout_to_jiffies(timeout));
 -              if (ret <= 0)
 -                      return ret == 0 ? -ETIMEDOUT : ret;
 -      }
 +      unsigned long remain =
 +              op & MSM_PREP_NOSYNC ? 0 : timeout_to_jiffies(timeout);
 +      long ret;
 +
 +      ret = reservation_object_wait_timeout_rcu(msm_obj->resv, write,
 +                                                true,  remain);
 +      if (ret == 0)
 +              return remain == 0 ? -EBUSY : -ETIMEDOUT;
 +      else if (ret < 0)
 +              return ret;
  
        /* TODO cache maintenance */
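
The msm_gem_cpu_prep() rewrite above folds the MSM_PREP_NOSYNC case into a single reservation wait by passing a zero timeout for the non-blocking check. A hypothetical helper, not in the patch, spelling out the resulting return-value mapping:

	/* Hypothetical helper, not in the patch: the mapping implemented above.
	 * A zero "remain" means the caller asked for a non-blocking check. */
	static int prep_wait_to_errno(long ret, unsigned long remain)
	{
		if (ret == 0)
			return remain == 0 ? -EBUSY : -ETIMEDOUT;
		return ret < 0 ? ret : 0;
	}
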
  
index 3ac14cd1e5b9a023666a3b576a8ed73de312dc0d,880d6a9af7c8d28dae1beb87492e849702709fb0..b6a0f37a65f30cad9f85d5773fca802e9ea3bb6b
@@@ -15,8 -15,6 +15,8 @@@
   * this program.  If not, see <http://www.gnu.org/licenses/>.
   */
  
 +#include <linux/sync_file.h>
 +
  #include "msm_drv.h"
  #include "msm_gpu.h"
  #include "msm_gem.h"
@@@ -66,6 -64,14 +66,14 @@@ void msm_gem_submit_free(struct msm_gem
        kfree(submit);
  }
  
+ static inline unsigned long __must_check
+ copy_from_user_inatomic(void *to, const void __user *from, unsigned long n)
+ {
+       if (access_ok(VERIFY_READ, from, n))
+               return __copy_from_user_inatomic(to, from, n);
+       return -EFAULT;
+ }
  static int submit_lookup_objects(struct msm_gem_submit *submit,
                struct drm_msm_gem_submit *args, struct drm_file *file)
  {
@@@ -73,6 -79,7 +81,7 @@@
        int ret = 0;
  
        spin_lock(&file->table_lock);
+       pagefault_disable();
  
        for (i = 0; i < args->nr_bos; i++) {
                struct drm_msm_gem_submit_bo submit_bo;
                 */
                submit->bos[i].flags = 0;
  
-               ret = copy_from_user(&submit_bo, userptr, sizeof(submit_bo));
-               if (ret) {
-                       ret = -EFAULT;
-                       goto out_unlock;
+               ret = copy_from_user_inatomic(&submit_bo, userptr, sizeof(submit_bo));
+               if (unlikely(ret)) {
+                       pagefault_enable();
+                       spin_unlock(&file->table_lock);
+                       ret = copy_from_user(&submit_bo, userptr, sizeof(submit_bo));
+                       if (ret)
+                               goto out;
+                       spin_lock(&file->table_lock);
+                       pagefault_disable();
                }
  
                if (submit_bo.flags & ~MSM_SUBMIT_BO_FLAGS) {
        }
  
  out_unlock:
-       submit->nr_bos = i;
+       pagefault_enable();
        spin_unlock(&file->table_lock);
  
+ out:
+       submit->nr_bos = i;
        return ret;
  }
  
@@@ -363,9 -378,6 +380,9 @@@ int msm_ioctl_gem_submit(struct drm_dev
        struct msm_file_private *ctx = file->driver_priv;
        struct msm_gem_submit *submit;
        struct msm_gpu *gpu = priv->gpu;
 +      struct fence *in_fence = NULL;
 +      struct sync_file *sync_file = NULL;
 +      int out_fence_fd = -1;
        unsigned i;
        int ret;
  
        /* for now, we just have 3d pipe.. eventually this would need to
         * be more clever to dispatch to appropriate gpu module:
         */
 -      if (args->pipe != MSM_PIPE_3D0)
 +      if (MSM_PIPE_ID(args->flags) != MSM_PIPE_3D0)
 +              return -EINVAL;
 +
 +      if (MSM_PIPE_FLAGS(args->flags) & ~MSM_SUBMIT_FLAGS)
                return -EINVAL;
  
        ret = mutex_lock_interruptible(&dev->struct_mutex);
        if (ret)
                return ret;
  
 +      if (args->flags & MSM_SUBMIT_FENCE_FD_OUT) {
 +              out_fence_fd = get_unused_fd_flags(O_CLOEXEC);
 +              if (out_fence_fd < 0) {
 +                      ret = out_fence_fd;
 +                      goto out_unlock;
 +              }
 +      }
+       priv->struct_mutex_task = current;
  
        submit = submit_create(dev, gpu, args->nr_bos, args->nr_cmds);
        if (!submit) {
        if (ret)
                goto out;
  
 -      ret = submit_fence_sync(submit);
 -      if (ret)
 -              goto out;
 +      if (args->flags & MSM_SUBMIT_FENCE_FD_IN) {
 +              in_fence = sync_file_get_fence(args->fence_fd);
 +
 +              if (!in_fence) {
 +                      ret = -EINVAL;
 +                      goto out;
 +              }
 +
 +              /* TODO if we get an array-fence due to userspace merging multiple
 +               * fences, we need a way to determine if all the backing fences
 +               * are from our own context..
 +               */
 +
 +              if (in_fence->context != gpu->fctx->context) {
 +                      ret = fence_wait(in_fence, true);
 +                      if (ret)
 +                              goto out;
 +              }
 +
 +      }
 +
 +      if (!(args->flags & MSM_SUBMIT_NO_IMPLICIT)) {
 +              ret = submit_fence_sync(submit);
 +              if (ret)
 +                      goto out;
 +      }
  
        ret = submit_pin_objects(submit);
        if (ret)
  
        submit->nr_cmds = i;
  
 -      ret = msm_gpu_submit(gpu, submit, ctx);
 +      submit->fence = msm_fence_alloc(gpu->fctx);
 +      if (IS_ERR(submit->fence)) {
 +              ret = PTR_ERR(submit->fence);
 +              submit->fence = NULL;
 +              goto out;
 +      }
 +
 +      if (args->flags & MSM_SUBMIT_FENCE_FD_OUT) {
 +              sync_file = sync_file_create(submit->fence);
 +              if (!sync_file) {
 +                      ret = -ENOMEM;
 +                      goto out;
 +              }
 +      }
 +
 +      msm_gpu_submit(gpu, submit, ctx);
  
        args->fence = submit->fence->seqno;
  
 +      if (args->flags & MSM_SUBMIT_FENCE_FD_OUT) {
 +              fd_install(out_fence_fd, sync_file->file);
 +              args->fence_fd = out_fence_fd;
 +      }
 +
  out:
 +      if (in_fence)
 +              fence_put(in_fence);
        submit_cleanup(submit);
        if (ret)
                msm_gem_submit_free(submit);
  out_unlock:
 +      if (ret && (out_fence_fd >= 0))
 +              put_unused_fd(out_fence_fd);
+       priv->struct_mutex_task = NULL;
        mutex_unlock(&dev->struct_mutex);
        return ret;
  }
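
Two hunks in this file cooperate with the msm_gem_fault() change earlier in the diff to avoid a self-deadlock on struct_mutex. A condensed sketch of the pairing, fragments only, using the names as they appear in the hunks:

	/* In msm_ioctl_gem_submit(), with dev->struct_mutex held: */
	priv->struct_mutex_task = current;
	/* ... copy bos/cmds from userspace, pin buffers, submit ... */
	priv->struct_mutex_task = NULL;
	mutex_unlock(&dev->struct_mutex);

	/* In msm_gem_fault(): refuse to recurse into the submit path's mmap. */
	if (priv->struct_mutex_task == current)
		return VM_FAULT_SIGBUS;
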
index 4824f70b0258e856f1d613867de1627af70eda34,1dcf39084555b7edb15ab6a816ead9a0fe60374c..a4e9f35da3a22e87a2c35ad18b99f359999f4e9a
@@@ -627,7 -627,9 +627,9 @@@ static u32 atombios_adjust_pll(struct d
                        if (radeon_crtc->ss.refdiv) {
                                radeon_crtc->pll_flags |= RADEON_PLL_USE_REF_DIV;
                                radeon_crtc->pll_reference_div = radeon_crtc->ss.refdiv;
-                               if (rdev->family >= CHIP_RV770)
+                               if (ASIC_IS_AVIVO(rdev) &&
+                                   rdev->family != CHIP_RS780 &&
+                                   rdev->family != CHIP_RS880)
                                        radeon_crtc->pll_flags |= RADEON_PLL_USE_FRAC_FB_DIV;
                        }
                }
@@@ -1154,7 -1156,6 +1156,7 @@@ static int dce4_crtc_do_set_base(struc
        u32 tmp, viewport_w, viewport_h;
        int r;
        bool bypass_lut = false;
 +      char *format_name;
  
        /* no fb bound */
        if (!atomic && !crtc->primary->fb) {
                bypass_lut = true;
                break;
        default:
 -              DRM_ERROR("Unsupported screen format %s\n",
 -                        drm_get_format_name(target_fb->pixel_format));
 +              format_name = drm_get_format_name(target_fb->pixel_format);
 +              DRM_ERROR("Unsupported screen format %s\n", format_name);
 +              kfree(format_name);
                return -EINVAL;
        }
  
        WREG32(EVERGREEN_VIEWPORT_SIZE + radeon_crtc->crtc_offset,
               (viewport_w << 16) | viewport_h);
  
 -      /* set pageflip to happen only at start of vblank interval (front porch) */
 -      WREG32(EVERGREEN_MASTER_UPDATE_MODE + radeon_crtc->crtc_offset, 3);
 +      /* set pageflip to happen anywhere in vblank interval */
 +      WREG32(EVERGREEN_MASTER_UPDATE_MODE + radeon_crtc->crtc_offset, 0);
  
        if (!atomic && fb && fb != crtc->primary->fb) {
                radeon_fb = to_radeon_framebuffer(fb);
@@@ -1471,7 -1471,6 +1473,7 @@@ static int avivo_crtc_do_set_base(struc
        u32 viewport_w, viewport_h;
        int r;
        bool bypass_lut = false;
 +      char *format_name;
  
        /* no fb bound */
        if (!atomic && !crtc->primary->fb) {
                bypass_lut = true;
                break;
        default:
 -              DRM_ERROR("Unsupported screen format %s\n",
 -                        drm_get_format_name(target_fb->pixel_format));
 +              format_name = drm_get_format_name(target_fb->pixel_format);
 +              DRM_ERROR("Unsupported screen format %s\n", format_name);
 +              kfree(format_name);
                return -EINVAL;
        }
  
        WREG32(AVIVO_D1MODE_VIEWPORT_SIZE + radeon_crtc->crtc_offset,
               (viewport_w << 16) | viewport_h);
  
 -      /* set pageflip to happen only at start of vblank interval (front porch) */
 -      WREG32(AVIVO_D1MODE_MASTER_UPDATE_MODE + radeon_crtc->crtc_offset, 3);
 +      /* set pageflip to happen anywhere in vblank interval */
 +      WREG32(AVIVO_D1MODE_MASTER_UPDATE_MODE + radeon_crtc->crtc_offset, 0);
  
        if (!atomic && fb && fb != crtc->primary->fb) {
                radeon_fb = to_radeon_framebuffer(fb);
index a1321b2fa454dc1a6ab8dd87bdbb9ac13a5c2ece,ddef0d4940843105de67327a7c5222d4ee6a78ed..2fdcd04bc93f7b9c6abf5d84752836e154d566b0
@@@ -29,7 -29,6 +29,7 @@@ struct radeon_atpx 
        acpi_handle handle;
        struct radeon_atpx_functions functions;
        bool is_hybrid;
 +      bool dgpu_req_power_for_displays;
  };
  
  static struct radeon_atpx_priv {
@@@ -73,10 -72,6 +73,10 @@@ bool radeon_is_atpx_hybrid(void) 
        return radeon_atpx_priv.atpx.is_hybrid;
  }
  
 +bool radeon_atpx_dgpu_req_power_for_displays(void) {
 +      return radeon_atpx_priv.atpx.dgpu_req_power_for_displays;
 +}
 +
  /**
   * radeon_atpx_call - call an ATPX method
   *
@@@ -203,16 -198,7 +203,7 @@@ static int radeon_atpx_validate(struct 
        atpx->is_hybrid = false;
        if (valid_bits & ATPX_MS_HYBRID_GFX_SUPPORTED) {
                printk("ATPX Hybrid Graphics\n");
- #if 1
-               /* This is a temporary hack until the D3 cold support
-                * makes it upstream.  The ATPX power_control method seems
-                * to still work on even if the system should be using
-                * the new standardized hybrid D3 cold ACPI interface.
-                */
-               atpx->functions.power_cntl = true;
- #else
                atpx->functions.power_cntl = false;
- #endif
                atpx->is_hybrid = true;
        }
  
index 27ee0ab0e1a75629c21ebe87761f490cdd3bbcea,c2e0a1ccdfbce8b0db402d1091363e070e465c9d..455268214b893eac36e8bbd65d5e2b18d2735483
@@@ -237,8 -237,7 +237,8 @@@ static int radeon_verify_access(struct 
  
        if (radeon_ttm_tt_has_userptr(bo->ttm))
                return -EPERM;
 -      return drm_vma_node_verify_access(&rbo->gem_base.vma_node, filp);
 +      return drm_vma_node_verify_access(&rbo->gem_base.vma_node,
 +                                        filp->private_data);
  }
  
  static void radeon_move_null(struct ttm_buffer_object *bo,
@@@ -264,8 -263,8 +264,8 @@@ static int radeon_move_blit(struct ttm_
  
        rdev = radeon_get_rdev(bo->bdev);
        ridx = radeon_copy_ring_index(rdev);
-       old_start = old_mem->start << PAGE_SHIFT;
-       new_start = new_mem->start << PAGE_SHIFT;
+       old_start = (u64)old_mem->start << PAGE_SHIFT;
+       new_start = (u64)new_mem->start << PAGE_SHIFT;
  
        switch (old_mem->mem_type) {
        case TTM_PL_VRAM:
@@@ -347,7 -346,7 +347,7 @@@ static int radeon_move_vram_ram(struct 
        if (unlikely(r)) {
                goto out_cleanup;
        }
 -      r = ttm_bo_move_ttm(bo, true, interruptible, no_wait_gpu, new_mem);
 +      r = ttm_bo_move_ttm(bo, interruptible, no_wait_gpu, new_mem);
  out_cleanup:
        ttm_bo_mem_put(bo, &tmp_mem);
        return r;
@@@ -380,7 -379,7 +380,7 @@@ static int radeon_move_ram_vram(struct 
        if (unlikely(r)) {
                return r;
        }
 -      r = ttm_bo_move_ttm(bo, true, interruptible, no_wait_gpu, &tmp_mem);
 +      r = ttm_bo_move_ttm(bo, interruptible, no_wait_gpu, &tmp_mem);
        if (unlikely(r)) {
                goto out_cleanup;
        }
@@@ -445,7 -444,8 +445,7 @@@ static int radeon_bo_move(struct ttm_bu
  
        if (r) {
  memcpy:
 -              r = ttm_bo_move_memcpy(bo, evict, interruptible,
 -                                     no_wait_gpu, new_mem);
 +              r = ttm_bo_move_memcpy(bo, interruptible, no_wait_gpu, new_mem);
                if (r) {
                        return r;
                }
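
The (u64) casts added above make the page-offset shift happen in 64-bit arithmetic; with a 32-bit unsigned long, offsets at or above 4 GiB would wrap before being widened. A standalone userspace illustration of the failure mode, assuming a 32-bit index type (not kernel code):

	#include <stdint.h>
	#include <stdio.h>

	#define PAGE_SHIFT 12

	int main(void)
	{
		uint32_t start = 0x00200000;	/* page index of an 8 GiB offset */
		uint64_t wraps = start << PAGE_SHIFT;		/* shift done in 32 bits: wraps to 0 */
		uint64_t fixed = (uint64_t)start << PAGE_SHIFT;	/* 0x200000000 */

		printf("wraps=%#llx fixed=%#llx\n",
		       (unsigned long long)wraps, (unsigned long long)fixed);
		return 0;
	}
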
index 3c9e7f64b9261efff0faef744f5131a4899cda16,9ecef93854914579ee74b4d96432d15dfaa1fd6b..8703f56b794774ac4130a7b487472e293ebe7cdf
@@@ -16,7 -16,6 +16,7 @@@
  #include <linux/platform_device.h>
  #include <linux/pm_runtime.h>
  #include "drm_fb_cma_helper.h"
 +#include <drm/drm_fb_helper.h>
  
  #include "uapi/drm/vc4_drm.h"
  #include "vc4_drv.h"
@@@ -58,21 -57,21 +58,21 @@@ static int vc4_get_param_ioctl(struct d
        switch (args->param) {
        case DRM_VC4_PARAM_V3D_IDENT0:
                ret = pm_runtime_get_sync(&vc4->v3d->pdev->dev);
-               if (ret)
+               if (ret < 0)
                        return ret;
                args->value = V3D_READ(V3D_IDENT0);
                pm_runtime_put(&vc4->v3d->pdev->dev);
                break;
        case DRM_VC4_PARAM_V3D_IDENT1:
                ret = pm_runtime_get_sync(&vc4->v3d->pdev->dev);
-               if (ret)
+               if (ret < 0)
                        return ret;
                args->value = V3D_READ(V3D_IDENT1);
                pm_runtime_put(&vc4->v3d->pdev->dev);
                break;
        case DRM_VC4_PARAM_V3D_IDENT2:
                ret = pm_runtime_get_sync(&vc4->v3d->pdev->dev);
-               if (ret)
+               if (ret < 0)
                        return ret;
                args->value = V3D_READ(V3D_IDENT2);
                pm_runtime_put(&vc4->v3d->pdev->dev);
@@@ -215,7 -214,7 +215,7 @@@ static void vc4_kick_out_firmware_fb(vo
        ap->ranges[0].base = 0;
        ap->ranges[0].size = ~0;
  
 -      remove_conflicting_framebuffers(ap, "vc4drmfb", false);
 +      drm_fb_helper_remove_conflicting_framebuffers(ap, "vc4drmfb", false);
        kfree(ap);
  }
  
@@@ -233,8 -232,8 +233,8 @@@ static int vc4_drm_bind(struct device *
                return -ENOMEM;
  
        drm = drm_dev_alloc(&vc4_drm_driver, dev);
 -      if (!drm)
 -              return -ENOMEM;
 +      if (IS_ERR(drm))
 +              return PTR_ERR(drm);
        platform_set_drvdata(pdev, drm);
        vc4->dev = drm;
        drm->dev_private = vc4;
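
The ret < 0 checks above account for pm_runtime_get_sync() returning 1 when the device is already active, which the old if (ret) test misread as failure. A minimal sketch of the convention, with dev standing in for &vc4->v3d->pdev->dev:

	ret = pm_runtime_get_sync(dev);	/* 0: resumed now, 1: already active, <0: error */
	if (ret < 0)
		return ret;
	/* device is powered: read the V3D_IDENT register, then pm_runtime_put(dev) */
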
index 27c52ec351939eca2131ff2bfd5f198cf097ebaa,b262c5c26f109702572ed613fa0965f06a993ef9..77daea6cb8668df85d70d4b760f4e5a5be41800a
@@@ -419,6 -419,10 +419,6 @@@ again
  
        vc4_flush_caches(dev);
  
 -      /* Disable the binner's pre-loaded overflow memory address */
 -      V3D_WRITE(V3D_BPOA, 0);
 -      V3D_WRITE(V3D_BPOS, 0);
 -
        /* Either put the job in the binner if it uses the binner, or
         * immediately move it to the to-be-rendered queue.
         */
@@@ -530,8 -534,8 +530,8 @@@ vc4_cl_lookup_bos(struct drm_device *de
                return -EINVAL;
        }
  
-       exec->bo = kcalloc(exec->bo_count, sizeof(struct drm_gem_cma_object *),
-                          GFP_KERNEL);
+       exec->bo = drm_calloc_large(exec->bo_count,
+                                   sizeof(struct drm_gem_cma_object *));
        if (!exec->bo) {
                DRM_ERROR("Failed to allocate validated BO pointers\n");
                return -ENOMEM;
        spin_unlock(&file_priv->table_lock);
  
  fail:
-       kfree(handles);
-       return 0;
+       drm_free_large(handles);
+       return ret;
  }
  
  static int
@@@ -604,7 -608,7 +604,7 @@@ vc4_get_bcl(struct drm_device *dev, str
         * read the contents back for validation, and I think the
         * bo->vaddr is uncached access.
         */
-       temp = kmalloc(temp_size, GFP_KERNEL);
+       temp = drm_malloc_ab(temp_size, 1);
        if (!temp) {
                DRM_ERROR("Failed to allocate storage for copying "
                          "in bin/render CLs.\n");
        ret = vc4_validate_shader_recs(dev, exec);
  
  fail:
-       kfree(temp);
+       drm_free_large(temp);
        return ret;
  }
  
@@@ -684,7 -688,7 +684,7 @@@ vc4_complete_exec(struct drm_device *de
        if (exec->bo) {
                for (i = 0; i < exec->bo_count; i++)
                        drm_gem_object_unreference_unlocked(&exec->bo[i]->base);
-               kfree(exec->bo);
+               drm_free_large(exec->bo);
        }
  
        while (!list_empty(&exec->unref_list)) {
@@@ -938,8 -942,8 +938,8 @@@ vc4_gem_destroy(struct drm_device *dev
                vc4->overflow_mem = NULL;
        }
  
-       vc4_bo_cache_destroy(dev);
        if (vc4->hang_state)
                vc4_free_hang_state(dev, vc4->hang_state);
+       vc4_bo_cache_destroy(dev);
  }