S: Supported
+ F: Documentation/devicetree/bindings/staging/ion/
F: drivers/staging/android/ion
F: drivers/staging/android/uapi/ion.h
F: drivers/staging/android/uapi/ion_test.h
F: drivers/gpu/drm/arc/
F: Documentation/devicetree/bindings/display/snps,arcpgu.txt
+ ARM ARCHITECTED TIMER DRIVER
+ S: Maintained
+ F: arch/arm/include/asm/arch_timer.h
+ F: arch/arm64/include/asm/arch_timer.h
+ F: drivers/clocksource/arm_arch_timer.c
+
ARM HDLCD DRM DRIVER
S: Supported
ARM/SAMSUNG EXYNOS ARM ARCHITECTURES
S: Maintained
F: drivers/*/*s5pv210*
F: drivers/memory/samsung/*
F: drivers/soc/samsung/*
- F: drivers/spi/spi-s3c*
F: Documentation/arm/Samsung/
F: Documentation/devicetree/bindings/arm/samsung/
F: Documentation/devicetree/bindings/sram/samsung-sram.txt
ARM/UNIPHIER ARCHITECTURE
+ T: git git://git.kernel.org/pub/scm/linux/kernel/git/masahiroy/linux-uniphier.git
S: Maintained
F: arch/arm/boot/dts/uniphier*
F: arch/arm/include/asm/hardware/cache-uniphier.h
BONDING DRIVER
W: http://sourceforge.net/projects/bonding/
S: Supported
F: kernel/bpf/
BROADCOM B44 10/100 ETHERNET DRIVER
- M: Gary Zambrano <zambrano@broadcom.com>
+ M: Michael Chan <michael.chan@broadcom.com>
S: Supported
F: drivers/net/ethernet/broadcom/b44.*
CONTROL GROUP - MEMORY RESOURCE CONTROLLER (MEMCG)
- M: Vladimir Davydov <vdavydov@virtuozzo.com>
+ M: Vladimir Davydov <vdavydov.dev@gmail.com>
S: Maintained
F: drivers/net/wan/cosa*
CPMAC ETHERNET DRIVER
S: Maintained
F: drivers/net/ethernet/ti/cpmac.c
F: drivers/gpu/drm/i810/
F: include/uapi/drm/i810_drm.h
+DRM DRIVERS FOR MEDIATEK
+S: Supported
+F: drivers/gpu/drm/mediatek/
+F: Documentation/devicetree/bindings/display/mediatek/
+
DRM DRIVER FOR MSM ADRENO GPU
S: Maintained
F: drivers/edac/sb_edac.c
+ EDAC-SKYLAKE
+ S: Maintained
+ F: drivers/edac/skx_edac.c
+
EDAC-XGENE
APPLIED MICRO (APM) X-GENE SOC EDAC
F: drivers/cpufreq/intel_pstate.c
INTEL FRAMEBUFFER DRIVER (excluding 810 and 815)
- M: Maik Broemme <mbroemme@plusserver.de>
+ M: Maik Broemme <mbroemme@libmpq.org>
S: Maintained
F: Documentation/fb/intelfb.txt
F: sound/soc/codecs/max9860.*
MAXIM MUIC CHARGER DRIVERS FOR EXYNOS BASED BOARDS
S: Supported
F: drivers/power/max14577_charger.c
MAXIM PMIC AND MUIC DRIVERS FOR EXYNOS BASED BOARDS
S: Supported
F: drivers/*/max14577*.c
S: Supported
W: https://github.com/SoftRoCE/rxe-dev/wiki/rxe-dev:-Home
Q: http://patchwork.kernel.org/project/linux-rdma/list/
- F: drivers/infiniband/hw/rxe/
+ F: drivers/infiniband/sw/rxe/
F: include/uapi/rdma/rdma_user_rxe.h
MEMBARRIER SUPPORT
W: https://fedorahosted.org/dropwatch/
F: net/core/drop_monitor.c
+ NETWORKING [DSA]
+ S: Maintained
+ F: net/dsa/
+ F: include/net/dsa.h
+ F: drivers/net/dsa/
+
NETWORKING [GENERAL]
PIN CONTROLLER - SAMSUNG
F: drivers/platform/x86/samsung-laptop.c
SAMSUNG AUDIO (ASoC) DRIVERS
SAMSUNG MULTIFUNCTION PMIC DEVICE DRIVERS
S: Supported
F: drivers/clk/samsung/
+ SAMSUNG SPI DRIVERS
+ S: Maintained
+ F: Documentation/devicetree/bindings/spi/spi-samsung.txt
+ F: drivers/spi/spi-s3c*
+ F: include/linux/platform_data/spi-s3c64xx.h
+
SAMSUNG SXGBE DRIVERS
F: drivers/staging/vt665?/
STAGING - WILC1000 WIFI DRIVER
S: Supported
F: drivers/staging/wilc1000/
F: net/8021q/
VLYNQ BUS
S: Maintained
F: drivers/vlynq/vlynq.c
#include "amdgpu_ih.h"
#include "amdgpu_irq.h"
#include "amdgpu_ucode.h"
+#include "amdgpu_ttm.h"
#include "amdgpu_gds.h"
#include "amd_powerplay.h"
#include "amdgpu_acp.h"
#include "gpu_scheduler.h"
+#include "amdgpu_virt.h"
/*
* Module parameters.
extern int amdgpu_modeset;
extern int amdgpu_vram_limit;
extern int amdgpu_gart_size;
+extern int amdgpu_moverate;
extern int amdgpu_benchmarking;
extern int amdgpu_testing;
extern int amdgpu_audio;
extern unsigned amdgpu_cg_mask;
extern unsigned amdgpu_pg_mask;
extern char *amdgpu_disable_cu;
+extern int amdgpu_sclk_deep_sleep_en;
+extern char *amdgpu_virtual_display;
+extern unsigned amdgpu_pp_feature_mask;
#define AMDGPU_WAIT_IDLE_TIMEOUT_IN_MS 3000
#define AMDGPU_MAX_USEC_TIMEOUT 100000 /* 100 ms */
#define AMDGPU_MAX_RINGS 16
#define AMDGPU_MAX_GFX_RINGS 1
#define AMDGPU_MAX_COMPUTE_RINGS 8
-#define AMDGPU_MAX_VCE_RINGS 2
+#define AMDGPU_MAX_VCE_RINGS 3
/* max number of IP instances */
#define AMDGPU_MAX_SDMA_INSTANCES 2
uint64_t pe, uint64_t src,
unsigned count);
/* write pte one entry at a time with addr mapping */
- void (*write_pte)(struct amdgpu_ib *ib,
- const dma_addr_t *pages_addr, uint64_t pe,
- uint64_t addr, unsigned count,
- uint32_t incr, uint32_t flags);
+ void (*write_pte)(struct amdgpu_ib *ib, uint64_t pe,
+ uint64_t value, unsigned count,
+ uint32_t incr);
/* for linear pte/pde updates without addr mapping */
void (*set_pte_pde)(struct amdgpu_ib *ib,
uint64_t pe,
/* note usage for clock and power gating */
void (*begin_use)(struct amdgpu_ring *ring);
void (*end_use)(struct amdgpu_ring *ring);
+ void (*emit_switch_buffer) (struct amdgpu_ring *ring);
+ void (*emit_cntxcntl) (struct amdgpu_ring *ring, uint32_t flags);
+ unsigned (*get_emit_ib_size) (struct amdgpu_ring *ring);
+ unsigned (*get_dma_frame_size) (struct amdgpu_ring *ring);
};
/*
unsigned amdgpu_fence_count_emitted(struct amdgpu_ring *ring);
/*
- * TTM.
+ * BO.
*/
-
-#define AMDGPU_TTM_LRU_SIZE 20
-
-struct amdgpu_mman_lru {
- struct list_head *lru[TTM_NUM_MEM_TYPES];
- struct list_head *swap_lru;
-};
-
-struct amdgpu_mman {
- struct ttm_bo_global_ref bo_global_ref;
- struct drm_global_reference mem_global_ref;
- struct ttm_bo_device bdev;
- bool mem_global_referenced;
- bool initialized;
-
-#if defined(CONFIG_DEBUG_FS)
- struct dentry *vram;
- struct dentry *gtt;
-#endif
-
- /* buffer handling */
- const struct amdgpu_buffer_funcs *buffer_funcs;
- struct amdgpu_ring *buffer_funcs_ring;
- /* Scheduler entity for buffer moves */
- struct amd_sched_entity entity;
-
- /* custom LRU management */
- struct amdgpu_mman_lru log2_size[AMDGPU_TTM_LRU_SIZE];
- /* guard for log2_size array, don't add anything in between */
- struct amdgpu_mman_lru guard;
-};
-
-int amdgpu_copy_buffer(struct amdgpu_ring *ring,
- uint64_t src_offset,
- uint64_t dst_offset,
- uint32_t byte_count,
- struct reservation_object *resv,
- struct fence **fence);
-int amdgpu_mmap(struct file *filp, struct vm_area_struct *vma);
struct amdgpu_bo_list_entry {
struct amdgpu_bo *robj;
struct ttm_validate_buffer tv;
struct amdgpu_device *adev;
struct drm_gem_object gem_base;
struct amdgpu_bo *parent;
+ struct amdgpu_bo *shadow;
struct ttm_bo_kmap_obj dma_buf_vmap;
struct amdgpu_mn *mn;
struct list_head mn_list;
+ struct list_head shadow_list;
};
#define gem_to_amdgpu_bo(gobj) container_of((gobj), struct amdgpu_bo, gem_base)
void amdgpu_gart_table_vram_unpin(struct amdgpu_device *adev);
int amdgpu_gart_init(struct amdgpu_device *adev);
void amdgpu_gart_fini(struct amdgpu_device *adev);
- void amdgpu_gart_unbind(struct amdgpu_device *adev, unsigned offset,
+ void amdgpu_gart_unbind(struct amdgpu_device *adev, uint64_t offset,
int pages);
- int amdgpu_gart_bind(struct amdgpu_device *adev, unsigned offset,
+ int amdgpu_gart_bind(struct amdgpu_device *adev, uint64_t offset,
int pages, struct page **pagelist,
dma_addr_t *dma_addr, uint32_t flags);
+int amdgpu_ttm_recover_gart(struct amdgpu_device *adev);
/*
* GPU MC structures, functions & helpers
uint32_t fw_version;
struct amdgpu_irq_src vm_fault;
uint32_t vram_type;
+ uint32_t srbm_soft_reset;
+ struct amdgpu_mode_mc_save save;
};
/*
*/
struct amdgpu_flip_work {
- struct work_struct flip_work;
+ struct delayed_work flip_work;
struct work_struct unpin_work;
struct amdgpu_device *adev;
int crtc_id;
+ u32 target_vblank;
uint64_t base;
struct drm_pending_vblank_event *event;
struct amdgpu_bo *old_rbo;
/* maximum number of VMIDs */
#define AMDGPU_NUM_VM 16
+/* Maximum number of PTEs the hardware can write with one command */
+#define AMDGPU_VM_MAX_UPDATE_SIZE 0x3FFFF
+
/* number of entries in page table */
#define AMDGPU_VM_PTE_COUNT (1 << amdgpu_vm_block_size)
/* PTBs (Page Table Blocks) need to be aligned to 32K */
#define AMDGPU_VM_PTB_ALIGN_SIZE 32768
-#define AMDGPU_VM_PTB_ALIGN_MASK (AMDGPU_VM_PTB_ALIGN_SIZE - 1)
-#define AMDGPU_VM_PTB_ALIGN(a) (((a) + AMDGPU_VM_PTB_ALIGN_MASK) & ~AMDGPU_VM_PTB_ALIGN_MASK)
+
+/* LOG2 number of continuous pages for the fragment field */
+#define AMDGPU_LOG2_PAGES_PER_FRAG 4
#define AMDGPU_PTE_VALID (1 << 0)
#define AMDGPU_PTE_SYSTEM (1 << 1)
#define AMDGPU_PTE_READABLE (1 << 5)
#define AMDGPU_PTE_WRITEABLE (1 << 6)
-/* PTE (Page Table Entry) fragment field for different page sizes */
-#define AMDGPU_PTE_FRAG_4KB (0 << 7)
-#define AMDGPU_PTE_FRAG_64KB (4 << 7)
-#define AMDGPU_LOG2_PAGES_PER_FRAG 4
+#define AMDGPU_PTE_FRAG(x) ((x & 0x1f) << 7)
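+/* For illustration (not part of the change): the fragment value is the log2
+ * of the number of 4KB pages covered, so AMDGPU_PTE_FRAG(4) marks a 64KB
+ * fragment (1 << (12 + 4)), matching the old AMDGPU_PTE_FRAG_64KB encoding.
+ */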
/* How to program VM fault handling */
#define AMDGPU_VM_FAULT_STOP_NEVER 0
struct amdgpu_vm_pt {
struct amdgpu_bo_list_entry entry;
uint64_t addr;
+ uint64_t shadow_addr;
};
struct amdgpu_vm {
struct amdgpu_job *job);
int amdgpu_vm_flush(struct amdgpu_ring *ring, struct amdgpu_job *job);
void amdgpu_vm_reset_id(struct amdgpu_device *adev, unsigned vm_id);
-uint64_t amdgpu_vm_map_gart(const dma_addr_t *pages_addr, uint64_t addr);
int amdgpu_vm_update_page_directory(struct amdgpu_device *adev,
struct amdgpu_vm *vm);
int amdgpu_vm_clear_freed(struct amdgpu_device *adev,
struct amdgpu_sync *sync);
int amdgpu_vm_bo_update(struct amdgpu_device *adev,
struct amdgpu_bo_va *bo_va,
- struct ttm_mem_reg *mem);
+ bool clear);
void amdgpu_vm_bo_invalidate(struct amdgpu_device *adev,
struct amdgpu_bo *bo);
struct amdgpu_bo_va *amdgpu_vm_bo_find(struct amdgpu_vm *vm,
spinlock_t ring_lock;
struct fence **fences;
struct amdgpu_ctx_ring rings[AMDGPU_MAX_RINGS];
+ bool preamble_presented;
};
struct amdgpu_ctx_mgr {
unsigned ce_ram_size;
struct amdgpu_cu_info cu_info;
const struct amdgpu_gfx_funcs *funcs;
+
+ /* reset mask */
+ uint32_t grbm_soft_reset;
+ uint32_t srbm_soft_reset;
};
int amdgpu_ib_get(struct amdgpu_device *adev, struct amdgpu_vm *vm,
struct fence *fence;
uint64_t bytes_moved_threshold;
uint64_t bytes_moved;
+ struct amdgpu_bo_list_entry *evictable;
/* user fence */
struct amdgpu_bo_list_entry uf_entry;
};
+#define AMDGPU_PREAMBLE_IB_PRESENT (1 << 0) /* bit set means command submit involves a preamble IB */
+#define AMDGPU_PREAMBLE_IB_PRESENT_FIRST (1 << 1) /* bit set means preamble IB is first presented in belonging context */
+#define AMDGPU_HAVE_CTX_SWITCH (1 << 2) /* bit set means context switch occurred */
+
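+/* Illustrative flow (see amdgpu_ib_schedule() below): a job's preamble_status
+ * carries the *_IB_PRESENT bits, and AMDGPU_HAVE_CTX_SWITCH is ORed in when
+ * the ring's current_ctx differs from the job's fence_ctx.
+ */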
struct amdgpu_job {
struct amd_sched_job base;
struct amdgpu_device *adev;
struct amdgpu_sync sync;
struct amdgpu_ib *ibs;
struct fence *fence; /* the hw fence */
+ uint32_t preamble_status;
uint32_t num_ibs;
void *owner;
- uint64_t ctx;
+ uint64_t fence_ctx; /* the fence_context this job uses */
bool vm_needs_flush;
unsigned vm_id;
uint64_t vm_pd_addr;
bool address_64_bit;
bool use_ctx_buf;
struct amd_sched_entity entity;
+ uint32_t srbm_soft_reset;
};
/*
struct amdgpu_irq_src irq;
unsigned harvest_config;
struct amd_sched_entity entity;
+ uint32_t srbm_soft_reset;
+ unsigned num_rings;
};
/*
struct amdgpu_sdma {
struct amdgpu_sdma_instance instance[AMDGPU_MAX_SDMA_INSTANCES];
+#ifdef CONFIG_DRM_AMDGPU_SI
+ //SI DMA has a different trap irq number for the second engine
+ struct amdgpu_irq_src trap_irq_1;
+#endif
struct amdgpu_irq_src trap_irq;
struct amdgpu_irq_src illegal_inst_irq;
int num_instances;
+ uint32_t srbm_soft_reset;
};
/*
bool (*read_disabled_bios)(struct amdgpu_device *adev);
bool (*read_bios_from_rom)(struct amdgpu_device *adev,
u8 *bios, u32 length_bytes);
+ void (*detect_hw_virtualization) (struct amdgpu_device *adev);
int (*read_register)(struct amdgpu_device *adev, u32 se_num,
u32 sh_num, u32 reg_offset, u32 *value);
void (*set_vga_state)(struct amdgpu_device *adev, bool state);
/* MM block clocks */
int (*set_uvd_clocks)(struct amdgpu_device *adev, u32 vclk, u32 dclk);
int (*set_vce_clocks)(struct amdgpu_device *adev, u32 evclk, u32 ecclk);
- /* query virtual capabilities */
- u32 (*get_virtual_caps)(struct amdgpu_device *adev);
+ /* static power management */
+ int (*get_pcie_lanes)(struct amdgpu_device *adev);
+ void (*set_pcie_lanes)(struct amdgpu_device *adev, int lanes);
};
/*
struct cgs_device *amdgpu_cgs_create_device(struct amdgpu_device *adev);
void amdgpu_cgs_destroy_device(struct cgs_device *cgs_device);
-
-/* GPU virtualization */
-#define AMDGPU_VIRT_CAPS_SRIOV_EN (1 << 0)
-#define AMDGPU_VIRT_CAPS_IS_VF (1 << 1)
-struct amdgpu_virtualization {
- bool supports_sr_iov;
- bool is_virtual;
- u32 caps;
-};
-
/*
* Core structure, functions and helpers.
*/
bool valid;
bool sw;
bool hw;
+ bool hang;
};
struct amdgpu_device {
spinlock_t pcie_idx_lock;
amdgpu_rreg_t pcie_rreg;
amdgpu_wreg_t pcie_wreg;
+ amdgpu_rreg_t pciep_rreg;
+ amdgpu_wreg_t pciep_wreg;
/* protects concurrent UVD register access */
spinlock_t uvd_ctx_idx_lock;
amdgpu_rreg_t uvd_ctx_rreg;
atomic64_t num_evictions;
atomic_t gpu_reset_counter;
+ /* data for buffer migration throttling */
+ struct {
+ spinlock_t lock;
+ s64 last_update_us;
+ s64 accum_us; /* accumulated microseconds */
+ u32 log2_max_MBps;
+ } mm_stats;
+
/* display */
+ bool enable_virtual_display;
struct amdgpu_mode_info mode_info;
struct work_struct hotplug_work;
struct amdgpu_irq_src crtc_irq;
struct kfd_dev *kfd;
struct amdgpu_virtualization virtualization;
+
+ /* link all shadow bo */
+ struct list_head shadow_list;
+ struct mutex shadow_list_lock;
+ /* link all gtt */
+ spinlock_t gtt_list_lock;
+ struct list_head gtt_list;
+
};
bool amdgpu_device_is_px(struct drm_device *dev);
#define REG_GET(FIELD, v) (((v) << FIELD##_SHIFT) & FIELD##_MASK)
#define RREG32_PCIE(reg) adev->pcie_rreg(adev, (reg))
#define WREG32_PCIE(reg, v) adev->pcie_wreg(adev, (reg), (v))
+#define RREG32_PCIE_PORT(reg) adev->pciep_rreg(adev, (reg))
+#define WREG32_PCIE_PORT(reg, v) adev->pciep_wreg(adev, (reg), (v))
#define RREG32_SMC(reg) adev->smc_rreg(adev, (reg))
#define WREG32_SMC(reg, v) adev->smc_wreg(adev, (reg), (v))
#define RREG32_UVD_CTX(reg) adev->uvd_ctx_rreg(adev, (reg))
#define REG_GET_FIELD(value, reg, field) \
(((value) & REG_FIELD_MASK(reg, field)) >> REG_FIELD_SHIFT(reg, field))
+#define WREG32_FIELD(reg, field, val) \
+ WREG32(mm##reg, (RREG32(mm##reg) & ~REG_FIELD_MASK(reg, field)) | (val) << REG_FIELD_SHIFT(reg, field))
+
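+/* Example use (illustrative only): WREG32_FIELD(GRBM_SOFT_RESET, SOFT_RESET_GFX, 1)
+ * read-modify-writes mmGRBM_SOFT_RESET, replacing just the SOFT_RESET_GFX
+ * field with 1 and leaving the remaining bits untouched.
+ */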
/*
* BIOS helpers.
*/
#define amdgpu_asic_get_xclk(adev) (adev)->asic_funcs->get_xclk((adev))
#define amdgpu_asic_set_uvd_clocks(adev, v, d) (adev)->asic_funcs->set_uvd_clocks((adev), (v), (d))
#define amdgpu_asic_set_vce_clocks(adev, ev, ec) (adev)->asic_funcs->set_vce_clocks((adev), (ev), (ec))
-#define amdgpu_asic_get_virtual_caps(adev) ((adev)->asic_funcs->get_virtual_caps((adev)))
+#define amdgpu_get_pcie_lanes(adev) (adev)->asic_funcs->get_pcie_lanes((adev))
+#define amdgpu_set_pcie_lanes(adev, l) (adev)->asic_funcs->set_pcie_lanes((adev), (l))
+#define amdgpu_asic_get_gpu_clock_counter(adev) (adev)->asic_funcs->get_gpu_clock_counter((adev))
#define amdgpu_asic_read_disabled_bios(adev) (adev)->asic_funcs->read_disabled_bios((adev))
#define amdgpu_asic_read_bios_from_rom(adev, b, l) (adev)->asic_funcs->read_bios_from_rom((adev), (b), (l))
+#define amdgpu_asic_detect_hw_virtualization(adev) (adev)->asic_funcs->detect_hw_virtualization((adev))
#define amdgpu_asic_read_register(adev, se, sh, offset, v)((adev)->asic_funcs->read_register((adev), (se), (sh), (offset), (v)))
#define amdgpu_gart_flush_gpu_tlb(adev, vmid) (adev)->gart.gart_funcs->flush_gpu_tlb((adev), (vmid))
#define amdgpu_gart_set_pte_pde(adev, pt, idx, addr, flags) (adev)->gart.gart_funcs->set_pte_pde((adev), (pt), (idx), (addr), (flags))
#define amdgpu_vm_copy_pte(adev, ib, pe, src, count) ((adev)->vm_manager.vm_pte_funcs->copy_pte((ib), (pe), (src), (count)))
-#define amdgpu_vm_write_pte(adev, ib, pa, pe, addr, count, incr, flags) ((adev)->vm_manager.vm_pte_funcs->write_pte((ib), (pa), (pe), (addr), (count), (incr), (flags)))
+#define amdgpu_vm_write_pte(adev, ib, pe, value, count, incr) ((adev)->vm_manager.vm_pte_funcs->write_pte((ib), (pe), (value), (count), (incr)))
#define amdgpu_vm_set_pte_pde(adev, ib, pe, addr, count, incr, flags) ((adev)->vm_manager.vm_pte_funcs->set_pte_pde((ib), (pe), (addr), (count), (incr), (flags)))
#define amdgpu_ring_parse_cs(r, p, ib) ((r)->funcs->parse_cs((p), (ib)))
#define amdgpu_ring_test_ring(r) (r)->funcs->test_ring((r))
#define amdgpu_ring_emit_gds_switch(r, v, db, ds, wb, ws, ab, as) (r)->funcs->emit_gds_switch((r), (v), (db), (ds), (wb), (ws), (ab), (as))
#define amdgpu_ring_emit_hdp_flush(r) (r)->funcs->emit_hdp_flush((r))
#define amdgpu_ring_emit_hdp_invalidate(r) (r)->funcs->emit_hdp_invalidate((r))
+#define amdgpu_ring_emit_switch_buffer(r) (r)->funcs->emit_switch_buffer((r))
+#define amdgpu_ring_emit_cntxcntl(r, d) (r)->funcs->emit_cntxcntl((r), (d))
#define amdgpu_ring_pad_ib(r, ib) ((r)->funcs->pad_ib((r), (ib)))
#define amdgpu_ring_init_cond_exec(r) (r)->funcs->init_cond_exec((r))
#define amdgpu_ring_patch_cond_exec(r,o) (r)->funcs->patch_cond_exec((r),(o))
+#define amdgpu_ring_get_emit_ib_size(r) (r)->funcs->get_emit_ib_size((r))
+#define amdgpu_ring_get_dma_frame_size(r) (r)->funcs->get_dma_frame_size((r))
#define amdgpu_ih_get_wptr(adev) (adev)->irq.ih_funcs->get_wptr((adev))
#define amdgpu_ih_decode_iv(adev, iv) (adev)->irq.ih_funcs->decode_iv((adev), (iv))
#define amdgpu_ih_set_rptr(adev) (adev)->irq.ih_funcs->set_rptr((adev))
#define amdgpu_gfx_get_gpu_clock_counter(adev) (adev)->gfx.funcs->get_gpu_clock_counter((adev))
#define amdgpu_gfx_select_se_sh(adev, se, sh, instance) (adev)->gfx.funcs->select_se_sh((adev), (se), (sh), (instance))
+#define amdgpu_dpm_read_sensor(adev, idx, value) \
+ ((adev)->pp_enabled ? \
+ (adev)->powerplay.pp_funcs->read_sensor(adev->powerplay.pp_handle, (idx), (value)) : \
+ -EINVAL)
+
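+/* Usage sketch (illustrative; "idx" stands for whichever sensor id the caller
+ * wants): if (!amdgpu_dpm_read_sensor(adev, idx, &value)) { ...use value... }
+ * The wrapper simply returns -EINVAL when powerplay is not enabled.
+ */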
#define amdgpu_dpm_get_temperature(adev) \
((adev)->pp_enabled ? \
(adev)->powerplay.pp_funcs->get_temperature((adev)->powerplay.pp_handle) : \
(adev)->powerplay.pp_funcs->powergate_vce((adev)->powerplay.pp_handle, (g)) : \
(adev)->pm.funcs->powergate_vce((adev), (g)))
-#define amdgpu_dpm_debugfs_print_current_performance_level(adev, m) \
- ((adev)->pp_enabled ? \
- (adev)->powerplay.pp_funcs->print_current_performance_level((adev)->powerplay.pp_handle, (m)) : \
- (adev)->pm.funcs->debugfs_print_current_performance_level((adev), (m)))
-
#define amdgpu_dpm_get_current_power_state(adev) \
(adev)->powerplay.pp_funcs->get_current_power_state((adev)->powerplay.pp_handle)
/* Common functions */
int amdgpu_gpu_reset(struct amdgpu_device *adev);
+bool amdgpu_need_backup(struct amdgpu_device *adev);
void amdgpu_pci_config_reset(struct amdgpu_device *adev);
bool amdgpu_card_posted(struct amdgpu_device *adev);
void amdgpu_update_display_priority(struct amdgpu_device *adev);
void amdgpu_vram_location(struct amdgpu_device *adev, struct amdgpu_mc *mc, u64 base);
void amdgpu_gtt_location(struct amdgpu_device *adev, struct amdgpu_mc *mc);
void amdgpu_ttm_set_active_vram_size(struct amdgpu_device *adev, u64 size);
+u64 amdgpu_ttm_get_gtt_mem_size(struct amdgpu_device *adev);
+int amdgpu_ttm_global_init(struct amdgpu_device *adev);
+int amdgpu_ttm_init(struct amdgpu_device *adev);
+void amdgpu_ttm_fini(struct amdgpu_device *adev);
void amdgpu_program_register_sequence(struct amdgpu_device *adev,
const u32 *registers,
const u32 array_size);
void amdgpu_unregister_atpx_handler(void);
bool amdgpu_has_atpx_dgpu_power_cntl(void);
bool amdgpu_is_atpx_hybrid(void);
+bool amdgpu_atpx_dgpu_req_power_for_displays(void);
#else
static inline void amdgpu_register_atpx_handler(void) {}
static inline void amdgpu_unregister_atpx_handler(void) {}
static inline bool amdgpu_has_atpx_dgpu_power_cntl(void) { return false; }
static inline bool amdgpu_is_atpx_hybrid(void) { return false; }
+static inline bool amdgpu_atpx_dgpu_req_power_for_displays(void) { return false; }
#endif
/*
struct drm_file *file_priv);
void amdgpu_driver_preclose_kms(struct drm_device *dev,
struct drm_file *file_priv);
-int amdgpu_suspend_kms(struct drm_device *dev, bool suspend, bool fbcon);
-int amdgpu_resume_kms(struct drm_device *dev, bool resume, bool fbcon);
+int amdgpu_device_suspend(struct drm_device *dev, bool suspend, bool fbcon);
+int amdgpu_device_resume(struct drm_device *dev, bool resume, bool fbcon);
u32 amdgpu_get_vblank_counter_kms(struct drm_device *dev, unsigned int pipe);
int amdgpu_enable_vblank_kms(struct drm_device *dev, unsigned int pipe);
void amdgpu_disable_vblank_kms(struct drm_device *dev, unsigned int pipe);
struct amdgpu_bo_va_mapping *
amdgpu_cs_find_mapping(struct amdgpu_cs_parser *parser,
uint64_t addr, struct amdgpu_bo **bo);
+int amdgpu_cs_sysvm_access_required(struct amdgpu_cs_parser *parser);
#include "amdgpu_object.h"
#endif
DRM_MODE_CONNECTOR_Unknown
};
+bool amdgpu_atombios_has_dce_engine_info(struct amdgpu_device *adev)
+{
+ struct amdgpu_mode_info *mode_info = &adev->mode_info;
+ struct atom_context *ctx = mode_info->atom_context;
+ int index = GetIndexIntoMasterTable(DATA, Object_Header);
+ u16 size, data_offset;
+ u8 frev, crev;
+ ATOM_DISPLAY_OBJECT_PATH_TABLE *path_obj;
+ ATOM_OBJECT_HEADER *obj_header;
+
+ if (!amdgpu_atom_parse_data_header(ctx, index, &size, &frev, &crev, &data_offset))
+ return false;
+
+ if (crev < 2)
+ return false;
+
+ obj_header = (ATOM_OBJECT_HEADER *) (ctx->bios + data_offset);
+ path_obj = (ATOM_DISPLAY_OBJECT_PATH_TABLE *)
+ (ctx->bios + data_offset +
+ le16_to_cpu(obj_header->usDisplayPathTableOffset));
+
+ if (path_obj->ucNumOfDispPath)
+ return true;
+ else
+ return false;
+}
+
bool amdgpu_atombios_get_connector_info_from_object_table(struct amdgpu_device *adev)
{
struct amdgpu_mode_info *mode_info = &adev->mode_info;
(le16_to_cpu(path->usConnObjectId) &
OBJECT_TYPE_MASK) >> OBJECT_TYPE_SHIFT;
+ /* Skip TV/CV support */
+ if ((le16_to_cpu(path->usDeviceTag) ==
+ ATOM_DEVICE_TV1_SUPPORT) ||
+ (le16_to_cpu(path->usDeviceTag) ==
+ ATOM_DEVICE_CV_SUPPORT))
+ continue;
+
+ if (con_obj_id >= ARRAY_SIZE(object_connector_convert)) {
+ DRM_ERROR("invalid con_obj_id %d for device tag 0x%04x\n",
+ con_obj_id, le16_to_cpu(path->usDeviceTag));
+ continue;
+ }
+
connector_type =
object_connector_convert[con_obj_id];
connector_object_id = con_obj_id;
return -EINVAL;
switch (crev) {
+ case 2:
+ case 3:
+ case 5:
+ /* r6xx, r7xx, evergreen, ni, si.
+ * TODO: add support for asic_type <= CHIP_RV770 */
+ if (clock_type == COMPUTE_ENGINE_PLL_PARAM) {
+ args.v3.ulClockParams = cpu_to_le32((clock_type << 24) | clock);
+
+ amdgpu_atom_execute_table(adev->mode_info.atom_context, index, (uint32_t *)&args);
+
+ dividers->post_div = args.v3.ucPostDiv;
+ dividers->enable_post_div = (args.v3.ucCntlFlag &
+ ATOM_PLL_CNTL_FLAG_PLL_POST_DIV_EN) ? true : false;
+ dividers->enable_dithen = (args.v3.ucCntlFlag &
+ ATOM_PLL_CNTL_FLAG_FRACTION_DISABLE) ? false : true;
+ dividers->whole_fb_div = le16_to_cpu(args.v3.ulFbDiv.usFbDiv);
+ dividers->frac_fb_div = le16_to_cpu(args.v3.ulFbDiv.usFbDivFrac);
+ dividers->ref_div = args.v3.ucRefDiv;
+ dividers->vco_mode = (args.v3.ucCntlFlag &
+ ATOM_PLL_CNTL_FLAG_MPLL_VCO_MODE) ? 1 : 0;
+ } else {
+ /* for SI we use ComputeMemoryClockParam for memory plls */
+ if (adev->asic_type >= CHIP_TAHITI)
+ return -EINVAL;
+ args.v5.ulClockParams = cpu_to_le32((clock_type << 24) | clock);
+ if (strobe_mode)
+ args.v5.ucInputFlag = ATOM_PLL_INPUT_FLAG_PLL_STROBE_MODE_EN;
+
+ amdgpu_atom_execute_table(adev->mode_info.atom_context, index, (uint32_t *)&args);
+
+ dividers->post_div = args.v5.ucPostDiv;
+ dividers->enable_post_div = (args.v5.ucCntlFlag &
+ ATOM_PLL_CNTL_FLAG_PLL_POST_DIV_EN) ? true : false;
+ dividers->enable_dithen = (args.v5.ucCntlFlag &
+ ATOM_PLL_CNTL_FLAG_FRACTION_DISABLE) ? false : true;
+ dividers->whole_fb_div = le16_to_cpu(args.v5.ulFbDiv.usFbDiv);
+ dividers->frac_fb_div = le16_to_cpu(args.v5.ulFbDiv.usFbDivFrac);
+ dividers->ref_div = args.v5.ucRefDiv;
+ dividers->vco_mode = (args.v5.ucCntlFlag &
+ ATOM_PLL_CNTL_FLAG_MPLL_VCO_MODE) ? 1 : 0;
+ }
+ break;
case 4:
/* fusion */
args.v4.ulClock = cpu_to_le32(clock); /* 10 khz */
amdgpu_atom_execute_table(adev->mode_info.atom_context, index, (uint32_t *)&args);
}
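+/* Read the boot-up VDDC/VDDCI/MVDD voltages from the FirmwareInfo data table.
+ * The outputs default to 0 when the table cannot be parsed; VDDCI and MVDD
+ * are only filled in for frev 2, crev >= 2 tables.
+ */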
+void amdgpu_atombios_get_default_voltages(struct amdgpu_device *adev,
+ u16 *vddc, u16 *vddci, u16 *mvdd)
+{
+ struct amdgpu_mode_info *mode_info = &adev->mode_info;
+ int index = GetIndexIntoMasterTable(DATA, FirmwareInfo);
+ u8 frev, crev;
+ u16 data_offset;
+ union firmware_info *firmware_info;
+
+ *vddc = 0;
+ *vddci = 0;
+ *mvdd = 0;
+
+ if (amdgpu_atom_parse_data_header(mode_info->atom_context, index, NULL,
+ &frev, &crev, &data_offset)) {
+ firmware_info =
+ (union firmware_info *)(mode_info->atom_context->bios +
+ data_offset);
+ *vddc = le16_to_cpu(firmware_info->info_14.usBootUpVDDCVoltage);
+ if ((frev == 2) && (crev >= 2)) {
+ *vddci = le16_to_cpu(firmware_info->info_22.usBootUpVDDCIVoltage);
+ *mvdd = le16_to_cpu(firmware_info->info_22.usBootUpMVDDCVoltage);
+ }
+ }
+}
+
union set_voltage {
struct _SET_VOLTAGE_PS_ALLOCATION alloc;
struct _SET_VOLTAGE_PARAMETERS v1;
struct _SET_VOLTAGE_PARAMETERS_V1_3 v3;
};
+int amdgpu_atombios_get_max_vddc(struct amdgpu_device *adev, u8 voltage_type,
+ u16 voltage_id, u16 *voltage)
+{
+ union set_voltage args;
+ int index = GetIndexIntoMasterTable(COMMAND, SetVoltage);
+ u8 frev, crev;
+
+ if (!amdgpu_atom_parse_cmd_header(adev->mode_info.atom_context, index, &frev, &crev))
+ return -EINVAL;
+
+ switch (crev) {
+ case 1:
+ return -EINVAL;
+ case 2:
+ args.v2.ucVoltageType = SET_VOLTAGE_GET_MAX_VOLTAGE;
+ args.v2.ucVoltageMode = 0;
+ args.v2.usVoltageLevel = 0;
+
+ amdgpu_atom_execute_table(adev->mode_info.atom_context, index, (uint32_t *)&args);
+
+ *voltage = le16_to_cpu(args.v2.usVoltageLevel);
+ break;
+ case 3:
+ args.v3.ucVoltageType = voltage_type;
+ args.v3.ucVoltageMode = ATOM_GET_VOLTAGE_LEVEL;
+ args.v3.usVoltageLevel = cpu_to_le16(voltage_id);
+
+ amdgpu_atom_execute_table(adev->mode_info.atom_context, index, (uint32_t *)&args);
+
+ *voltage = le16_to_cpu(args.v3.usVoltageLevel);
+ break;
+ default:
+ DRM_ERROR("Unknown table version %d, %d\n", frev, crev);
+ return -EINVAL;
+ }
+
+ return 0;
+}
+
+int amdgpu_atombios_get_leakage_vddc_based_on_leakage_idx(struct amdgpu_device *adev,
+ u16 *voltage,
+ u16 leakage_idx)
+{
+ return amdgpu_atombios_get_max_vddc(adev, VOLTAGE_TYPE_VDDC, leakage_idx, voltage);
+}
+
void amdgpu_atombios_set_voltage(struct amdgpu_device *adev,
u16 voltage_level,
u8 voltage_type)
return NULL;
}
+int amdgpu_atombios_get_svi2_info(struct amdgpu_device *adev,
+ u8 voltage_type,
+ u8 *svd_gpio_id, u8 *svc_gpio_id)
+{
+ int index = GetIndexIntoMasterTable(DATA, VoltageObjectInfo);
+ u8 frev, crev;
+ u16 data_offset, size;
+ union voltage_object_info *voltage_info;
+ union voltage_object *voltage_object = NULL;
+
+ if (amdgpu_atom_parse_data_header(adev->mode_info.atom_context, index, &size,
+ &frev, &crev, &data_offset)) {
+ voltage_info = (union voltage_object_info *)
+ (adev->mode_info.atom_context->bios + data_offset);
+
+ switch (frev) {
+ case 3:
+ switch (crev) {
+ case 1:
+ voltage_object = (union voltage_object *)
+ amdgpu_atombios_lookup_voltage_object_v3(&voltage_info->v3,
+ voltage_type,
+ VOLTAGE_OBJ_SVID2);
+ if (voltage_object) {
+ *svd_gpio_id = voltage_object->v3.asSVID2Obj.ucSVDGpioId;
+ *svc_gpio_id = voltage_object->v3.asSVID2Obj.ucSVCGpioId;
+ } else {
+ return -EINVAL;
+ }
+ break;
+ default:
+ DRM_ERROR("unknown voltage object table\n");
+ return -EINVAL;
+ }
+ break;
+ default:
+ DRM_ERROR("unknown voltage object table\n");
+ return -EINVAL;
+ }
+
+ }
+ return 0;
+}
+
bool
amdgpu_atombios_is_voltage_gpio(struct amdgpu_device *adev,
u8 voltage_type, u8 voltage_mode)
acpi_handle handle;
struct amdgpu_atpx_functions functions;
bool is_hybrid;
+ bool dgpu_req_power_for_displays;
};
static struct amdgpu_atpx_priv {
return amdgpu_atpx_priv.atpx.is_hybrid;
}
+bool amdgpu_atpx_dgpu_req_power_for_displays(void)
+{
+ return amdgpu_atpx_priv.atpx.dgpu_req_power_for_displays;
+}
+
/**
* amdgpu_atpx_call - call an ATPX method
*
atpx->is_hybrid = false;
if (valid_bits & ATPX_MS_HYBRID_GFX_SUPPORTED) {
printk("ATPX Hybrid Graphics\n");
- #if 1
- /* This is a temporary hack until the D3 cold support
- * makes it upstream. The ATPX power_control method seems
- * to still work on even if the system should be using
- * the new standardized hybrid D3 cold ACPI interface.
- */
- atpx->functions.power_cntl = true;
- #else
atpx->functions.power_cntl = false;
- #endif
atpx->is_hybrid = true;
}
+ atpx->dgpu_req_power_for_displays = false;
+ if (valid_bits & ATPX_DGPU_REQ_POWER_FOR_DISPLAYS)
+ atpx->dgpu_req_power_for_displays = true;
+
return 0;
}
bool skip_preamble, need_ctx_switch;
unsigned patch_offset = ~0;
struct amdgpu_vm *vm;
- uint64_t ctx;
+ uint64_t fence_ctx;
+ uint32_t status = 0, alloc_size;
unsigned i;
int r = 0;
/* ring tests don't use a job */
if (job) {
vm = job->vm;
- ctx = job->ctx;
+ fence_ctx = job->fence_ctx;
} else {
vm = NULL;
- ctx = 0;
+ fence_ctx = 0;
}
if (!ring->ready) {
- dev_err(adev->dev, "couldn't schedule ib\n");
+ dev_err(adev->dev, "couldn't schedule ib on ring <%s>\n", ring->name);
return -EINVAL;
}
return -EINVAL;
}
- r = amdgpu_ring_alloc(ring, 256 * num_ibs);
+ alloc_size = amdgpu_ring_get_dma_frame_size(ring) +
+ num_ibs * amdgpu_ring_get_emit_ib_size(ring);
+
+ r = amdgpu_ring_alloc(ring, alloc_size);
if (r) {
dev_err(adev->dev, "scheduling IB failed (%d).\n", r);
return r;
/* always set cond_exec_polling to CONTINUE */
*ring->cond_exe_cpu_addr = 1;
- skip_preamble = ring->current_ctx == ctx;
- need_ctx_switch = ring->current_ctx != ctx;
+ skip_preamble = ring->current_ctx == fence_ctx;
+ need_ctx_switch = ring->current_ctx != fence_ctx;
+ if (job && ring->funcs->emit_cntxcntl) {
+ if (need_ctx_switch)
+ status |= AMDGPU_HAVE_CTX_SWITCH;
+ status |= job->preamble_status;
+ amdgpu_ring_emit_cntxcntl(ring, status);
+ }
+
for (i = 0; i < num_ibs; ++i) {
ib = &ibs[i];
/* drop preamble IBs if we don't have a context switch */
- if ((ib->flags & AMDGPU_IB_FLAG_PREAMBLE) && skip_preamble)
+ if ((ib->flags & AMDGPU_IB_FLAG_PREAMBLE) &&
+ skip_preamble &&
+ !(status & AMDGPU_PREAMBLE_IB_PRESENT_FIRST))
continue;
amdgpu_ring_emit_ib(ring, ib, job ? job->vm_id : 0,
if (patch_offset != ~0 && ring->funcs->patch_cond_exec)
amdgpu_ring_patch_cond_exec(ring, patch_offset);
- ring->current_ctx = ctx;
+ ring->current_ctx = fence_ctx;
+ if (ring->funcs->emit_switch_buffer)
+ amdgpu_ring_emit_switch_buffer(ring);
amdgpu_ring_commit(ring);
return 0;
}
int amdgpu_ib_ring_tests(struct amdgpu_device *adev)
{
unsigned i;
- int r;
+ int r, ret = 0;
for (i = 0; i < AMDGPU_MAX_RINGS; ++i) {
struct amdgpu_ring *ring = adev->rings[i];
} else {
/* still not good, but we can live with it */
DRM_ERROR("amdgpu: failed testing IB on ring %d (%d).\n", i, r);
+ ret = r;
}
}
}
- return 0;
+ return ret;
}
/*
#include <ttm/ttm_placement.h>
#include <ttm/ttm_module.h>
#include <ttm/ttm_page_alloc.h>
+#include <ttm/ttm_memory.h>
#include <drm/drmP.h>
#include <drm/amdgpu_drm.h>
#include <linux/seq_file.h>
ttm_mem_global_release(ref->object);
}
-static int amdgpu_ttm_global_init(struct amdgpu_device *adev)
+int amdgpu_ttm_global_init(struct amdgpu_device *adev)
{
struct drm_global_reference *global_ref;
struct amdgpu_ring *ring;
global_ref->init = &amdgpu_ttm_mem_global_init;
global_ref->release = &amdgpu_ttm_mem_global_release;
r = drm_global_item_ref(global_ref);
- if (r != 0) {
+ if (r) {
DRM_ERROR("Failed setting up TTM memory accounting "
"subsystem.\n");
- return r;
+ goto error_mem;
}
adev->mman.bo_global_ref.mem_glob =
global_ref->init = &ttm_bo_global_init;
global_ref->release = &ttm_bo_global_release;
r = drm_global_item_ref(global_ref);
- if (r != 0) {
+ if (r) {
DRM_ERROR("Failed setting up TTM BO subsystem.\n");
- drm_global_item_unref(&adev->mman.mem_global_ref);
- return r;
+ goto error_bo;
}
ring = adev->mman.buffer_funcs_ring;
rq = &ring->sched.sched_rq[AMD_SCHED_PRIORITY_KERNEL];
r = amd_sched_entity_init(&ring->sched, &adev->mman.entity,
rq, amdgpu_sched_jobs);
- if (r != 0) {
+ if (r) {
DRM_ERROR("Failed setting up TTM BO move run queue.\n");
- drm_global_item_unref(&adev->mman.mem_global_ref);
- drm_global_item_unref(&adev->mman.bo_global_ref.ref);
- return r;
+ goto error_entity;
}
adev->mman.mem_global_referenced = true;
return 0;
+
+error_entity:
+ drm_global_item_unref(&adev->mman.bo_global_ref.ref);
+error_bo:
+ drm_global_item_unref(&adev->mman.mem_global_ref);
+error_mem:
+ return r;
}
static void amdgpu_ttm_global_fini(struct amdgpu_device *adev)
.lpfn = 0,
.flags = TTM_PL_MASK_CACHING | TTM_PL_FLAG_SYSTEM
};
+ unsigned i;
if (!amdgpu_ttm_bo_is_amdgpu_bo(bo)) {
placement->placement = &placements;
rbo = container_of(bo, struct amdgpu_bo, tbo);
switch (bo->mem.mem_type) {
case TTM_PL_VRAM:
- if (rbo->adev->mman.buffer_funcs_ring->ready == false)
+ if (rbo->adev->mman.buffer_funcs_ring->ready == false) {
amdgpu_ttm_placement_from_domain(rbo, AMDGPU_GEM_DOMAIN_CPU);
- else
+ } else {
amdgpu_ttm_placement_from_domain(rbo, AMDGPU_GEM_DOMAIN_GTT);
+ for (i = 0; i < rbo->placement.num_placement; ++i) {
+ if (!(rbo->placements[i].flags &
+ TTM_PL_FLAG_TT))
+ continue;
+
+ if (rbo->placements[i].lpfn)
+ continue;
+
+ /* set an upper limit to force directly
+ * allocating address space for the BO.
+ */
+ rbo->placements[i].lpfn =
+ rbo->adev->mc.gtt_size >> PAGE_SHIFT;
+ }
+ }
break;
case TTM_PL_TT:
default:
if (amdgpu_ttm_tt_get_usermm(bo->ttm))
return -EPERM;
- return drm_vma_node_verify_access(&rbo->gem_base.vma_node, filp);
+ return drm_vma_node_verify_access(&rbo->gem_base.vma_node,
+ filp->private_data);
}
static void amdgpu_move_null(struct ttm_buffer_object *bo,
adev = amdgpu_get_adev(bo->bdev);
ring = adev->mman.buffer_funcs_ring;
- old_start = old_mem->start << PAGE_SHIFT;
- new_start = new_mem->start << PAGE_SHIFT;
+ old_start = (u64)old_mem->start << PAGE_SHIFT;
+ new_start = (u64)new_mem->start << PAGE_SHIFT;
switch (old_mem->mem_type) {
- case TTM_PL_VRAM:
- old_start += adev->mc.vram_start;
- break;
case TTM_PL_TT:
- old_start += adev->mc.gtt_start;
+ r = amdgpu_ttm_bind(bo->ttm, old_mem);
+ if (r)
+ return r;
+
+ case TTM_PL_VRAM:
+ old_start += bo->bdev->man[old_mem->mem_type].gpu_offset;
break;
default:
DRM_ERROR("Unknown placement %d\n", old_mem->mem_type);
return -EINVAL;
}
switch (new_mem->mem_type) {
- case TTM_PL_VRAM:
- new_start += adev->mc.vram_start;
- break;
case TTM_PL_TT:
- new_start += adev->mc.gtt_start;
+ r = amdgpu_ttm_bind(bo->ttm, new_mem);
+ if (r)
+ return r;
+
+ case TTM_PL_VRAM:
+ new_start += bo->bdev->man[new_mem->mem_type].gpu_offset;
break;
default:
DRM_ERROR("Unknown placement %d\n", old_mem->mem_type);
r = amdgpu_copy_buffer(ring, old_start, new_start,
new_mem->num_pages * PAGE_SIZE, /* bytes */
- bo->resv, &fence);
+ bo->resv, &fence, false);
if (r)
return r;
placement.num_busy_placement = 1;
placement.busy_placement = &placements;
placements.fpfn = 0;
- placements.lpfn = 0;
+ placements.lpfn = adev->mc.gtt_size >> PAGE_SHIFT;
placements.flags = TTM_PL_MASK_CACHING | TTM_PL_FLAG_TT;
r = ttm_bo_mem_space(bo, &placement, &tmp_mem,
interruptible, no_wait_gpu);
if (unlikely(r)) {
goto out_cleanup;
}
- r = ttm_bo_move_ttm(bo, true, interruptible, no_wait_gpu, new_mem);
+ r = ttm_bo_move_ttm(bo, interruptible, no_wait_gpu, new_mem);
out_cleanup:
ttm_bo_mem_put(bo, &tmp_mem);
return r;
placement.num_busy_placement = 1;
placement.busy_placement = &placements;
placements.fpfn = 0;
- placements.lpfn = 0;
+ placements.lpfn = adev->mc.gtt_size >> PAGE_SHIFT;
placements.flags = TTM_PL_MASK_CACHING | TTM_PL_FLAG_TT;
r = ttm_bo_mem_space(bo, &placement, &tmp_mem,
interruptible, no_wait_gpu);
if (unlikely(r)) {
return r;
}
- r = ttm_bo_move_ttm(bo, true, interruptible, no_wait_gpu, &tmp_mem);
+ r = ttm_bo_move_ttm(bo, interruptible, no_wait_gpu, &tmp_mem);
if (unlikely(r)) {
goto out_cleanup;
}
if (r) {
memcpy:
- r = ttm_bo_move_memcpy(bo, evict, interruptible,
- no_wait_gpu, new_mem);
+ r = ttm_bo_move_memcpy(bo, interruptible, no_wait_gpu, new_mem);
if (r) {
return r;
}
spinlock_t guptasklock;
struct list_head guptasks;
atomic_t mmu_invalidations;
+ struct list_head list;
};
int amdgpu_ttm_tt_get_user_pages(struct ttm_tt *ttm, struct page **pages)
struct ttm_mem_reg *bo_mem)
{
struct amdgpu_ttm_tt *gtt = (void*)ttm;
- uint32_t flags = amdgpu_ttm_tt_pte_flags(gtt->adev, ttm, bo_mem);
int r;
if (gtt->userptr) {
return r;
}
}
- gtt->offset = (unsigned long)(bo_mem->start << PAGE_SHIFT);
+ gtt->offset = (u64)bo_mem->start << PAGE_SHIFT;
if (!ttm->num_pages) {
WARN(1, "nothing to bind %lu pages for mreg %p back %p!\n",
ttm->num_pages, bo_mem, ttm);
bo_mem->mem_type == AMDGPU_PL_OA)
return -EINVAL;
+ return 0;
+}
+
+bool amdgpu_ttm_is_bound(struct ttm_tt *ttm)
+{
+ struct amdgpu_ttm_tt *gtt = (void *)ttm;
+
+ return gtt && !list_empty(&gtt->list);
+}
+
+int amdgpu_ttm_bind(struct ttm_tt *ttm, struct ttm_mem_reg *bo_mem)
+{
+ struct amdgpu_ttm_tt *gtt = (void *)ttm;
+ uint32_t flags;
+ int r;
+
+ if (!ttm || amdgpu_ttm_is_bound(ttm))
+ return 0;
+
+ flags = amdgpu_ttm_tt_pte_flags(gtt->adev, ttm, bo_mem);
r = amdgpu_gart_bind(gtt->adev, gtt->offset, ttm->num_pages,
ttm->pages, gtt->ttm.dma_address, flags);
if (r) {
- DRM_ERROR("failed to bind %lu pages at 0x%08X\n",
- ttm->num_pages, (unsigned)gtt->offset);
+ DRM_ERROR("failed to bind %lu pages at 0x%08llX\n",
+ ttm->num_pages, gtt->offset);
return r;
}
+ spin_lock(&gtt->adev->gtt_list_lock);
+ list_add_tail(&gtt->list, &gtt->adev->gtt_list);
+ spin_unlock(&gtt->adev->gtt_list_lock);
+ return 0;
+}
+
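+/* Re-bind every ttm_tt tracked on adev->gtt_list so its GART entries are
+ * written again, e.g. after the GART table contents were lost across a GPU
+ * reset.
+ */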
+int amdgpu_ttm_recover_gart(struct amdgpu_device *adev)
+{
+ struct amdgpu_ttm_tt *gtt, *tmp;
+ struct ttm_mem_reg bo_mem;
+ uint32_t flags;
+ int r;
+
+ bo_mem.mem_type = TTM_PL_TT;
+ spin_lock(&adev->gtt_list_lock);
+ list_for_each_entry_safe(gtt, tmp, &adev->gtt_list, list) {
+ flags = amdgpu_ttm_tt_pte_flags(gtt->adev, &gtt->ttm.ttm, &bo_mem);
+ r = amdgpu_gart_bind(adev, gtt->offset, gtt->ttm.ttm.num_pages,
+ gtt->ttm.ttm.pages, gtt->ttm.dma_address,
+ flags);
+ if (r) {
+ spin_unlock(&adev->gtt_list_lock);
+ DRM_ERROR("failed to bind %lu pages at 0x%08llX\n",
+ gtt->ttm.ttm.num_pages, gtt->offset);
+ return r;
+ }
+ }
+ spin_unlock(&adev->gtt_list_lock);
return 0;
}
{
struct amdgpu_ttm_tt *gtt = (void *)ttm;
+ if (!amdgpu_ttm_is_bound(ttm))
+ return 0;
+
/* unbind shouldn't be done for GDS/GWS/OA in ttm_bo_clean_mm */
if (gtt->adev->gart.ready)
amdgpu_gart_unbind(gtt->adev, gtt->offset, ttm->num_pages);
if (gtt->userptr)
amdgpu_ttm_tt_unpin_userptr(ttm);
+ spin_lock(&gtt->adev->gtt_list_lock);
+ list_del_init(&gtt->list);
+ spin_unlock(&gtt->adev->gtt_list_lock);
+
return 0;
}
kfree(gtt);
return NULL;
}
+ INIT_LIST_HEAD(&gtt->list);
return &gtt->ttm.ttm;
}
unsigned i, j;
int r;
- r = amdgpu_ttm_global_init(adev);
- if (r) {
- return r;
- }
/* No others user of address space so set it to 0 */
r = ttm_bo_device_init(&adev->mman.bdev,
adev->mman.bo_global_ref.ref.object,
uint64_t dst_offset,
uint32_t byte_count,
struct reservation_object *resv,
- struct fence **fence)
+ struct fence **fence, bool direct_submit)
{
struct amdgpu_device *adev = ring->adev;
struct amdgpu_job *job;
byte_count -= cur_size_in_bytes;
}
+ amdgpu_ring_pad_ib(ring, &job->ibs[0]);
+ WARN_ON(job->ibs[0].length_dw > num_dw);
+ if (direct_submit) {
+ r = amdgpu_ib_schedule(ring, job->num_ibs, job->ibs,
+ NULL, NULL, fence);
+ job->fence = fence_get(*fence);
+ if (r)
+ DRM_ERROR("Error scheduling IBs (%d)\n", r);
+ amdgpu_job_free(job);
+ } else {
+ r = amdgpu_job_submit(job, ring, &adev->mman.entity,
+ AMDGPU_FENCE_OWNER_UNDEFINED, fence);
+ if (r)
+ goto error_free;
+ }
+
+ return r;
+
+error_free:
+ amdgpu_job_free(job);
+ return r;
+}
+
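+/* Fill the whole BO with src_data via the buffer_funcs (DMA) fill path,
+ * optionally syncing to the fences in @resv first; the resulting fence is
+ * returned through @fence.
+ */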
+int amdgpu_fill_buffer(struct amdgpu_bo *bo,
+ uint32_t src_data,
+ struct reservation_object *resv,
+ struct fence **fence)
+{
+ struct amdgpu_device *adev = bo->adev;
+ struct amdgpu_job *job;
+ struct amdgpu_ring *ring = adev->mman.buffer_funcs_ring;
+
+ uint32_t max_bytes, byte_count;
+ uint64_t dst_offset;
+ unsigned int num_loops, num_dw;
+ unsigned int i;
+ int r;
+
+ byte_count = bo->tbo.num_pages << PAGE_SHIFT;
+ max_bytes = adev->mman.buffer_funcs->fill_max_bytes;
+ num_loops = DIV_ROUND_UP(byte_count, max_bytes);
+ num_dw = num_loops * adev->mman.buffer_funcs->fill_num_dw;
+
+ /* for IB padding */
+ while (num_dw & 0x7)
+ num_dw++;
+
+ r = amdgpu_job_alloc_with_ib(adev, num_dw * 4, &job);
+ if (r)
+ return r;
+
+ if (resv) {
+ r = amdgpu_sync_resv(adev, &job->sync, resv,
+ AMDGPU_FENCE_OWNER_UNDEFINED);
+ if (r) {
+ DRM_ERROR("sync failed (%d).\n", r);
+ goto error_free;
+ }
+ }
+
+ dst_offset = bo->tbo.mem.start << PAGE_SHIFT;
+ for (i = 0; i < num_loops; i++) {
+ uint32_t cur_size_in_bytes = min(byte_count, max_bytes);
+
+ amdgpu_emit_fill_buffer(adev, &job->ibs[0], src_data,
+ dst_offset, cur_size_in_bytes);
+
+ dst_offset += cur_size_in_bytes;
+ byte_count -= cur_size_in_bytes;
+ }
+
amdgpu_ring_pad_ib(ring, &job->ibs[0]);
WARN_ON(job->ibs[0].length_dw > num_dw);
r = amdgpu_job_submit(job, ring, &adev->mman.entity,
- AMDGPU_FENCE_OWNER_UNDEFINED, fence);
+ AMDGPU_FENCE_OWNER_UNDEFINED, fence);
if (r)
goto error_free;
#endif
}
+
+u64 amdgpu_ttm_get_gtt_mem_size(struct amdgpu_device *adev)
+{
+ return ttm_get_kernel_zone_memory_size(adev->mman.mem_global_ref.object);
+}
bo_size = AMDGPU_GPU_PAGE_ALIGN(le32_to_cpu(hdr->ucode_size_bytes) + 8)
+ AMDGPU_UVD_STACK_SIZE + AMDGPU_UVD_HEAP_SIZE
+ AMDGPU_UVD_SESSION_SIZE * adev->uvd.max_handles;
- r = amdgpu_bo_create(adev, bo_size, PAGE_SIZE, true,
- AMDGPU_GEM_DOMAIN_VRAM,
- AMDGPU_GEM_CREATE_CPU_ACCESS_REQUIRED,
- NULL, NULL, &adev->uvd.vcpu_bo);
+ r = amdgpu_bo_create_kernel(adev, bo_size, PAGE_SIZE,
+ AMDGPU_GEM_DOMAIN_VRAM, &adev->uvd.vcpu_bo,
+ &adev->uvd.gpu_addr, &adev->uvd.cpu_addr);
if (r) {
dev_err(adev->dev, "(%d) failed to allocate UVD bo\n", r);
return r;
}
- r = amdgpu_bo_reserve(adev->uvd.vcpu_bo, false);
- if (r) {
- amdgpu_bo_unref(&adev->uvd.vcpu_bo);
- dev_err(adev->dev, "(%d) failed to reserve UVD bo\n", r);
- return r;
- }
-
- r = amdgpu_bo_pin(adev->uvd.vcpu_bo, AMDGPU_GEM_DOMAIN_VRAM,
- &adev->uvd.gpu_addr);
- if (r) {
- amdgpu_bo_unreserve(adev->uvd.vcpu_bo);
- amdgpu_bo_unref(&adev->uvd.vcpu_bo);
- dev_err(adev->dev, "(%d) UVD bo pin failed\n", r);
- return r;
- }
-
- r = amdgpu_bo_kmap(adev->uvd.vcpu_bo, &adev->uvd.cpu_addr);
- if (r) {
- dev_err(adev->dev, "(%d) UVD map failed\n", r);
- return r;
- }
-
- amdgpu_bo_unreserve(adev->uvd.vcpu_bo);
-
ring = &adev->uvd.ring;
rq = &ring->sched.sched_rq[AMD_SCHED_PRIORITY_NORMAL];
r = amd_sched_entity_init(&ring->sched, &adev->uvd.entity,
int amdgpu_uvd_sw_fini(struct amdgpu_device *adev)
{
- int r;
-
kfree(adev->uvd.saved_bo);
amd_sched_entity_fini(&adev->uvd.ring.sched, &adev->uvd.entity);
- if (adev->uvd.vcpu_bo) {
- r = amdgpu_bo_reserve(adev->uvd.vcpu_bo, false);
- if (!r) {
- amdgpu_bo_kunmap(adev->uvd.vcpu_bo);
- amdgpu_bo_unpin(adev->uvd.vcpu_bo);
- amdgpu_bo_unreserve(adev->uvd.vcpu_bo);
- }
-
- amdgpu_bo_unref(&adev->uvd.vcpu_bo);
- }
+ amdgpu_bo_free_kernel(&adev->uvd.vcpu_bo,
+ &adev->uvd.gpu_addr,
+ (void **)&adev->uvd.cpu_addr);
amdgpu_ring_fini(&adev->uvd.ring);
if (!adev->uvd.saved_bo)
return -ENOMEM;
- memcpy(adev->uvd.saved_bo, ptr, size);
+ memcpy_fromio(adev->uvd.saved_bo, ptr, size);
return 0;
}
ptr = adev->uvd.cpu_addr;
if (adev->uvd.saved_bo != NULL) {
- memcpy(ptr, adev->uvd.saved_bo, size);
+ memcpy_toio(ptr, adev->uvd.saved_bo, size);
kfree(adev->uvd.saved_bo);
adev->uvd.saved_bo = NULL;
} else {
hdr = (const struct common_firmware_header *)adev->uvd.fw->data;
offset = le32_to_cpu(hdr->ucode_array_offset_bytes);
- memcpy(adev->uvd.cpu_addr, (adev->uvd.fw->data) + offset,
- (adev->uvd.fw->size) - offset);
+ memcpy_toio(adev->uvd.cpu_addr, adev->uvd.fw->data + offset,
+ le32_to_cpu(hdr->ucode_size_bytes));
size -= le32_to_cpu(hdr->ucode_size_bytes);
ptr += le32_to_cpu(hdr->ucode_size_bytes);
- memset(ptr, 0, size);
+ memset_io(ptr, 0, size);
}
return 0;
return r;
break;
case mmUVD_ENGINE_CNTL:
+ case mmUVD_NO_OP:
break;
default:
DRM_ERROR("Invalid reg 0x%X!\n", reg);
return -EINVAL;
}
+ r = amdgpu_cs_sysvm_access_required(parser);
+ if (r)
+ return r;
+
ctx.parser = parser;
ctx.buf_sizes = buf_sizes;
ctx.ib_idx = ib_idx;
ib->ptr[3] = addr >> 32;
ib->ptr[4] = PACKET0(mmUVD_GPCOM_VCPU_CMD, 0);
ib->ptr[5] = 0;
- for (i = 6; i < 16; ++i)
- ib->ptr[i] = PACKET2(0);
+ for (i = 6; i < 16; i += 2) {
+ ib->ptr[i] = PACKET0(mmUVD_NO_OP, 0);
+ ib->ptr[i+1] = 0;
+ }
ib->length_dw = 16;
if (direct) {
{
struct amdgpu_device *adev =
container_of(work, struct amdgpu_device, uvd.idle_work.work);
- unsigned i, fences, handles = 0;
-
- fences = amdgpu_fence_count_emitted(&adev->uvd.ring);
-
- for (i = 0; i < adev->uvd.max_handles; ++i)
- if (atomic_read(&adev->uvd.handles[i]))
- ++handles;
+ unsigned fences = amdgpu_fence_count_emitted(&adev->uvd.ring);
- if (fences == 0 && handles == 0) {
+ if (fences == 0) {
if (adev->pm.dpm_enabled) {
amdgpu_dpm_enable_uvd(adev, false);
} else {
r = 0;
}
- error:
fence_put(fence);
+
+ error:
return r;
}
* SI supports 16.
*/
-/* Special value that no flush is necessary */
-#define AMDGPU_VM_NO_FLUSH (~0ll)
-
/* Local structure. Encapsulate some VM table update parameters to reduce
* the number of function parameters
*/
-struct amdgpu_vm_update_params {
+struct amdgpu_pte_update_params {
+ /* amdgpu device we do this update for */
+ struct amdgpu_device *adev;
/* address where to copy page table entries from */
uint64_t src;
- /* DMA addresses to use for mapping */
- dma_addr_t *pages_addr;
/* indirect buffer to fill with commands */
struct amdgpu_ib *ib;
+ /* Function which actually does the update */
+ void (*func)(struct amdgpu_pte_update_params *params, uint64_t pe,
+ uint64_t addr, unsigned count, uint32_t incr,
+ uint32_t flags);
+ /* indicate update pt or its shadow */
+ bool shadow;
};
/**
}
/**
- * amdgpu_vm_update_pages - helper to call the right asic function
+ * amdgpu_vm_do_set_ptes - helper to call the right asic function
*
- * @adev: amdgpu_device pointer
- * @vm_update_params: see amdgpu_vm_update_params definition
+ * @params: see amdgpu_pte_update_params definition
* @pe: addr of the page entry
* @addr: dst addr to write into pe
* @count: number of page entries to update
* Traces the parameters and calls the right asic functions
* to setup the page table using the DMA.
*/
-static void amdgpu_vm_update_pages(struct amdgpu_device *adev,
- struct amdgpu_vm_update_params
- *vm_update_params,
- uint64_t pe, uint64_t addr,
- unsigned count, uint32_t incr,
- uint32_t flags)
+static void amdgpu_vm_do_set_ptes(struct amdgpu_pte_update_params *params,
+ uint64_t pe, uint64_t addr,
+ unsigned count, uint32_t incr,
+ uint32_t flags)
{
trace_amdgpu_vm_set_page(pe, addr, count, incr, flags);
- if (vm_update_params->src) {
- amdgpu_vm_copy_pte(adev, vm_update_params->ib,
- pe, (vm_update_params->src + (addr >> 12) * 8), count);
-
- } else if (vm_update_params->pages_addr) {
- amdgpu_vm_write_pte(adev, vm_update_params->ib,
- vm_update_params->pages_addr,
- pe, addr, count, incr, flags);
-
- } else if (count < 3) {
- amdgpu_vm_write_pte(adev, vm_update_params->ib, NULL, pe, addr,
- count, incr, flags);
+ if (count < 3) {
+ amdgpu_vm_write_pte(params->adev, params->ib, pe,
+ addr | flags, count, incr);
} else {
- amdgpu_vm_set_pte_pde(adev, vm_update_params->ib, pe, addr,
+ amdgpu_vm_set_pte_pde(params->adev, params->ib, pe, addr,
count, incr, flags);
}
}
+/**
+ * amdgpu_vm_do_copy_ptes - copy the PTEs from the GART
+ *
+ * @params: see amdgpu_pte_update_params definition
+ * @pe: addr of the page entry
+ * @addr: dst addr to write into pe
+ * @count: number of page entries to update
+ * @incr: increase next addr by incr bytes
+ * @flags: hw access flags
+ *
+ * Traces the parameters and calls the DMA function to copy the PTEs.
+ */
+static void amdgpu_vm_do_copy_ptes(struct amdgpu_pte_update_params *params,
+ uint64_t pe, uint64_t addr,
+ unsigned count, uint32_t incr,
+ uint32_t flags)
+{
+ trace_amdgpu_vm_set_page(pe, addr, count, incr, flags);
+
+ amdgpu_vm_copy_pte(params->adev, params->ib, pe,
+ (params->src + (addr >> 12) * 8), count);
+}
+
/**
* amdgpu_vm_clear_bo - initially clear the page dir/table
*
struct amdgpu_ring *ring;
struct fence *fence = NULL;
struct amdgpu_job *job;
- struct amdgpu_vm_update_params vm_update_params;
+ struct amdgpu_pte_update_params params;
unsigned entries;
uint64_t addr;
int r;
- memset(&vm_update_params, 0, sizeof(vm_update_params));
ring = container_of(vm->entity.sched, struct amdgpu_ring, sched);
r = reservation_object_reserve_shared(bo->tbo.resv);
if (r)
goto error;
- vm_update_params.ib = &job->ibs[0];
- amdgpu_vm_update_pages(adev, &vm_update_params, addr, 0, entries,
- 0, 0);
+ memset(&params, 0, sizeof(params));
+ params.adev = adev;
+ params.ib = &job->ibs[0];
+ amdgpu_vm_do_set_ptes(&params, addr, 0, entries, 0, 0);
amdgpu_ring_pad_ib(ring, &job->ibs[0]);
WARN_ON(job->ibs[0].length_dw > 64);
* Look up the physical address of the page that the pte resolves
* to and return the pointer for the page table entry.
*/
-uint64_t amdgpu_vm_map_gart(const dma_addr_t *pages_addr, uint64_t addr)
+static uint64_t amdgpu_vm_map_gart(const dma_addr_t *pages_addr, uint64_t addr)
{
uint64_t result;
- if (pages_addr) {
- /* page table offset */
- result = pages_addr[addr >> PAGE_SHIFT];
-
- /* in case cpu page size != gpu page size*/
- result |= addr & (~PAGE_MASK);
+ /* page table offset */
+ result = pages_addr[addr >> PAGE_SHIFT];
- } else {
- /* No mapping required */
- result = addr;
- }
+ /* in case cpu page size != gpu page size */
+ result |= addr & (~PAGE_MASK);
result &= 0xFFFFFFFFFFFFF000ULL;
return result;
}
-/**
- * amdgpu_vm_update_pdes - make sure that page directory is valid
- *
- * @adev: amdgpu_device pointer
- * @vm: requested vm
- * @start: start of GPU address range
- * @end: end of GPU address range
- *
- * Allocates new page tables if necessary
- * and updates the page directory.
- * Returns 0 for success, error for failure.
- */
-int amdgpu_vm_update_page_directory(struct amdgpu_device *adev,
- struct amdgpu_vm *vm)
+static int amdgpu_vm_update_pd_or_shadow(struct amdgpu_device *adev,
+ struct amdgpu_vm *vm,
+ bool shadow)
{
struct amdgpu_ring *ring;
- struct amdgpu_bo *pd = vm->page_directory;
- uint64_t pd_addr = amdgpu_bo_gpu_offset(pd);
+ struct amdgpu_bo *pd = shadow ? vm->page_directory->shadow :
+ vm->page_directory;
+ uint64_t pd_addr;
uint32_t incr = AMDGPU_VM_PTE_COUNT * 8;
uint64_t last_pde = ~0, last_pt = ~0;
unsigned count = 0, pt_idx, ndw;
struct amdgpu_job *job;
- struct amdgpu_vm_update_params vm_update_params;
+ struct amdgpu_pte_update_params params;
struct fence *fence = NULL;
int r;
- memset(&vm_update_params, 0, sizeof(vm_update_params));
+ if (!pd)
+ return 0;
+ pd_addr = amdgpu_bo_gpu_offset(pd);
ring = container_of(vm->entity.sched, struct amdgpu_ring, sched);
/* padding, etc. */
if (r)
return r;
- vm_update_params.ib = &job->ibs[0];
+ memset(&params, 0, sizeof(params));
+ params.adev = adev;
+ params.ib = &job->ibs[0];
/* walk over the address space and update the page directory */
for (pt_idx = 0; pt_idx <= vm->max_pde_used; ++pt_idx) {
continue;
pt = amdgpu_bo_gpu_offset(bo);
- if (vm->page_tables[pt_idx].addr == pt)
- continue;
- vm->page_tables[pt_idx].addr = pt;
+ if (!shadow) {
+ if (vm->page_tables[pt_idx].addr == pt)
+ continue;
+ vm->page_tables[pt_idx].addr = pt;
+ } else {
+ if (vm->page_tables[pt_idx].shadow_addr == pt)
+ continue;
+ vm->page_tables[pt_idx].shadow_addr = pt;
+ }
pde = pd_addr + pt_idx * 8;
if (((last_pde + 8 * count) != pde) ||
- ((last_pt + incr * count) != pt)) {
+ ((last_pt + incr * count) != pt) ||
+ (count == AMDGPU_VM_MAX_UPDATE_SIZE)) {
if (count) {
- amdgpu_vm_update_pages(adev, &vm_update_params,
- last_pde, last_pt,
- count, incr,
- AMDGPU_PTE_VALID);
+ amdgpu_vm_do_set_ptes(&params, last_pde,
+ last_pt, count, incr,
+ AMDGPU_PTE_VALID);
}
count = 1;
}
if (count)
- amdgpu_vm_update_pages(adev, &vm_update_params,
- last_pde, last_pt,
- count, incr, AMDGPU_PTE_VALID);
+ amdgpu_vm_do_set_ptes(&params, last_pde, last_pt,
+ count, incr, AMDGPU_PTE_VALID);
- if (vm_update_params.ib->length_dw != 0) {
- amdgpu_ring_pad_ib(ring, vm_update_params.ib);
+ if (params.ib->length_dw != 0) {
+ amdgpu_ring_pad_ib(ring, params.ib);
amdgpu_sync_resv(adev, &job->sync, pd->tbo.resv,
AMDGPU_FENCE_OWNER_VM);
- WARN_ON(vm_update_params.ib->length_dw > ndw);
+ WARN_ON(params.ib->length_dw > ndw);
r = amdgpu_job_submit(job, ring, &vm->entity,
AMDGPU_FENCE_OWNER_VM, &fence);
if (r)
return r;
}
-/**
- * amdgpu_vm_frag_ptes - add fragment information to PTEs
+/*
+ * amdgpu_vm_update_pdes - make sure that page directory is valid
*
* @adev: amdgpu_device pointer
- * @vm_update_params: see amdgpu_vm_update_params definition
- * @pe_start: first PTE to handle
- * @pe_end: last PTE to handle
- * @addr: addr those PTEs should point to
- * @flags: hw mapping flags
+ * @vm: requested vm
+ * @start: start of GPU address range
+ * @end: end of GPU address range
+ *
+ * Allocates new page tables if necessary
+ * and updates the page directory.
+ * Returns 0 for success, error for failure.
*/
-static void amdgpu_vm_frag_ptes(struct amdgpu_device *adev,
- struct amdgpu_vm_update_params
- *vm_update_params,
- uint64_t pe_start, uint64_t pe_end,
- uint64_t addr, uint32_t flags)
+int amdgpu_vm_update_page_directory(struct amdgpu_device *adev,
+ struct amdgpu_vm *vm)
{
- /**
- * The MC L1 TLB supports variable sized pages, based on a fragment
- * field in the PTE. When this field is set to a non-zero value, page
- * granularity is increased from 4KB to (1 << (12 + frag)). The PTE
- * flags are considered valid for all PTEs within the fragment range
- * and corresponding mappings are assumed to be physically contiguous.
- *
- * The L1 TLB can store a single PTE for the whole fragment,
- * significantly increasing the space available for translation
- * caching. This leads to large improvements in throughput when the
- * TLB is under pressure.
- *
- * The L2 TLB distributes small and large fragments into two
- * asymmetric partitions. The large fragment cache is significantly
- * larger. Thus, we try to use large fragments wherever possible.
- * Userspace can support this by aligning virtual base address and
- * allocation size to the fragment size.
- */
-
- /* SI and newer are optimized for 64KB */
- uint64_t frag_flags = AMDGPU_PTE_FRAG_64KB;
- uint64_t frag_align = 0x80;
-
- uint64_t frag_start = ALIGN(pe_start, frag_align);
- uint64_t frag_end = pe_end & ~(frag_align - 1);
-
- unsigned count;
-
- /* Abort early if there isn't anything to do */
- if (pe_start == pe_end)
- return;
-
- /* system pages are non continuously */
- if (vm_update_params->src || vm_update_params->pages_addr ||
- !(flags & AMDGPU_PTE_VALID) || (frag_start >= frag_end)) {
-
- count = (pe_end - pe_start) / 8;
- amdgpu_vm_update_pages(adev, vm_update_params, pe_start,
- addr, count, AMDGPU_GPU_PAGE_SIZE,
- flags);
- return;
- }
-
- /* handle the 4K area at the beginning */
- if (pe_start != frag_start) {
- count = (frag_start - pe_start) / 8;
- amdgpu_vm_update_pages(adev, vm_update_params, pe_start, addr,
- count, AMDGPU_GPU_PAGE_SIZE, flags);
- addr += AMDGPU_GPU_PAGE_SIZE * count;
- }
-
- /* handle the area in the middle */
- count = (frag_end - frag_start) / 8;
- amdgpu_vm_update_pages(adev, vm_update_params, frag_start, addr, count,
- AMDGPU_GPU_PAGE_SIZE, flags | frag_flags);
+ int r;
- /* handle the 4K area at the end */
- if (frag_end != pe_end) {
- addr += AMDGPU_GPU_PAGE_SIZE * count;
- count = (pe_end - frag_end) / 8;
- amdgpu_vm_update_pages(adev, vm_update_params, frag_end, addr,
- count, AMDGPU_GPU_PAGE_SIZE, flags);
- }
+ r = amdgpu_vm_update_pd_or_shadow(adev, vm, true);
+ if (r)
+ return r;
+ return amdgpu_vm_update_pd_or_shadow(adev, vm, false);
}
/**
* amdgpu_vm_update_ptes - make sure that page tables are valid
*
- * @adev: amdgpu_device pointer
- * @vm_update_params: see amdgpu_vm_update_params definition
+ * @params: see amdgpu_pte_update_params definition
* @vm: requested vm
* @start: start of GPU address range
* @end: end of GPU address range
*
* Update the page tables in the range @start - @end.
*/
-static void amdgpu_vm_update_ptes(struct amdgpu_device *adev,
- struct amdgpu_vm_update_params
- *vm_update_params,
+static void amdgpu_vm_update_ptes(struct amdgpu_pte_update_params *params,
struct amdgpu_vm *vm,
uint64_t start, uint64_t end,
uint64_t dst, uint32_t flags)
{
const uint64_t mask = AMDGPU_VM_PTE_COUNT - 1;
- uint64_t cur_pe_start, cur_pe_end, cur_dst;
+ uint64_t cur_pe_start, cur_nptes, cur_dst;
uint64_t addr; /* next GPU address to be updated */
uint64_t pt_idx;
struct amdgpu_bo *pt;
addr = start;
pt_idx = addr >> amdgpu_vm_block_size;
pt = vm->page_tables[pt_idx].entry.robj;
-
+ if (params->shadow) {
+ if (!pt->shadow)
+ return;
+ pt = vm->page_tables[pt_idx].entry.robj->shadow;
+ }
if ((addr & ~mask) == (end & ~mask))
nptes = end - addr;
else
cur_pe_start = amdgpu_bo_gpu_offset(pt);
cur_pe_start += (addr & mask) * 8;
- cur_pe_end = cur_pe_start + 8 * nptes;
+ cur_nptes = nptes;
cur_dst = dst;
/* for next ptb*/
while (addr < end) {
pt_idx = addr >> amdgpu_vm_block_size;
pt = vm->page_tables[pt_idx].entry.robj;
+ if (params->shadow) {
+ if (!pt->shadow)
+ return;
+ pt = vm->page_tables[pt_idx].entry.robj->shadow;
+ }
if ((addr & ~mask) == (end & ~mask))
nptes = end - addr;
next_pe_start = amdgpu_bo_gpu_offset(pt);
next_pe_start += (addr & mask) * 8;
- if (cur_pe_end == next_pe_start) {
+ if ((cur_pe_start + 8 * cur_nptes) == next_pe_start &&
+ ((cur_nptes + nptes) <= AMDGPU_VM_MAX_UPDATE_SIZE)) {
/* The next ptb is consecutive to current ptb.
- * Don't call amdgpu_vm_frag_ptes now.
+ * Don't call the update function now.
* Will update two ptbs together in future.
*/
- cur_pe_end += 8 * nptes;
+ cur_nptes += nptes;
} else {
- amdgpu_vm_frag_ptes(adev, vm_update_params,
- cur_pe_start, cur_pe_end,
- cur_dst, flags);
+ params->func(params, cur_pe_start, cur_dst, cur_nptes,
+ AMDGPU_GPU_PAGE_SIZE, flags);
cur_pe_start = next_pe_start;
- cur_pe_end = next_pe_start + 8 * nptes;
+ cur_nptes = nptes;
cur_dst = dst;
}
dst += nptes * AMDGPU_GPU_PAGE_SIZE;
}
- amdgpu_vm_frag_ptes(adev, vm_update_params, cur_pe_start,
- cur_pe_end, cur_dst, flags);
+ params->func(params, cur_pe_start, cur_dst, cur_nptes,
+ AMDGPU_GPU_PAGE_SIZE, flags);
+}
+
+/*
+ * amdgpu_vm_frag_ptes - add fragment information to PTEs
+ *
+ * @params: see amdgpu_pte_update_params definition
+ * @vm: requested vm
+ * @start: first PTE to handle
+ * @end: last PTE to handle
+ * @dst: addr those PTEs should point to
+ * @flags: hw mapping flags
+ */
+static void amdgpu_vm_frag_ptes(struct amdgpu_pte_update_params *params,
+ struct amdgpu_vm *vm,
+ uint64_t start, uint64_t end,
+ uint64_t dst, uint32_t flags)
+{
+ /**
+ * The MC L1 TLB supports variable sized pages, based on a fragment
+ * field in the PTE. When this field is set to a non-zero value, page
+ * granularity is increased from 4KB to (1 << (12 + frag)). The PTE
+ * flags are considered valid for all PTEs within the fragment range
+ * and corresponding mappings are assumed to be physically contiguous.
+ *
+ * The L1 TLB can store a single PTE for the whole fragment,
+ * significantly increasing the space available for translation
+ * caching. This leads to large improvements in throughput when the
+ * TLB is under pressure.
+ *
+ * The L2 TLB distributes small and large fragments into two
+ * asymmetric partitions. The large fragment cache is significantly
+ * larger. Thus, we try to use large fragments wherever possible.
+ * Userspace can support this by aligning virtual base address and
+ * allocation size to the fragment size.
+ */
+
+ const uint64_t frag_align = 1 << AMDGPU_LOG2_PAGES_PER_FRAG;
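+	/* For example, if AMDGPU_LOG2_PAGES_PER_FRAG were 4, frag_align would
+	 * be 16 GPU pages, i.e. the 64KB (1 << (12 + 4)) granularity that the
+	 * fragment comment above describes.
+	 */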
+
+ uint64_t frag_start = ALIGN(start, frag_align);
+ uint64_t frag_end = end & ~(frag_align - 1);
+
+ uint32_t frag;
+
+	/* system pages are non-contiguous */
+ if (params->src || !(flags & AMDGPU_PTE_VALID) ||
+ (frag_start >= frag_end)) {
+
+ amdgpu_vm_update_ptes(params, vm, start, end, dst, flags);
+ return;
+ }
+
+ /* use more than 64KB fragment size if possible */
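+	/* The usable fragment size is limited by the common alignment of
+	 * frag_start and frag_end: __ffs() of their OR gives the largest
+	 * power-of-two exponent shared by both, capped at 31 when the low
+	 * 32 bits are fully aligned.
+	 */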
+ frag = lower_32_bits(frag_start | frag_end);
+ frag = likely(frag) ? __ffs(frag) : 31;
+
+ /* handle the 4K area at the beginning */
+ if (start != frag_start) {
+ amdgpu_vm_update_ptes(params, vm, start, frag_start,
+ dst, flags);
+ dst += (frag_start - start) * AMDGPU_GPU_PAGE_SIZE;
+ }
+
+ /* handle the area in the middle */
+ amdgpu_vm_update_ptes(params, vm, frag_start, frag_end, dst,
+ flags | AMDGPU_PTE_FRAG(frag));
+
+ /* handle the 4K area at the end */
+ if (frag_end != end) {
+ dst += (frag_end - frag_start) * AMDGPU_GPU_PAGE_SIZE;
+ amdgpu_vm_update_ptes(params, vm, frag_end, end, dst, flags);
+ }
}
/**
void *owner = AMDGPU_FENCE_OWNER_VM;
unsigned nptes, ncmds, ndw;
struct amdgpu_job *job;
- struct amdgpu_vm_update_params vm_update_params;
+ struct amdgpu_pte_update_params params;
struct fence *f = NULL;
int r;
+	memset(&params, 0, sizeof(params));
+ params.adev = adev;
+ params.src = src;
+
ring = container_of(vm->entity.sched, struct amdgpu_ring, sched);
- memset(&vm_update_params, 0, sizeof(vm_update_params));
- vm_update_params.src = src;
- vm_update_params.pages_addr = pages_addr;
+
+	memset(&params, 0, sizeof(params));
+ params.adev = adev;
+ params.src = src;
/* sync to everything on unmapping */
if (!(flags & AMDGPU_PTE_VALID))
/* padding, etc. */
ndw = 64;
- if (vm_update_params.src) {
+ if (src) {
/* only copy commands needed */
ndw += ncmds * 7;
- } else if (vm_update_params.pages_addr) {
- /* header for write data commands */
- ndw += ncmds * 4;
+ params.func = amdgpu_vm_do_copy_ptes;
+
+ } else if (pages_addr) {
+ /* copy commands needed */
+ ndw += ncmds * 7;
- /* body of write data command */
+ /* and also PTEs */
ndw += nptes * 2;
+ params.func = amdgpu_vm_do_copy_ptes;
+
} else {
/* set page commands needed */
ndw += ncmds * 10;
/* two extra commands for begin/end of fragment */
ndw += 2 * 10;
+
+ params.func = amdgpu_vm_do_set_ptes;
}
r = amdgpu_job_alloc_with_ib(adev, ndw * 4, &job);
if (r)
return r;
- vm_update_params.ib = &job->ibs[0];
+ params.ib = &job->ibs[0];
+
+ if (!src && pages_addr) {
+ uint64_t *pte;
+ unsigned i;
+
+ /* Put the PTEs at the end of the IB. */
+ i = ndw - nptes * 2;
+	pte = (uint64_t *)&(job->ibs->ptr[i]);
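+		/* params.src points at the GPU address of these PTEs inside
+		 * the IB, so the copy path (amdgpu_vm_do_copy_ptes) can DMA
+		 * them into the page tables.
+		 */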
+ params.src = job->ibs->gpu_addr + i * 4;
+
+ for (i = 0; i < nptes; ++i) {
+ pte[i] = amdgpu_vm_map_gart(pages_addr, addr + i *
+ AMDGPU_GPU_PAGE_SIZE);
+ pte[i] |= flags;
+ }
+ }
r = amdgpu_sync_fence(adev, &job->sync, exclusive);
if (r)
if (r)
goto error_free;
- amdgpu_vm_update_ptes(adev, &vm_update_params, vm, start,
- last + 1, addr, flags);
+ params.shadow = true;
+	amdgpu_vm_frag_ptes(&params, vm, start, last + 1, addr, flags);
+	params.shadow = false;
+	amdgpu_vm_frag_ptes(&params, vm, start, last + 1, addr, flags);
- amdgpu_ring_pad_ib(ring, vm_update_params.ib);
- WARN_ON(vm_update_params.ib->length_dw > ndw);
+ amdgpu_ring_pad_ib(ring, params.ib);
+ WARN_ON(params.ib->length_dw > ndw);
r = amdgpu_job_submit(job, ring, &vm->entity,
AMDGPU_FENCE_OWNER_VM, &f);
if (r)
*
* @adev: amdgpu_device pointer
* @bo_va: requested BO and VM object
- * @mem: ttm mem
+ * @clear: if true clear the entries
*
* Fill in the page table entries for @bo_va.
* Returns 0 for success, -EINVAL for failure.
- *
- * Object have to be reserved and mutex must be locked!
*/
int amdgpu_vm_bo_update(struct amdgpu_device *adev,
struct amdgpu_bo_va *bo_va,
- struct ttm_mem_reg *mem)
+ bool clear)
{
struct amdgpu_vm *vm = bo_va->vm;
struct amdgpu_bo_va_mapping *mapping;
dma_addr_t *pages_addr = NULL;
uint32_t gtt_flags, flags;
+ struct ttm_mem_reg *mem;
struct fence *exclusive;
uint64_t addr;
int r;
- if (mem) {
+ if (clear) {
+ mem = NULL;
+ addr = 0;
+ exclusive = NULL;
+ } else {
struct ttm_dma_tt *ttm;
+ mem = &bo_va->bo->tbo.mem;
addr = (u64)mem->start << PAGE_SHIFT;
switch (mem->mem_type) {
case TTM_PL_TT:
}
exclusive = reservation_object_get_excl(bo_va->bo->tbo.resv);
- } else {
- addr = 0;
- exclusive = NULL;
}
flags = amdgpu_ttm_tt_pte_flags(adev, bo_va->bo->tbo.ttm, mem);
- gtt_flags = (adev == bo_va->bo->adev) ? flags : 0;
+ gtt_flags = (amdgpu_ttm_is_bound(bo_va->bo->tbo.ttm) &&
+ adev == bo_va->bo->adev) ? flags : 0;
spin_lock(&vm->status_lock);
if (!list_empty(&bo_va->vm_status))
spin_lock(&vm->status_lock);
list_splice_init(&bo_va->invalids, &bo_va->valids);
list_del_init(&bo_va->vm_status);
- if (!mem)
+ if (clear)
list_add(&bo_va->vm_status, &vm->cleared);
spin_unlock(&vm->status_lock);
struct amdgpu_bo_va, vm_status);
spin_unlock(&vm->status_lock);
- r = amdgpu_vm_bo_update(adev, bo_va, NULL);
+ r = amdgpu_vm_bo_update(adev, bo_va, true);
if (r)
return r;
r = amdgpu_bo_create(adev, AMDGPU_VM_PTE_COUNT * 8,
AMDGPU_GPU_PAGE_SIZE, true,
AMDGPU_GEM_DOMAIN_VRAM,
- AMDGPU_GEM_CREATE_NO_CPU_ACCESS,
+ AMDGPU_GEM_CREATE_NO_CPU_ACCESS |
+ AMDGPU_GEM_CREATE_SHADOW,
NULL, resv, &pt);
if (r)
goto error_free;
r = amd_sched_entity_init(&ring->sched, &vm->entity,
rq, amdgpu_sched_jobs);
if (r)
- return r;
+ goto err;
vm->page_directory_fence = NULL;
r = amdgpu_bo_create(adev, pd_size, align, true,
AMDGPU_GEM_DOMAIN_VRAM,
- AMDGPU_GEM_CREATE_NO_CPU_ACCESS,
+ AMDGPU_GEM_CREATE_NO_CPU_ACCESS |
+ AMDGPU_GEM_CREATE_SHADOW,
NULL, NULL, &vm->page_directory);
if (r)
goto error_free_sched_entity;
error_free_sched_entity:
amd_sched_entity_fini(&ring->sched, &vm->entity);
+ err:
+ drm_free_large(vm->page_tables);
+
return r;
}
kfree(mapping);
}
- for (i = 0; i < amdgpu_vm_num_pdes(adev); i++)
+ for (i = 0; i < amdgpu_vm_num_pdes(adev); i++) {
+ if (vm->page_tables[i].entry.robj &&
+ vm->page_tables[i].entry.robj->shadow)
+ amdgpu_bo_unref(&vm->page_tables[i].entry.robj->shadow);
amdgpu_bo_unref(&vm->page_tables[i].entry.robj);
+ }
drm_free_large(vm->page_tables);
+ if (vm->page_directory->shadow)
+ amdgpu_bo_unref(&vm->page_directory->shadow);
amdgpu_bo_unref(&vm->page_directory);
fence_put(vm->page_directory_fence);
}
static void cik_sdma_set_irq_funcs(struct amdgpu_device *adev);
static void cik_sdma_set_buffer_funcs(struct amdgpu_device *adev);
static void cik_sdma_set_vm_pte_funcs(struct amdgpu_device *adev);
+ static int cik_sdma_soft_reset(void *handle);
MODULE_FIRMWARE("radeon/bonaire_sdma.bin");
MODULE_FIRMWARE("radeon/bonaire_sdma1.bin");
uint64_t pe, uint64_t src,
unsigned count)
{
- while (count) {
- unsigned bytes = count * 8;
- if (bytes > 0x1FFFF8)
- bytes = 0x1FFFF8;
-
- ib->ptr[ib->length_dw++] = SDMA_PACKET(SDMA_OPCODE_COPY,
- SDMA_WRITE_SUB_OPCODE_LINEAR, 0);
- ib->ptr[ib->length_dw++] = bytes;
- ib->ptr[ib->length_dw++] = 0; /* src/dst endian swap */
- ib->ptr[ib->length_dw++] = lower_32_bits(src);
- ib->ptr[ib->length_dw++] = upper_32_bits(src);
- ib->ptr[ib->length_dw++] = lower_32_bits(pe);
- ib->ptr[ib->length_dw++] = upper_32_bits(pe);
-
- pe += bytes;
- src += bytes;
- count -= bytes / 8;
- }
+ unsigned bytes = count * 8;
+
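+	/* No chunking loop is needed here any more: the VM code caps each
+	 * update at AMDGPU_VM_MAX_UPDATE_SIZE entries before calling in.
+	 */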
+ ib->ptr[ib->length_dw++] = SDMA_PACKET(SDMA_OPCODE_COPY,
+ SDMA_WRITE_SUB_OPCODE_LINEAR, 0);
+ ib->ptr[ib->length_dw++] = bytes;
+ ib->ptr[ib->length_dw++] = 0; /* src/dst endian swap */
+ ib->ptr[ib->length_dw++] = lower_32_bits(src);
+ ib->ptr[ib->length_dw++] = upper_32_bits(src);
+ ib->ptr[ib->length_dw++] = lower_32_bits(pe);
+ ib->ptr[ib->length_dw++] = upper_32_bits(pe);
}
/**
*
* @ib: indirect buffer to fill with commands
* @pe: addr of the page entry
- * @addr: dst addr to write into pe
+ * @value: dst addr to write into pe
* @count: number of page entries to update
* @incr: increase next addr by incr bytes
- * @flags: access flags
*
* Update PTEs by writing them manually using sDMA (CIK).
*/
-static void cik_sdma_vm_write_pte(struct amdgpu_ib *ib,
- const dma_addr_t *pages_addr, uint64_t pe,
- uint64_t addr, unsigned count,
- uint32_t incr, uint32_t flags)
+static void cik_sdma_vm_write_pte(struct amdgpu_ib *ib, uint64_t pe,
+ uint64_t value, unsigned count,
+ uint32_t incr)
{
- uint64_t value;
- unsigned ndw;
-
- while (count) {
- ndw = count * 2;
- if (ndw > 0xFFFFE)
- ndw = 0xFFFFE;
-
- /* for non-physically contiguous pages (system) */
- ib->ptr[ib->length_dw++] = SDMA_PACKET(SDMA_OPCODE_WRITE,
- SDMA_WRITE_SUB_OPCODE_LINEAR, 0);
- ib->ptr[ib->length_dw++] = pe;
- ib->ptr[ib->length_dw++] = upper_32_bits(pe);
- ib->ptr[ib->length_dw++] = ndw;
- for (; ndw > 0; ndw -= 2, --count, pe += 8) {
- value = amdgpu_vm_map_gart(pages_addr, addr);
- addr += incr;
- value |= flags;
- ib->ptr[ib->length_dw++] = value;
- ib->ptr[ib->length_dw++] = upper_32_bits(value);
- }
+ unsigned ndw = count * 2;
+
+ ib->ptr[ib->length_dw++] = SDMA_PACKET(SDMA_OPCODE_WRITE,
+ SDMA_WRITE_SUB_OPCODE_LINEAR, 0);
+ ib->ptr[ib->length_dw++] = lower_32_bits(pe);
+ ib->ptr[ib->length_dw++] = upper_32_bits(pe);
+ ib->ptr[ib->length_dw++] = ndw;
+ for (; ndw > 0; ndw -= 2) {
+ ib->ptr[ib->length_dw++] = lower_32_bits(value);
+ ib->ptr[ib->length_dw++] = upper_32_bits(value);
+ value += incr;
}
}
*
* Update the page tables using sDMA (CIK).
*/
-static void cik_sdma_vm_set_pte_pde(struct amdgpu_ib *ib,
- uint64_t pe,
+static void cik_sdma_vm_set_pte_pde(struct amdgpu_ib *ib, uint64_t pe,
uint64_t addr, unsigned count,
uint32_t incr, uint32_t flags)
{
- uint64_t value;
- unsigned ndw;
-
- while (count) {
- ndw = count;
- if (ndw > 0x7FFFF)
- ndw = 0x7FFFF;
-
- if (flags & AMDGPU_PTE_VALID)
- value = addr;
- else
- value = 0;
-
- /* for physically contiguous pages (vram) */
- ib->ptr[ib->length_dw++] = SDMA_PACKET(SDMA_OPCODE_GENERATE_PTE_PDE, 0, 0);
- ib->ptr[ib->length_dw++] = pe; /* dst addr */
- ib->ptr[ib->length_dw++] = upper_32_bits(pe);
- ib->ptr[ib->length_dw++] = flags; /* mask */
- ib->ptr[ib->length_dw++] = 0;
- ib->ptr[ib->length_dw++] = value; /* value */
- ib->ptr[ib->length_dw++] = upper_32_bits(value);
- ib->ptr[ib->length_dw++] = incr; /* increment size */
- ib->ptr[ib->length_dw++] = 0;
- ib->ptr[ib->length_dw++] = ndw; /* number of entries */
-
- pe += ndw * 8;
- addr += ndw * incr;
- count -= ndw;
- }
+ /* for physically contiguous pages (vram) */
+ ib->ptr[ib->length_dw++] = SDMA_PACKET(SDMA_OPCODE_GENERATE_PTE_PDE, 0, 0);
+ ib->ptr[ib->length_dw++] = lower_32_bits(pe); /* dst addr */
+ ib->ptr[ib->length_dw++] = upper_32_bits(pe);
+ ib->ptr[ib->length_dw++] = flags; /* mask */
+ ib->ptr[ib->length_dw++] = 0;
+ ib->ptr[ib->length_dw++] = lower_32_bits(addr); /* value */
+ ib->ptr[ib->length_dw++] = upper_32_bits(addr);
+ ib->ptr[ib->length_dw++] = incr; /* increment size */
+ ib->ptr[ib->length_dw++] = 0;
+ ib->ptr[ib->length_dw++] = count; /* number of entries */
}
/**
amdgpu_ring_write(ring, (0xfff << 16) | 10); /* retry count, poll interval */
}
+static unsigned cik_sdma_ring_get_emit_ib_size(struct amdgpu_ring *ring)
+{
+ return
+ 7 + 4; /* cik_sdma_ring_emit_ib */
+}
+
+static unsigned cik_sdma_ring_get_dma_frame_size(struct amdgpu_ring *ring)
+{
+ return
+ 6 + /* cik_sdma_ring_emit_hdp_flush */
+ 3 + /* cik_sdma_ring_emit_hdp_invalidate */
+ 6 + /* cik_sdma_ring_emit_pipeline_sync */
+ 12 + /* cik_sdma_ring_emit_vm_flush */
+ 9 + 9 + 9; /* cik_sdma_ring_emit_fence x3 for user fence, vm fence */
+}
+
static void cik_enable_sdma_mgcg(struct amdgpu_device *adev,
bool enable)
{
{
struct amdgpu_device *adev = (struct amdgpu_device *)handle;
+ cik_sdma_soft_reset(handle);
+
return cik_sdma_hw_init(adev);
}
.test_ib = cik_sdma_ring_test_ib,
.insert_nop = cik_sdma_ring_insert_nop,
.pad_ib = cik_sdma_ring_pad_ib,
+ .get_emit_ib_size = cik_sdma_ring_get_emit_ib_size,
+ .get_dma_frame_size = cik_sdma_ring_get_dma_frame_size,
};
static void cik_sdma_set_ring_funcs(struct amdgpu_device *adev)
return (~data) & mask;
}
+static void
+gfx_v7_0_raster_config(struct amdgpu_device *adev, u32 *rconf, u32 *rconf1)
+{
+ switch (adev->asic_type) {
+ case CHIP_BONAIRE:
+ *rconf |= RB_MAP_PKR0(2) | RB_XSEL2(1) | SE_MAP(2) |
+ SE_XSEL(1) | SE_YSEL(1);
+ *rconf1 |= 0x0;
+ break;
+ case CHIP_HAWAII:
+ *rconf |= RB_MAP_PKR0(2) | RB_MAP_PKR1(2) |
+ RB_XSEL2(1) | PKR_MAP(2) | PKR_XSEL(1) |
+ PKR_YSEL(1) | SE_MAP(2) | SE_XSEL(2) |
+ SE_YSEL(3);
+ *rconf1 |= SE_PAIR_MAP(2) | SE_PAIR_XSEL(3) |
+ SE_PAIR_YSEL(2);
+ break;
+ case CHIP_KAVERI:
+ *rconf |= RB_MAP_PKR0(2);
+ *rconf1 |= 0x0;
+ break;
+ case CHIP_KABINI:
+ case CHIP_MULLINS:
+ *rconf |= 0x0;
+ *rconf1 |= 0x0;
+ break;
+ default:
+ DRM_ERROR("unknown asic: 0x%x\n", adev->asic_type);
+ break;
+ }
+}
+
+static void
+gfx_v7_0_write_harvested_raster_configs(struct amdgpu_device *adev,
+ u32 raster_config, u32 raster_config_1,
+ unsigned rb_mask, unsigned num_rb)
+{
+ unsigned sh_per_se = max_t(unsigned, adev->gfx.config.max_sh_per_se, 1);
+ unsigned num_se = max_t(unsigned, adev->gfx.config.max_shader_engines, 1);
+ unsigned rb_per_pkr = min_t(unsigned, num_rb / num_se / sh_per_se, 2);
+ unsigned rb_per_se = num_rb / num_se;
+ unsigned se_mask[4];
+ unsigned se;
+
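+	/* se_mask[i] holds the enabled render-backend bits that belong to
+	 * shader engine i.
+	 */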
+ se_mask[0] = ((1 << rb_per_se) - 1) & rb_mask;
+ se_mask[1] = (se_mask[0] << rb_per_se) & rb_mask;
+ se_mask[2] = (se_mask[1] << rb_per_se) & rb_mask;
+ se_mask[3] = (se_mask[2] << rb_per_se) & rb_mask;
+
+ WARN_ON(!(num_se == 1 || num_se == 2 || num_se == 4));
+ WARN_ON(!(sh_per_se == 1 || sh_per_se == 2));
+ WARN_ON(!(rb_per_pkr == 1 || rb_per_pkr == 2));
+
+ if ((num_se > 2) && ((!se_mask[0] && !se_mask[1]) ||
+ (!se_mask[2] && !se_mask[3]))) {
+ raster_config_1 &= ~SE_PAIR_MAP_MASK;
+
+ if (!se_mask[0] && !se_mask[1]) {
+ raster_config_1 |=
+ SE_PAIR_MAP(RASTER_CONFIG_SE_PAIR_MAP_3);
+ } else {
+ raster_config_1 |=
+ SE_PAIR_MAP(RASTER_CONFIG_SE_PAIR_MAP_0);
+ }
+ }
+
+ for (se = 0; se < num_se; se++) {
+ unsigned raster_config_se = raster_config;
+ unsigned pkr0_mask = ((1 << rb_per_pkr) - 1) << (se * rb_per_se);
+ unsigned pkr1_mask = pkr0_mask << rb_per_pkr;
+ int idx = (se / 2) * 2;
+
+ if ((num_se > 1) && (!se_mask[idx] || !se_mask[idx + 1])) {
+ raster_config_se &= ~SE_MAP_MASK;
+
+ if (!se_mask[idx]) {
+ raster_config_se |= SE_MAP(RASTER_CONFIG_SE_MAP_3);
+ } else {
+ raster_config_se |= SE_MAP(RASTER_CONFIG_SE_MAP_0);
+ }
+ }
+
+ pkr0_mask &= rb_mask;
+ pkr1_mask &= rb_mask;
+ if (rb_per_se > 2 && (!pkr0_mask || !pkr1_mask)) {
+ raster_config_se &= ~PKR_MAP_MASK;
+
+ if (!pkr0_mask) {
+ raster_config_se |= PKR_MAP(RASTER_CONFIG_PKR_MAP_3);
+ } else {
+ raster_config_se |= PKR_MAP(RASTER_CONFIG_PKR_MAP_0);
+ }
+ }
+
+ if (rb_per_se >= 2) {
+ unsigned rb0_mask = 1 << (se * rb_per_se);
+ unsigned rb1_mask = rb0_mask << 1;
+
+ rb0_mask &= rb_mask;
+ rb1_mask &= rb_mask;
+ if (!rb0_mask || !rb1_mask) {
+ raster_config_se &= ~RB_MAP_PKR0_MASK;
+
+ if (!rb0_mask) {
+ raster_config_se |=
+ RB_MAP_PKR0(RASTER_CONFIG_RB_MAP_3);
+ } else {
+ raster_config_se |=
+ RB_MAP_PKR0(RASTER_CONFIG_RB_MAP_0);
+ }
+ }
+
+ if (rb_per_se > 2) {
+ rb0_mask = 1 << (se * rb_per_se + rb_per_pkr);
+ rb1_mask = rb0_mask << 1;
+ rb0_mask &= rb_mask;
+ rb1_mask &= rb_mask;
+ if (!rb0_mask || !rb1_mask) {
+ raster_config_se &= ~RB_MAP_PKR1_MASK;
+
+ if (!rb0_mask) {
+ raster_config_se |=
+ RB_MAP_PKR1(RASTER_CONFIG_RB_MAP_3);
+ } else {
+ raster_config_se |=
+ RB_MAP_PKR1(RASTER_CONFIG_RB_MAP_0);
+ }
+ }
+ }
+ }
+
+ /* GRBM_GFX_INDEX has a different offset on CI+ */
+ gfx_v7_0_select_se_sh(adev, se, 0xffffffff, 0xffffffff);
+ WREG32(mmPA_SC_RASTER_CONFIG, raster_config_se);
+ WREG32(mmPA_SC_RASTER_CONFIG_1, raster_config_1);
+ }
+
+ /* GRBM_GFX_INDEX has a different offset on CI+ */
+ gfx_v7_0_select_se_sh(adev, 0xffffffff, 0xffffffff, 0xffffffff);
+}
+
/**
* gfx_v7_0_setup_rb - setup the RBs on the asic
*
{
int i, j;
u32 data;
+ u32 raster_config = 0, raster_config_1 = 0;
u32 active_rbs = 0;
u32 rb_bitmap_width_per_sh = adev->gfx.config.max_backends_per_se /
adev->gfx.config.max_sh_per_se;
+ unsigned num_rb_pipes;
mutex_lock(&adev->grbm_idx_mutex);
for (i = 0; i < adev->gfx.config.max_shader_engines; i++) {
}
}
gfx_v7_0_select_se_sh(adev, 0xffffffff, 0xffffffff, 0xffffffff);
- mutex_unlock(&adev->grbm_idx_mutex);
adev->gfx.config.backend_enable_mask = active_rbs;
adev->gfx.config.num_rbs = hweight32(active_rbs);
+
+ num_rb_pipes = min_t(unsigned, adev->gfx.config.max_backends_per_se *
+ adev->gfx.config.max_shader_engines, 16);
+
+ gfx_v7_0_raster_config(adev, &raster_config, &raster_config_1);
+
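+	/* If all RBs are enabled (or none are reported), the default raster
+	 * config can be written directly; otherwise compute per-SE values
+	 * for the harvested configuration.
+	 */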
+ if (!adev->gfx.config.backend_enable_mask ||
+ adev->gfx.config.num_rbs >= num_rb_pipes) {
+ WREG32(mmPA_SC_RASTER_CONFIG, raster_config);
+ WREG32(mmPA_SC_RASTER_CONFIG_1, raster_config_1);
+ } else {
+ gfx_v7_0_write_harvested_raster_configs(adev, raster_config, raster_config_1,
+ adev->gfx.config.backend_enable_mask,
+ num_rb_pipes);
+ }
+ mutex_unlock(&adev->grbm_idx_mutex);
}
/**
amdgpu_ring_write(ring, control);
}
+static void gfx_v7_ring_emit_cntxcntl(struct amdgpu_ring *ring, uint32_t flags)
+{
+ uint32_t dw2 = 0;
+
+ dw2 |= 0x80000000; /* set load_enable otherwise this package is just NOPs */
+ if (flags & AMDGPU_HAVE_CTX_SWITCH) {
+ /* set load_global_config & load_global_uconfig */
+ dw2 |= 0x8001;
+ /* set load_cs_sh_regs */
+ dw2 |= 0x01000000;
+ /* set load_per_context_state & load_gfx_sh_regs */
+ dw2 |= 0x10002;
+ }
+
+ amdgpu_ring_write(ring, PACKET3(PACKET3_CONTEXT_CONTROL, 1));
+ amdgpu_ring_write(ring, dw2);
+ amdgpu_ring_write(ring, 0);
+}
+
/**
* gfx_v7_0_ring_test_ib - basic ring IB test
*
return 0;
}
-static u32 gfx_v7_0_ring_get_rptr_gfx(struct amdgpu_ring *ring)
+static u32 gfx_v7_0_ring_get_rptr(struct amdgpu_ring *ring)
{
return ring->adev->wb.wb[ring->rptr_offs];
}
(void)RREG32(mmCP_RB0_WPTR);
}
-static u32 gfx_v7_0_ring_get_rptr_compute(struct amdgpu_ring *ring)
-{
- return ring->adev->wb.wb[ring->rptr_offs];
-}
-
static u32 gfx_v7_0_ring_get_wptr_compute(struct amdgpu_ring *ring)
{
/* XXX check if swapping is necessary on BE */
u64 wb_gpu_addr;
u32 *buf;
struct bonaire_mqd *mqd;
-
- gfx_v7_0_cp_compute_enable(adev, true);
+ struct amdgpu_ring *ring;
/* fix up chicken bits */
tmp = RREG32(mmCP_CPF_DEBUG);
/* init the queues. Just two for now. */
for (i = 0; i < adev->gfx.num_compute_rings; i++) {
- struct amdgpu_ring *ring = &adev->gfx.compute_ring[i];
+ ring = &adev->gfx.compute_ring[i];
if (ring->mqd_obj == NULL) {
r = amdgpu_bo_create(adev,
amdgpu_bo_unreserve(ring->mqd_obj);
ring->ready = true;
+ }
+
+ gfx_v7_0_cp_compute_enable(adev, true);
+
+ for (i = 0; i < adev->gfx.num_compute_rings; i++) {
+ ring = &adev->gfx.compute_ring[i];
+
r = amdgpu_ring_test_ring(ring);
if (r)
ring->ready = false;
amdgpu_ring_write(ring, (1 << (oa_size + oa_base)) - (1 << oa_base));
}
+static unsigned gfx_v7_0_ring_get_emit_ib_size_gfx(struct amdgpu_ring *ring)
+{
+ return
+ 4; /* gfx_v7_0_ring_emit_ib_gfx */
+}
+
+static unsigned gfx_v7_0_ring_get_dma_frame_size_gfx(struct amdgpu_ring *ring)
+{
+ return
+ 20 + /* gfx_v7_0_ring_emit_gds_switch */
+ 7 + /* gfx_v7_0_ring_emit_hdp_flush */
+ 5 + /* gfx_v7_0_ring_emit_hdp_invalidate */
+ 12 + 12 + 12 + /* gfx_v7_0_ring_emit_fence_gfx x3 for user fence, vm fence */
+ 7 + 4 + /* gfx_v7_0_ring_emit_pipeline_sync */
+ 17 + 6 + /* gfx_v7_0_ring_emit_vm_flush */
+ 3; /* gfx_v7_ring_emit_cntxcntl */
+}
+
+static unsigned gfx_v7_0_ring_get_emit_ib_size_compute(struct amdgpu_ring *ring)
+{
+ return
+ 4; /* gfx_v7_0_ring_emit_ib_compute */
+}
+
+static unsigned gfx_v7_0_ring_get_dma_frame_size_compute(struct amdgpu_ring *ring)
+{
+ return
+ 20 + /* gfx_v7_0_ring_emit_gds_switch */
+ 7 + /* gfx_v7_0_ring_emit_hdp_flush */
+ 5 + /* gfx_v7_0_ring_emit_hdp_invalidate */
+ 7 + /* gfx_v7_0_ring_emit_pipeline_sync */
+ 17 + /* gfx_v7_0_ring_emit_vm_flush */
+ 7 + 7 + 7; /* gfx_v7_0_ring_emit_fence_compute x3 for user fence, vm fence */
+}
+
static const struct amdgpu_gfx_funcs gfx_v7_0_gfx_funcs = {
.get_gpu_clock_counter = &gfx_v7_0_get_gpu_clock_counter,
.select_se_sh = &gfx_v7_0_select_se_sh,
}
/* reserve GDS, GWS and OA resource for gfx */
- r = amdgpu_bo_create(adev, adev->gds.mem.gfx_partition_size,
- PAGE_SIZE, true,
- AMDGPU_GEM_DOMAIN_GDS, 0,
- NULL, NULL, &adev->gds.gds_gfx_bo);
+ r = amdgpu_bo_create_kernel(adev, adev->gds.mem.gfx_partition_size,
+ PAGE_SIZE, AMDGPU_GEM_DOMAIN_GDS,
+ &adev->gds.gds_gfx_bo, NULL, NULL);
if (r)
return r;
- r = amdgpu_bo_create(adev, adev->gds.gws.gfx_partition_size,
- PAGE_SIZE, true,
- AMDGPU_GEM_DOMAIN_GWS, 0,
- NULL, NULL, &adev->gds.gws_gfx_bo);
+ r = amdgpu_bo_create_kernel(adev, adev->gds.gws.gfx_partition_size,
+ PAGE_SIZE, AMDGPU_GEM_DOMAIN_GWS,
+ &adev->gds.gws_gfx_bo, NULL, NULL);
if (r)
return r;
- r = amdgpu_bo_create(adev, adev->gds.oa.gfx_partition_size,
- PAGE_SIZE, true,
- AMDGPU_GEM_DOMAIN_OA, 0,
- NULL, NULL, &adev->gds.oa_gfx_bo);
+ r = amdgpu_bo_create_kernel(adev, adev->gds.oa.gfx_partition_size,
+ PAGE_SIZE, AMDGPU_GEM_DOMAIN_OA,
+ &adev->gds.oa_gfx_bo, NULL, NULL);
if (r)
return r;
int i;
struct amdgpu_device *adev = (struct amdgpu_device *)handle;
- amdgpu_bo_unref(&adev->gds.oa_gfx_bo);
- amdgpu_bo_unref(&adev->gds.gws_gfx_bo);
- amdgpu_bo_unref(&adev->gds.gds_gfx_bo);
+ amdgpu_bo_free_kernel(&adev->gds.oa_gfx_bo, NULL, NULL);
+ amdgpu_bo_free_kernel(&adev->gds.gws_gfx_bo, NULL, NULL);
+ amdgpu_bo_free_kernel(&adev->gds.gds_gfx_bo, NULL, NULL);
for (i = 0; i < adev->gfx.num_gfx_rings; i++)
amdgpu_ring_fini(&adev->gfx.gfx_ring[i]);
};
static const struct amdgpu_ring_funcs gfx_v7_0_ring_funcs_gfx = {
- .get_rptr = gfx_v7_0_ring_get_rptr_gfx,
+ .get_rptr = gfx_v7_0_ring_get_rptr,
.get_wptr = gfx_v7_0_ring_get_wptr_gfx,
.set_wptr = gfx_v7_0_ring_set_wptr_gfx,
.parse_cs = NULL,
.test_ib = gfx_v7_0_ring_test_ib,
.insert_nop = amdgpu_ring_insert_nop,
.pad_ib = amdgpu_ring_generic_pad_ib,
+ .emit_cntxcntl = gfx_v7_ring_emit_cntxcntl,
+ .get_emit_ib_size = gfx_v7_0_ring_get_emit_ib_size_gfx,
+ .get_dma_frame_size = gfx_v7_0_ring_get_dma_frame_size_gfx,
};
static const struct amdgpu_ring_funcs gfx_v7_0_ring_funcs_compute = {
- .get_rptr = gfx_v7_0_ring_get_rptr_compute,
+ .get_rptr = gfx_v7_0_ring_get_rptr,
.get_wptr = gfx_v7_0_ring_get_wptr_compute,
.set_wptr = gfx_v7_0_ring_set_wptr_compute,
.parse_cs = NULL,
.test_ib = gfx_v7_0_ring_test_ib,
.insert_nop = amdgpu_ring_insert_nop,
.pad_ib = amdgpu_ring_generic_pad_ib,
+ .get_emit_ib_size = gfx_v7_0_ring_get_emit_ib_size_compute,
+ .get_dma_frame_size = gfx_v7_0_ring_get_dma_frame_size_compute,
};
static void gfx_v7_0_set_ring_funcs(struct amdgpu_device *adev)
*/
static uint32_t sdma_v2_4_ring_get_rptr(struct amdgpu_ring *ring)
{
- u32 rptr;
-
/* XXX check if swapping is necessary on BE */
- rptr = ring->adev->wb.wb[ring->rptr_offs] >> 2;
-
- return rptr;
+ return ring->adev->wb.wb[ring->rptr_offs] >> 2;
}
/**
DRM_ERROR("amdgpu: IB test timed out\n");
r = -ETIMEDOUT;
goto err1;
- } else if (r) {
+ } else if (r < 0) {
DRM_ERROR("amdgpu: fence wait failed (%ld).\n", r);
goto err1;
}
uint64_t pe, uint64_t src,
unsigned count)
{
- while (count) {
- unsigned bytes = count * 8;
- if (bytes > 0x1FFFF8)
- bytes = 0x1FFFF8;
-
- ib->ptr[ib->length_dw++] = SDMA_PKT_HEADER_OP(SDMA_OP_COPY) |
- SDMA_PKT_HEADER_SUB_OP(SDMA_SUBOP_COPY_LINEAR);
- ib->ptr[ib->length_dw++] = bytes;
- ib->ptr[ib->length_dw++] = 0; /* src/dst endian swap */
- ib->ptr[ib->length_dw++] = lower_32_bits(src);
- ib->ptr[ib->length_dw++] = upper_32_bits(src);
- ib->ptr[ib->length_dw++] = lower_32_bits(pe);
- ib->ptr[ib->length_dw++] = upper_32_bits(pe);
-
- pe += bytes;
- src += bytes;
- count -= bytes / 8;
- }
+ unsigned bytes = count * 8;
+
+ ib->ptr[ib->length_dw++] = SDMA_PKT_HEADER_OP(SDMA_OP_COPY) |
+ SDMA_PKT_HEADER_SUB_OP(SDMA_SUBOP_COPY_LINEAR);
+ ib->ptr[ib->length_dw++] = bytes;
+ ib->ptr[ib->length_dw++] = 0; /* src/dst endian swap */
+ ib->ptr[ib->length_dw++] = lower_32_bits(src);
+ ib->ptr[ib->length_dw++] = upper_32_bits(src);
+ ib->ptr[ib->length_dw++] = lower_32_bits(pe);
+ ib->ptr[ib->length_dw++] = upper_32_bits(pe);
}
/**
*
* @ib: indirect buffer to fill with commands
* @pe: addr of the page entry
- * @addr: dst addr to write into pe
+ * @value: dst addr to write into pe
* @count: number of page entries to update
* @incr: increase next addr by incr bytes
- * @flags: access flags
*
* Update PTEs by writing them manually using sDMA (CIK).
*/
-static void sdma_v2_4_vm_write_pte(struct amdgpu_ib *ib,
- const dma_addr_t *pages_addr, uint64_t pe,
- uint64_t addr, unsigned count,
- uint32_t incr, uint32_t flags)
+static void sdma_v2_4_vm_write_pte(struct amdgpu_ib *ib, uint64_t pe,
+ uint64_t value, unsigned count,
+ uint32_t incr)
{
- uint64_t value;
- unsigned ndw;
-
- while (count) {
- ndw = count * 2;
- if (ndw > 0xFFFFE)
- ndw = 0xFFFFE;
-
- /* for non-physically contiguous pages (system) */
- ib->ptr[ib->length_dw++] = SDMA_PKT_HEADER_OP(SDMA_OP_WRITE) |
- SDMA_PKT_HEADER_SUB_OP(SDMA_SUBOP_COPY_LINEAR);
- ib->ptr[ib->length_dw++] = pe;
- ib->ptr[ib->length_dw++] = upper_32_bits(pe);
- ib->ptr[ib->length_dw++] = ndw;
- for (; ndw > 0; ndw -= 2, --count, pe += 8) {
- value = amdgpu_vm_map_gart(pages_addr, addr);
- addr += incr;
- value |= flags;
- ib->ptr[ib->length_dw++] = value;
- ib->ptr[ib->length_dw++] = upper_32_bits(value);
- }
+ unsigned ndw = count * 2;
+
+ ib->ptr[ib->length_dw++] = SDMA_PKT_HEADER_OP(SDMA_OP_WRITE) |
+ SDMA_PKT_HEADER_SUB_OP(SDMA_SUBOP_COPY_LINEAR);
+ ib->ptr[ib->length_dw++] = pe;
+ ib->ptr[ib->length_dw++] = upper_32_bits(pe);
+ ib->ptr[ib->length_dw++] = ndw;
+	for (; ndw > 0; ndw -= 2) {
+ ib->ptr[ib->length_dw++] = lower_32_bits(value);
+ ib->ptr[ib->length_dw++] = upper_32_bits(value);
+ value += incr;
}
}
*
* Update the page tables using sDMA (CIK).
*/
-static void sdma_v2_4_vm_set_pte_pde(struct amdgpu_ib *ib,
- uint64_t pe,
+static void sdma_v2_4_vm_set_pte_pde(struct amdgpu_ib *ib, uint64_t pe,
uint64_t addr, unsigned count,
uint32_t incr, uint32_t flags)
{
- uint64_t value;
- unsigned ndw;
-
- while (count) {
- ndw = count;
- if (ndw > 0x7FFFF)
- ndw = 0x7FFFF;
-
- if (flags & AMDGPU_PTE_VALID)
- value = addr;
- else
- value = 0;
-
- /* for physically contiguous pages (vram) */
- ib->ptr[ib->length_dw++] = SDMA_PKT_HEADER_OP(SDMA_OP_GEN_PTEPDE);
- ib->ptr[ib->length_dw++] = pe; /* dst addr */
- ib->ptr[ib->length_dw++] = upper_32_bits(pe);
- ib->ptr[ib->length_dw++] = flags; /* mask */
- ib->ptr[ib->length_dw++] = 0;
- ib->ptr[ib->length_dw++] = value; /* value */
- ib->ptr[ib->length_dw++] = upper_32_bits(value);
- ib->ptr[ib->length_dw++] = incr; /* increment size */
- ib->ptr[ib->length_dw++] = 0;
- ib->ptr[ib->length_dw++] = ndw; /* number of entries */
-
- pe += ndw * 8;
- addr += ndw * incr;
- count -= ndw;
- }
+ /* for physically contiguous pages (vram) */
+ ib->ptr[ib->length_dw++] = SDMA_PKT_HEADER_OP(SDMA_OP_GEN_PTEPDE);
+ ib->ptr[ib->length_dw++] = lower_32_bits(pe); /* dst addr */
+ ib->ptr[ib->length_dw++] = upper_32_bits(pe);
+ ib->ptr[ib->length_dw++] = flags; /* mask */
+ ib->ptr[ib->length_dw++] = 0;
+ ib->ptr[ib->length_dw++] = lower_32_bits(addr); /* value */
+ ib->ptr[ib->length_dw++] = upper_32_bits(addr);
+ ib->ptr[ib->length_dw++] = incr; /* increment size */
+ ib->ptr[ib->length_dw++] = 0;
+ ib->ptr[ib->length_dw++] = count; /* number of entries */
}
/**
SDMA_PKT_POLL_REGMEM_DW5_INTERVAL(10)); /* retry count, poll interval */
}
+static unsigned sdma_v2_4_ring_get_emit_ib_size(struct amdgpu_ring *ring)
+{
+ return
+ 7 + 6; /* sdma_v2_4_ring_emit_ib */
+}
+
+static unsigned sdma_v2_4_ring_get_dma_frame_size(struct amdgpu_ring *ring)
+{
+ return
+ 6 + /* sdma_v2_4_ring_emit_hdp_flush */
+ 3 + /* sdma_v2_4_ring_emit_hdp_invalidate */
+ 6 + /* sdma_v2_4_ring_emit_pipeline_sync */
+ 12 + /* sdma_v2_4_ring_emit_vm_flush */
+ 10 + 10 + 10; /* sdma_v2_4_ring_emit_fence x3 for user fence, vm fence */
+}
+
static int sdma_v2_4_early_init(void *handle)
{
struct amdgpu_device *adev = (struct amdgpu_device *)handle;
.test_ib = sdma_v2_4_ring_test_ib,
.insert_nop = sdma_v2_4_ring_insert_nop,
.pad_ib = sdma_v2_4_ring_pad_ib,
+ .get_emit_ib_size = sdma_v2_4_ring_get_emit_ib_size,
+ .get_dma_frame_size = sdma_v2_4_ring_get_dma_frame_size,
};
static void sdma_v2_4_set_ring_funcs(struct amdgpu_device *adev)
vma->vm_page_prot = pgprot_noncached(vma->vm_page_prot);
- pr_debug("mapping doorbell page:\n");
- pr_debug(" target user address == 0x%08llX\n",
- (unsigned long long) vma->vm_start);
- pr_debug(" physical address == 0x%08llX\n", address);
- pr_debug(" vm_flags == 0x%04lX\n", vma->vm_flags);
- pr_debug(" size == 0x%04lX\n",
- doorbell_process_allocation());
+ pr_debug("kfd: mapping doorbell page in %s\n"
+ " target user address == 0x%08llX\n"
+ " physical address == 0x%08llX\n"
+ " vm_flags == 0x%04lX\n"
+ " size == 0x%04lX\n",
+ __func__,
+ (unsigned long long) vma->vm_start, address, vma->vm_flags,
+ doorbell_process_allocation());
+
return io_remap_pfn_range(vma,
vma->vm_start,
sizeof(u32)) + inx;
pr_debug("kfd: get kernel queue doorbell\n"
- " doorbell offset == 0x%08d\n"
+ " doorbell offset == 0x%08X\n"
" kernel address == 0x%08lX\n",
*doorbell_off, (uintptr_t)(kfd->doorbell_kernel_ptr + inx));
u32 *coeff_tab = heo_upscaling_ycoef;
u32 max_memsize;
- if (state->crtc_w < state->src_w)
+ if (state->crtc_h < state->src_h)
coeff_tab = heo_downscaling_ycoef;
for (i = 0; i < ARRAY_SIZE(heo_upscaling_ycoef); i++)
atmel_hlcdc_layer_update_cfg(&plane->layer,
33 + i,
0xffffffff,
coeff_tab[i]);
- factor = ((8 * 256 * state->src_w) - (256 * 4)) /
- state->crtc_w;
+ factor = ((8 * 256 * state->src_h) - (256 * 4)) /
+ state->crtc_h;
factor++;
- max_memsize = ((factor * state->crtc_w) + (256 * 4)) /
+ max_memsize = ((factor * state->crtc_h) + (256 * 4)) /
2048;
- if (max_memsize > state->src_w)
+ if (max_memsize > state->src_h)
factor--;
factor_reg |= (factor << 16) | 0x80000000;
}
if ((state->base.fb->pixel_format == DRM_FORMAT_YUV422 ||
state->base.fb->pixel_format == DRM_FORMAT_NV61) &&
- (state->base.rotation & (BIT(DRM_ROTATE_90) | BIT(DRM_ROTATE_270))))
+ (state->base.rotation & (DRM_ROTATE_90 | DRM_ROTATE_270)))
cfg |= ATMEL_HLCDC_YUV422ROT;
atmel_hlcdc_layer_update_cfg(&plane->layer,
/*
* Swap width and size in case of 90 or 270 degrees rotation
*/
- if (state->base.rotation & (BIT(DRM_ROTATE_90) | BIT(DRM_ROTATE_270))) {
+ if (state->base.rotation & (DRM_ROTATE_90 | DRM_ROTATE_270)) {
tmp = state->crtc_w;
state->crtc_w = state->crtc_h;
state->crtc_h = tmp;
return -EINVAL;
switch (state->base.rotation & DRM_ROTATE_MASK) {
- case BIT(DRM_ROTATE_90):
+ case DRM_ROTATE_90:
offset = ((y_offset + state->src_y + patched_src_w - 1) /
ydiv) * fb->pitches[i];
offset += ((x_offset + state->src_x) / xdiv) *
fb->pitches[i];
state->pstride[i] = -fb->pitches[i] - state->bpp[i];
break;
- case BIT(DRM_ROTATE_180):
+ case DRM_ROTATE_180:
offset = ((y_offset + state->src_y + patched_src_h - 1) /
ydiv) * fb->pitches[i];
offset += ((x_offset + state->src_x + patched_src_w - 1) /
state->bpp[i]) - fb->pitches[i];
state->pstride[i] = -2 * state->bpp[i];
break;
- case BIT(DRM_ROTATE_270):
+ case DRM_ROTATE_270:
offset = ((y_offset + state->src_y) / ydiv) *
fb->pitches[i];
offset += ((x_offset + state->src_x + patched_src_h - 1) /
(2 * state->bpp[i]);
state->pstride[i] = fb->pitches[i] - state->bpp[i];
break;
- case BIT(DRM_ROTATE_0):
+ case DRM_ROTATE_0:
default:
offset = ((y_offset + state->src_y) / ydiv) *
fb->pitches[i];
}
static int atmel_hlcdc_plane_prepare_fb(struct drm_plane *p,
- const struct drm_plane_state *new_state)
+ struct drm_plane_state *new_state)
{
/*
* FIXME: we should avoid this const -> non-const cast but it's
}
static void atmel_hlcdc_plane_cleanup_fb(struct drm_plane *p,
- const struct drm_plane_state *old_state)
+ struct drm_plane_state *old_state)
{
/*
* FIXME: we should avoid this const -> non-const cast but it's
if (desc->layout.xstride && desc->layout.pstride)
drm_object_attach_property(&plane->base.base,
plane->base.dev->mode_config.rotation_property,
- BIT(DRM_ROTATE_0));
+ DRM_ROTATE_0);
if (desc->layout.csc) {
/*
dev->mode_config.rotation_property =
drm_mode_create_rotation_property(dev,
- BIT(DRM_ROTATE_0) |
- BIT(DRM_ROTATE_90) |
- BIT(DRM_ROTATE_180) |
- BIT(DRM_ROTATE_270));
+ DRM_ROTATE_0 |
+ DRM_ROTATE_90 |
+ DRM_ROTATE_180 |
+ DRM_ROTATE_270);
if (!dev->mode_config.rotation_property)
return ERR_PTR(-ENOMEM);
val,
-1,
&replaced);
- state->color_mgmt_changed = replaced;
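+		/* OR in the flag so a color-management property set earlier in
+		 * the same commit is not clobbered.
+		 */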
+ state->color_mgmt_changed |= replaced;
return ret;
} else if (property == config->ctm_property) {
ret = drm_atomic_replace_property_blob_from_id(crtc,
val,
sizeof(struct drm_color_ctm),
&replaced);
- state->color_mgmt_changed = replaced;
+ state->color_mgmt_changed |= replaced;
return ret;
} else if (property == config->gamma_lut_property) {
ret = drm_atomic_replace_property_blob_from_id(crtc,
val,
-1,
&replaced);
- state->color_mgmt_changed = replaced;
+ state->color_mgmt_changed |= replaced;
return ret;
} else if (crtc->funcs->atomic_set_property)
return crtc->funcs->atomic_set_property(crtc, state, property, val);
/* Check whether this plane supports the fb pixel format. */
ret = drm_plane_check_pixel_format(plane, state->fb->pixel_format);
if (ret) {
- DRM_DEBUG_ATOMIC("Invalid pixel format %s\n",
- drm_get_format_name(state->fb->pixel_format));
+ char *format_name = drm_get_format_name(state->fb->pixel_format);
+ DRM_DEBUG_ATOMIC("Invalid pixel format %s\n", format_name);
+ kfree(format_name);
return ret;
}
goto out;
}
- prop = drm_property_find(dev, prop_id);
+ prop = drm_mode_obj_find_prop_id(obj, prop_id);
if (!prop) {
drm_mode_object_unreference(obj);
ret = -ENOENT;
*/
#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt
+#include <linux/console.h>
#include <linux/kernel.h>
#include <linux/sysrq.h>
#include <linux/slab.h>
-#include <linux/fb.h>
#include <linux/module.h>
#include <drm/drmP.h>
#include <drm/drm_crtc.h>
#include <drm/drm_atomic.h>
#include <drm/drm_atomic_helper.h>
+#include "drm_crtc_helper_internal.h"
+
static bool drm_fbdev_emulation = true;
module_param_named(fbdev_emulation, drm_fbdev_emulation, bool, 0600);
MODULE_PARM_DESC(fbdev_emulation,
goto fail;
}
- plane_state->rotation = BIT(DRM_ROTATE_0);
+ plane_state->rotation = DRM_ROTATE_0;
plane->old_fb = plane->fb;
plane_mask |= 1 << drm_plane_index(plane);
if (dev->mode_config.rotation_property) {
drm_mode_plane_set_obj_prop(plane,
dev->mode_config.rotation_property,
- BIT(DRM_ROTATE_0));
+ DRM_ROTATE_0);
}
}
/* Sometimes user space wants everything disabled, so don't steal the
* display if there's a master. */
- if (lockless_dereference(dev->master))
+ if (READ_ONCE(dev->master))
return false;
drm_for_each_crtc(crtc, dev) {
kfree(helper->crtc_info);
}
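+/* Deferred resume: used by drm_fb_helper_set_suspend_unlocked() when the
+ * console lock is unavailable at resume time; waits for the lock and then
+ * clears the fbdev suspend state.
+ */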
+static void drm_fb_helper_resume_worker(struct work_struct *work)
+{
+ struct drm_fb_helper *helper = container_of(work, struct drm_fb_helper,
+ resume_work);
+
+ console_lock();
+ fb_set_suspend(helper->fbdev, 0);
+ console_unlock();
+}
+
static void drm_fb_helper_dirty_work(struct work_struct *work)
{
struct drm_fb_helper *helper = container_of(work, struct drm_fb_helper,
{
INIT_LIST_HEAD(&helper->kernel_fb_list);
spin_lock_init(&helper->dirty_lock);
+ INIT_WORK(&helper->resume_work, drm_fb_helper_resume_worker);
INIT_WORK(&helper->dirty_work, drm_fb_helper_dirty_work);
helper->dirty_clip.x1 = helper->dirty_clip.y1 = ~0;
helper->funcs = funcs;
/**
* drm_fb_helper_set_suspend - wrapper around fb_set_suspend
* @fb_helper: driver-allocated fbdev helper
- * @state: desired state, zero to resume, non-zero to suspend
+ * @suspend: whether to suspend or resume
*
- * A wrapper around fb_set_suspend implemented by fbdev core
+ * A wrapper around fb_set_suspend implemented by fbdev core.
+ * Use drm_fb_helper_set_suspend_unlocked() if you don't need to take
+ * the lock yourself
*/
-void drm_fb_helper_set_suspend(struct drm_fb_helper *fb_helper, int state)
+void drm_fb_helper_set_suspend(struct drm_fb_helper *fb_helper, bool suspend)
{
if (fb_helper && fb_helper->fbdev)
- fb_set_suspend(fb_helper->fbdev, state);
+ fb_set_suspend(fb_helper->fbdev, suspend);
}
EXPORT_SYMBOL(drm_fb_helper_set_suspend);
+/**
+ * drm_fb_helper_set_suspend_unlocked - wrapper around fb_set_suspend that also
+ * takes the console lock
+ * @fb_helper: driver-allocated fbdev helper
+ * @suspend: whether to suspend or resume
+ *
+ * A wrapper around fb_set_suspend() that takes the console lock. If the lock
+ * isn't available on resume, a worker is tasked with waiting for the lock
+ * to become available. The console lock can be pretty contended on resume
+ * due to all the printk activity.
+ *
+ * This function can be called multiple times with the same state since
+ * &fb_info->state is checked to see if fbdev is running or not before locking.
+ *
+ * Use drm_fb_helper_set_suspend() if you need to take the lock yourself.
+ */
+void drm_fb_helper_set_suspend_unlocked(struct drm_fb_helper *fb_helper,
+ bool suspend)
+{
+ if (!fb_helper || !fb_helper->fbdev)
+ return;
+
+ /* make sure there's no pending/ongoing resume */
+ flush_work(&fb_helper->resume_work);
+
+ if (suspend) {
+ if (fb_helper->fbdev->state != FBINFO_STATE_RUNNING)
+ return;
+
+ console_lock();
+
+ } else {
+ if (fb_helper->fbdev->state == FBINFO_STATE_RUNNING)
+ return;
+
+ if (!console_trylock()) {
+ schedule_work(&fb_helper->resume_work);
+ return;
+ }
+ }
+
+ fb_set_suspend(fb_helper->fbdev, suspend);
+ console_unlock();
+}
+EXPORT_SYMBOL(drm_fb_helper_set_suspend_unlocked);
+
static int setcolreg(struct drm_crtc *crtc, u16 red, u16 green,
u16 blue, u16 regno, struct fb_info *info)
{
* @fb_helper: the drm_fb_helper
*
* Scan the connectors attached to the fb_helper and try to put together a
- * setup after *notification of a change in output configuration.
+ * setup after notification of a change in output configuration.
*
* Called at runtime, takes the mode config locks to be able to check/change the
* modeset configuration. Must be run from process context (which usually means
#include <drm/drm_vma_manager.h>
#include <drm/i915_drm.h>
#include "i915_drv.h"
+#include "i915_gem_dmabuf.h"
#include "i915_vgpu.h"
#include "i915_trace.h"
#include "intel_drv.h"
+#include "intel_frontbuffer.h"
#include "intel_mocs.h"
+#include <linux/reservation.h>
#include <linux/shmem_fs.h>
#include <linux/slab.h>
#include <linux/swap.h>
static void i915_gem_object_flush_gtt_write_domain(struct drm_i915_gem_object *obj);
static void i915_gem_object_flush_cpu_write_domain(struct drm_i915_gem_object *obj);
-static void
-i915_gem_object_retire__write(struct drm_i915_gem_object *obj);
-static void
-i915_gem_object_retire__read(struct drm_i915_gem_object *obj, int ring);
static bool cpu_cache_is_coherent(struct drm_device *dev,
enum i915_cache_level level)
if (ret)
return ret;
- WARN_ON(i915_verify_lists(dev));
return 0;
}
pinned = 0;
mutex_lock(&dev->struct_mutex);
list_for_each_entry(vma, &ggtt->base.active_list, vm_link)
- if (vma->pin_count)
+ if (i915_vma_is_pinned(vma))
pinned += vma->node.size;
list_for_each_entry(vma, &ggtt->base.inactive_list, vm_link)
- if (vma->pin_count)
+ if (i915_vma_is_pinned(vma))
pinned += vma->node.size;
mutex_unlock(&dev->struct_mutex);
.release = i915_gem_object_release_phys,
};
-static int
-drop_pages(struct drm_i915_gem_object *obj)
+int i915_gem_object_unbind(struct drm_i915_gem_object *obj)
{
- struct i915_vma *vma, *next;
+ struct i915_vma *vma;
+ LIST_HEAD(still_in_list);
int ret;
- drm_gem_object_reference(&obj->base);
- list_for_each_entry_safe(vma, next, &obj->vma_list, obj_link)
- if (i915_vma_unbind(vma))
- break;
+ lockdep_assert_held(&obj->base.dev->struct_mutex);
- ret = i915_gem_object_put_pages(obj);
- drm_gem_object_unreference(&obj->base);
+ /* Closed vma are removed from the obj->vma_list - but they may
+ * still have an active binding on the object. To remove those we
+ * must wait for all rendering to complete to the object (as unbinding
+ * must anyway), and retire the requests.
+ */
+ ret = i915_gem_object_wait_rendering(obj, false);
+ if (ret)
+ return ret;
+
+ i915_gem_retire_requests(to_i915(obj->base.dev));
+
+ while ((vma = list_first_entry_or_null(&obj->vma_list,
+ struct i915_vma,
+ obj_link))) {
+ list_move_tail(&vma->obj_link, &still_in_list);
+ ret = i915_vma_unbind(vma);
+ if (ret)
+ break;
+ }
+ list_splice(&still_in_list, &obj->vma_list);
return ret;
}
+/**
+ * Ensures that all rendering to the object has completed and the object is
+ * safe to unbind from the GTT or access from the CPU.
+ * @obj: i915 gem object
+ * @readonly: waiting for just read access or read-write access
+ */
+int
+i915_gem_object_wait_rendering(struct drm_i915_gem_object *obj,
+ bool readonly)
+{
+ struct reservation_object *resv;
+ struct i915_gem_active *active;
+ unsigned long active_mask;
+ int idx;
+
+ lockdep_assert_held(&obj->base.dev->struct_mutex);
+
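+	/* Writers must wait for all outstanding reads to finish; readers only
+	 * need the last write to have completed.
+	 */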
+ if (!readonly) {
+ active = obj->last_read;
+ active_mask = i915_gem_object_get_active(obj);
+ } else {
+ active_mask = 1;
+ active = &obj->last_write;
+ }
+
+ for_each_active(active_mask, idx) {
+ int ret;
+
+ ret = i915_gem_active_wait(&active[idx],
+ &obj->base.dev->struct_mutex);
+ if (ret)
+ return ret;
+ }
+
+ resv = i915_gem_object_get_dmabuf_resv(obj);
+ if (resv) {
+ long err;
+
+ err = reservation_object_wait_timeout_rcu(resv, !readonly, true,
+ MAX_SCHEDULE_TIMEOUT);
+ if (err < 0)
+ return err;
+ }
+
+ return 0;
+}
+
+/* A nonblocking variant of the above wait. Must be called prior to
+ * acquiring the mutex for the object, as the object state may change
+ * during this call. A reference must be held by the caller for the object.
+ */
+static __must_check int
+__unsafe_wait_rendering(struct drm_i915_gem_object *obj,
+ struct intel_rps_client *rps,
+ bool readonly)
+{
+ struct i915_gem_active *active;
+ unsigned long active_mask;
+ int idx;
+
+ active_mask = __I915_BO_ACTIVE(obj);
+ if (!active_mask)
+ return 0;
+
+ if (!readonly) {
+ active = obj->last_read;
+ } else {
+ active_mask = 1;
+ active = &obj->last_write;
+ }
+
+ for_each_active(active_mask, idx) {
+ int ret;
+
+ ret = i915_gem_active_wait_unlocked(&active[idx],
+ I915_WAIT_INTERRUPTIBLE,
+ NULL, rps);
+ if (ret)
+ return ret;
+ }
+
+ return 0;
+}
+
+static struct intel_rps_client *to_rps_client(struct drm_file *file)
+{
+ struct drm_i915_file_private *fpriv = file->driver_priv;
+
+ return &fpriv->rps;
+}
+
int
i915_gem_object_attach_phys(struct drm_i915_gem_object *obj,
int align)
if (obj->base.filp == NULL)
return -EINVAL;
- ret = drop_pages(obj);
+ ret = i915_gem_object_unbind(obj);
+ if (ret)
+ return ret;
+
+ ret = i915_gem_object_put_pages(obj);
if (ret)
return ret;
ret = drm_gem_handle_create(file, &obj->base, &handle);
/* drop reference from allocate - handle holds it now */
- drm_gem_object_unreference_unlocked(&obj->base);
+ i915_gem_object_put_unlocked(obj);
if (ret)
return ret;
* flush the object from the CPU cache.
*/
int i915_gem_obj_prepare_shmem_read(struct drm_i915_gem_object *obj,
- int *needs_clflush)
+ unsigned int *needs_clflush)
{
int ret;
*needs_clflush = 0;
- if (WARN_ON(!i915_gem_object_has_struct_page(obj)))
- return -EINVAL;
+ if (!i915_gem_object_has_struct_page(obj))
+ return -ENODEV;
+
+ ret = i915_gem_object_wait_rendering(obj, true);
+ if (ret)
+ return ret;
+
+ ret = i915_gem_object_get_pages(obj);
+ if (ret)
+ return ret;
+
+ i915_gem_object_pin_pages(obj);
+
+ i915_gem_object_flush_gtt_write_domain(obj);
- if (!(obj->base.read_domains & I915_GEM_DOMAIN_CPU)) {
- /* If we're not in the cpu read domain, set ourself into the gtt
- * read domain and manually flush cachelines (if required). This
- * optimizes for the case when the gpu will dirty the data
- * anyway again before the next pread happens. */
+ /* If we're not in the cpu read domain, set ourself into the gtt
+ * read domain and manually flush cachelines (if required). This
+ * optimizes for the case when the gpu will dirty the data
+ * anyway again before the next pread happens.
+ */
+ if (!(obj->base.read_domains & I915_GEM_DOMAIN_CPU))
*needs_clflush = !cpu_cache_is_coherent(obj->base.dev,
obj->cache_level);
- ret = i915_gem_object_wait_rendering(obj, true);
+
+ if (*needs_clflush && !static_cpu_has(X86_FEATURE_CLFLUSH)) {
+ ret = i915_gem_object_set_to_cpu_domain(obj, false);
if (ret)
- return ret;
+ goto err_unpin;
+
+ *needs_clflush = 0;
}
+ /* return with the pages pinned */
+ return 0;
+
+err_unpin:
+ i915_gem_object_unpin_pages(obj);
+ return ret;
+}
+
+int i915_gem_obj_prepare_shmem_write(struct drm_i915_gem_object *obj,
+ unsigned int *needs_clflush)
+{
+ int ret;
+
+ *needs_clflush = 0;
+ if (!i915_gem_object_has_struct_page(obj))
+ return -ENODEV;
+
+ ret = i915_gem_object_wait_rendering(obj, false);
+ if (ret)
+ return ret;
+
ret = i915_gem_object_get_pages(obj);
if (ret)
return ret;
i915_gem_object_pin_pages(obj);
+ i915_gem_object_flush_gtt_write_domain(obj);
+
+ /* If we're not in the cpu write domain, set ourself into the
+ * gtt write domain and manually flush cachelines (as required).
+ * This optimizes for the case when the gpu will use the data
+ * right away and we therefore have to clflush anyway.
+ */
+ if (obj->base.write_domain != I915_GEM_DOMAIN_CPU)
+ *needs_clflush |= cpu_write_needs_clflush(obj) << 1;
+
+ /* Same trick applies to invalidate partially written cachelines read
+ * before writing.
+ */
+ if (!(obj->base.read_domains & I915_GEM_DOMAIN_CPU))
+ *needs_clflush |= !cpu_cache_is_coherent(obj->base.dev,
+ obj->cache_level);
+
+ if (*needs_clflush && !static_cpu_has(X86_FEATURE_CLFLUSH)) {
+ ret = i915_gem_object_set_to_cpu_domain(obj, true);
+ if (ret)
+ goto err_unpin;
+
+ *needs_clflush = 0;
+ }
+
+ if ((*needs_clflush & CLFLUSH_AFTER) == 0)
+ obj->cache_dirty = true;
+
+ intel_fb_obj_invalidate(obj, ORIGIN_CPU);
+ obj->dirty = 1;
+ /* return with the pages pinned */
+ return 0;
+
+err_unpin:
+ i915_gem_object_unpin_pages(obj);
return ret;
}
{
struct drm_i915_private *dev_priv = to_i915(dev);
struct i915_ggtt *ggtt = &dev_priv->ggtt;
+ struct i915_vma *vma;
struct drm_mm_node node;
char __user *user_data;
uint64_t remain;
uint64_t offset;
int ret;
- ret = i915_gem_obj_ggtt_pin(obj, 0, PIN_MAPPABLE);
- if (ret) {
+ vma = i915_gem_object_ggtt_pin(obj, NULL, 0, 0, PIN_MAPPABLE);
+ if (!IS_ERR(vma)) {
+ node.start = i915_ggtt_offset(vma);
+ node.allocated = false;
+ ret = i915_vma_put_fence(vma);
+ if (ret) {
+ i915_vma_unpin(vma);
+ vma = ERR_PTR(ret);
+ }
+ }
+ if (IS_ERR(vma)) {
ret = insert_mappable_node(dev_priv, &node, PAGE_SIZE);
if (ret)
goto out;
}
i915_gem_object_pin_pages(obj);
- } else {
- node.start = i915_gem_obj_ggtt_offset(obj);
- node.allocated = false;
- ret = i915_gem_object_put_fence(obj);
- if (ret)
- goto out_unpin;
}
ret = i915_gem_object_set_to_gtt_domain(obj, false);
* and write to user memory which may result into page
* faults, and so we cannot perform this under struct_mutex.
*/
- if (slow_user_access(ggtt->mappable, page_base,
+ if (slow_user_access(&ggtt->mappable, page_base,
page_offset, user_data,
page_length, false)) {
ret = -EFAULT;
i915_gem_object_unpin_pages(obj);
remove_mappable_node(&node);
} else {
- i915_gem_object_ggtt_unpin(obj);
+ i915_vma_unpin(vma);
}
out:
return ret;
int needs_clflush = 0;
struct sg_page_iter sg_iter;
- if (!i915_gem_object_has_struct_page(obj))
- return -ENODEV;
-
- user_data = u64_to_user_ptr(args->data_ptr);
- remain = args->size;
-
- obj_do_bit17_swizzling = i915_gem_object_needs_bit17_swizzle(obj);
-
ret = i915_gem_obj_prepare_shmem_read(obj, &needs_clflush);
if (ret)
return ret;
+ obj_do_bit17_swizzling = i915_gem_object_needs_bit17_swizzle(obj);
+ user_data = u64_to_user_ptr(args->data_ptr);
offset = args->offset;
+ remain = args->size;
for_each_sg_page(obj->pages->sgl, &sg_iter, obj->pages->nents,
offset >> PAGE_SHIFT) {
}
out:
- i915_gem_object_unpin_pages(obj);
+ i915_gem_obj_finish_shmem_access(obj);
return ret;
}
args->size))
return -EFAULT;
- ret = i915_mutex_lock_interruptible(dev);
- if (ret)
- return ret;
-
- obj = to_intel_bo(drm_gem_object_lookup(file, args->handle));
- if (&obj->base == NULL) {
- ret = -ENOENT;
- goto unlock;
- }
+ obj = i915_gem_object_lookup(file, args->handle);
+ if (!obj)
+ return -ENOENT;
/* Bounds check source. */
if (args->offset > obj->base.size ||
args->size > obj->base.size - args->offset) {
ret = -EINVAL;
- goto out;
+ goto err;
}
trace_i915_gem_object_pread(obj, args->offset, args->size);
+ ret = __unsafe_wait_rendering(obj, to_rps_client(file), true);
+ if (ret)
+ goto err;
+
+ ret = i915_mutex_lock_interruptible(dev);
+ if (ret)
+ goto err;
+
ret = i915_gem_shmem_pread(dev, obj, args, file);
/* pread for non shmem backed objects */
intel_runtime_pm_put(to_i915(dev));
}
-out:
- drm_gem_object_unreference(&obj->base);
-unlock:
+ i915_gem_object_put(obj);
mutex_unlock(&dev->struct_mutex);
+
+ return ret;
+
+err:
+ i915_gem_object_put_unlocked(obj);
return ret;
}
/**
* This is the fast pwrite path, where we copy the data directly from the
* user into the GTT, uncached.
- * @dev: drm device pointer
+ * @i915: i915 device private data
* @obj: i915 gem object
* @args: pwrite arguments structure
* @file: drm file pointer
{
struct i915_ggtt *ggtt = &i915->ggtt;
struct drm_device *dev = obj->base.dev;
+ struct i915_vma *vma;
struct drm_mm_node node;
uint64_t remain, offset;
char __user *user_data;
int ret;
bool hit_slow_path = false;
- if (obj->tiling_mode != I915_TILING_NONE)
+ if (i915_gem_object_is_tiled(obj))
return -EFAULT;
- ret = i915_gem_obj_ggtt_pin(obj, 0, PIN_MAPPABLE | PIN_NONBLOCK);
- if (ret) {
+ vma = i915_gem_object_ggtt_pin(obj, NULL, 0, 0,
+ PIN_MAPPABLE | PIN_NONBLOCK);
+ if (!IS_ERR(vma)) {
+ node.start = i915_ggtt_offset(vma);
+ node.allocated = false;
+ ret = i915_vma_put_fence(vma);
+ if (ret) {
+ i915_vma_unpin(vma);
+ vma = ERR_PTR(ret);
+ }
+ }
+ if (IS_ERR(vma)) {
ret = insert_mappable_node(i915, &node, PAGE_SIZE);
if (ret)
goto out;
}
i915_gem_object_pin_pages(obj);
- } else {
- node.start = i915_gem_obj_ggtt_offset(obj);
- node.allocated = false;
- ret = i915_gem_object_put_fence(obj);
- if (ret)
- goto out_unpin;
}
ret = i915_gem_object_set_to_gtt_domain(obj, true);
if (ret)
goto out_unpin;
- intel_fb_obj_invalidate(obj, ORIGIN_GTT);
+ intel_fb_obj_invalidate(obj, ORIGIN_CPU);
obj->dirty = true;
user_data = u64_to_user_ptr(args->data_ptr);
* If the object is non-shmem backed, we retry again with the
* path that handles page fault.
*/
- if (fast_user_write(ggtt->mappable, page_base,
+ if (fast_user_write(&ggtt->mappable, page_base,
page_offset, user_data, page_length)) {
hit_slow_path = true;
mutex_unlock(&dev->struct_mutex);
- if (slow_user_access(ggtt->mappable,
+ if (slow_user_access(&ggtt->mappable,
page_base,
page_offset, user_data,
page_length, true)) {
}
}
- intel_fb_obj_flush(obj, false, ORIGIN_GTT);
+ intel_fb_obj_flush(obj, false, ORIGIN_CPU);
out_unpin:
if (node.allocated) {
wmb();
i915_gem_object_unpin_pages(obj);
remove_mappable_node(&node);
} else {
- i915_gem_object_ggtt_unpin(obj);
+ i915_vma_unpin(vma);
}
out:
return ret;
int shmem_page_offset, page_length, ret = 0;
int obj_do_bit17_swizzling, page_do_bit17_swizzling;
int hit_slowpath = 0;
- int needs_clflush_after = 0;
- int needs_clflush_before = 0;
+ unsigned int needs_clflush;
struct sg_page_iter sg_iter;
- user_data = u64_to_user_ptr(args->data_ptr);
- remain = args->size;
-
- obj_do_bit17_swizzling = i915_gem_object_needs_bit17_swizzle(obj);
-
- if (obj->base.write_domain != I915_GEM_DOMAIN_CPU) {
- /* If we're not in the cpu write domain, set ourself into the gtt
- * write domain and manually flush cachelines (if required). This
- * optimizes for the case when the gpu will use the data
- * right away and we therefore have to clflush anyway. */
- needs_clflush_after = cpu_write_needs_clflush(obj);
- ret = i915_gem_object_wait_rendering(obj, false);
- if (ret)
- return ret;
- }
- /* Same trick applies to invalidate partially written cachelines read
- * before writing. */
- if ((obj->base.read_domains & I915_GEM_DOMAIN_CPU) == 0)
- needs_clflush_before =
- !cpu_cache_is_coherent(dev, obj->cache_level);
-
- ret = i915_gem_object_get_pages(obj);
+ ret = i915_gem_obj_prepare_shmem_write(obj, &needs_clflush);
if (ret)
return ret;
- intel_fb_obj_invalidate(obj, ORIGIN_CPU);
-
- i915_gem_object_pin_pages(obj);
-
+ obj_do_bit17_swizzling = i915_gem_object_needs_bit17_swizzle(obj);
+ user_data = u64_to_user_ptr(args->data_ptr);
offset = args->offset;
- obj->dirty = 1;
+ remain = args->size;
for_each_sg_page(obj->pages->sgl, &sg_iter, obj->pages->nents,
offset >> PAGE_SHIFT) {
/* If we don't overwrite a cacheline completely we need to be
* careful to have up-to-date data by first clflushing. Don't
* overcomplicate things and flush the entire patch. */
- partial_cacheline_write = needs_clflush_before &&
+ partial_cacheline_write = needs_clflush & CLFLUSH_BEFORE &&
((shmem_page_offset | page_length)
& (boot_cpu_data.x86_clflush_size - 1));
ret = shmem_pwrite_fast(page, shmem_page_offset, page_length,
user_data, page_do_bit17_swizzling,
partial_cacheline_write,
- needs_clflush_after);
+ needs_clflush & CLFLUSH_AFTER);
if (ret == 0)
goto next_page;
ret = shmem_pwrite_slow(page, shmem_page_offset, page_length,
user_data, page_do_bit17_swizzling,
partial_cacheline_write,
- needs_clflush_after);
+ needs_clflush & CLFLUSH_AFTER);
mutex_lock(&dev->struct_mutex);
}
out:
- i915_gem_object_unpin_pages(obj);
+ i915_gem_obj_finish_shmem_access(obj);
if (hit_slowpath) {
/*
* cachelines in-line while writing and the object moved
* out of the cpu write domain while we've dropped the lock.
*/
- if (!needs_clflush_after &&
+ if (!(needs_clflush & CLFLUSH_AFTER) &&
obj->base.write_domain != I915_GEM_DOMAIN_CPU) {
if (i915_gem_clflush_object(obj, obj->pin_display))
- needs_clflush_after = true;
+ needs_clflush |= CLFLUSH_AFTER;
}
}
- if (needs_clflush_after)
+ if (needs_clflush & CLFLUSH_AFTER)
i915_gem_chipset_flush(to_i915(dev));
- else
- obj->cache_dirty = true;
intel_fb_obj_flush(obj, false, ORIGIN_CPU);
return ret;
return -EFAULT;
}
- intel_runtime_pm_get(dev_priv);
-
- ret = i915_mutex_lock_interruptible(dev);
- if (ret)
- goto put_rpm;
-
- obj = to_intel_bo(drm_gem_object_lookup(file, args->handle));
- if (&obj->base == NULL) {
- ret = -ENOENT;
- goto unlock;
- }
+ obj = i915_gem_object_lookup(file, args->handle);
+ if (!obj)
+ return -ENOENT;
/* Bounds check destination. */
if (args->offset > obj->base.size ||
args->size > obj->base.size - args->offset) {
ret = -EINVAL;
- goto out;
+ goto err;
}
trace_i915_gem_object_pwrite(obj, args->offset, args->size);
+ ret = __unsafe_wait_rendering(obj, to_rps_client(file), false);
+ if (ret)
+ goto err;
+
+ intel_runtime_pm_get(dev_priv);
+
+ ret = i915_mutex_lock_interruptible(dev);
+ if (ret)
+ goto err_rpm;
+
ret = -EFAULT;
/* We can only do the GTT pwrite on untiled buffers, as otherwise
* it would end up going through the fenced access, and we'll get
if (ret == -EFAULT || ret == -ENOSPC) {
if (obj->phys_handle)
ret = i915_gem_phys_pwrite(obj, args, file);
- else if (i915_gem_object_has_struct_page(obj))
- ret = i915_gem_shmem_pwrite(dev, obj, args, file);
else
- ret = -ENODEV;
+ ret = i915_gem_shmem_pwrite(dev, obj, args, file);
}
-out:
- drm_gem_object_unreference(&obj->base);
-unlock:
+ i915_gem_object_put(obj);
mutex_unlock(&dev->struct_mutex);
-put_rpm:
intel_runtime_pm_put(dev_priv);
return ret;
-}
-
-static int
-i915_gem_check_wedge(unsigned reset_counter, bool interruptible)
-{
- if (__i915_terminally_wedged(reset_counter))
- return -EIO;
-
- if (__i915_reset_in_progress(reset_counter)) {
- /* Non-interruptible callers can't handle -EAGAIN, hence return
- * -EIO unconditionally for these. */
- if (!interruptible)
- return -EIO;
-
- return -EAGAIN;
- }
- return 0;
+err_rpm:
+ intel_runtime_pm_put(dev_priv);
+err:
+ i915_gem_object_put_unlocked(obj);
+ return ret;
}
-static unsigned long local_clock_us(unsigned *cpu)
+static inline enum fb_op_origin
+write_origin(struct drm_i915_gem_object *obj, unsigned domain)
{
- unsigned long t;
-
- /* Cheaply and approximately convert from nanoseconds to microseconds.
- * The result and subsequent calculations are also defined in the same
- * approximate microseconds units. The principal source of timing
- * error here is from the simple truncation.
- *
- * Note that local_clock() is only defined wrt to the current CPU;
- * the comparisons are no longer valid if we switch CPUs. Instead of
- * blocking preemption for the entire busywait, we can detect the CPU
- * switch and use that as indicator of system load and a reason to
- * stop busywaiting, see busywait_stop().
- */
- *cpu = get_cpu();
- t = local_clock() >> 10;
- put_cpu();
-
- return t;
-}
-
-static bool busywait_stop(unsigned long timeout, unsigned cpu)
-{
- unsigned this_cpu;
-
- if (time_after(local_clock_us(&this_cpu), timeout))
- return true;
-
- return this_cpu != cpu;
-}
-
-bool __i915_spin_request(const struct drm_i915_gem_request *req,
- int state, unsigned long timeout_us)
-{
- unsigned cpu;
-
- /* When waiting for high frequency requests, e.g. during synchronous
- * rendering split between the CPU and GPU, the finite amount of time
- * required to set up the irq and wait upon it limits the response
- * rate. By busywaiting on the request completion for a short while we
- * can service the high frequency waits as quick as possible. However,
- * if it is a slow request, we want to sleep as quickly as possible.
- * The tradeoff between waiting and sleeping is roughly the time it
- * takes to sleep on a request, on the order of a microsecond.
- */
-
- timeout_us += local_clock_us(&cpu);
- do {
- if (i915_gem_request_completed(req))
- return true;
-
- if (signal_pending_state(state, current))
- break;
-
- if (busywait_stop(timeout_us, cpu))
- break;
-
- cpu_relax_lowlatency();
- } while (!need_resched());
-
- return false;
-}
-
-/**
- * __i915_wait_request - wait until execution of request has finished
- * @req: duh!
- * @interruptible: do an interruptible wait (normally yes)
- * @timeout: in - how long to wait (NULL forever); out - how much time remaining
- * @rps: RPS client
- *
- * Note: It is of utmost importance that the passed in seqno and reset_counter
- * values have been read by the caller in an smp safe manner. Where read-side
- * locks are involved, it is sufficient to read the reset_counter before
- * unlocking the lock that protects the seqno. For lockless tricks, the
- * reset_counter _must_ be read before, and an appropriate smp_rmb must be
- * inserted.
- *
- * Returns 0 if the request was found within the alloted time. Else returns the
- * errno with remaining time filled in timeout argument.
- */
-int __i915_wait_request(struct drm_i915_gem_request *req,
- bool interruptible,
- s64 *timeout,
- struct intel_rps_client *rps)
-{
- int state = interruptible ? TASK_INTERRUPTIBLE : TASK_UNINTERRUPTIBLE;
- DEFINE_WAIT(reset);
- struct intel_wait wait;
- unsigned long timeout_remain;
- s64 before = 0; /* Only to silence a compiler warning. */
- int ret = 0;
-
- might_sleep();
-
- if (list_empty(&req->list))
- return 0;
-
- if (i915_gem_request_completed(req))
- return 0;
-
- timeout_remain = MAX_SCHEDULE_TIMEOUT;
- if (timeout) {
- if (WARN_ON(*timeout < 0))
- return -EINVAL;
-
- if (*timeout == 0)
- return -ETIME;
-
- timeout_remain = nsecs_to_jiffies_timeout(*timeout);
-
- /*
- * Record current time in case interrupted by signal, or wedged.
- */
- before = ktime_get_raw_ns();
- }
-
- trace_i915_gem_request_wait_begin(req);
-
- /* This client is about to stall waiting for the GPU. In many cases
- * this is undesirable and limits the throughput of the system, as
- * many clients cannot continue processing user input/output whilst
- * blocked. RPS autotuning may take tens of milliseconds to respond
- * to the GPU load and thus incurs additional latency for the client.
- * We can circumvent that by promoting the GPU frequency to maximum
- * before we wait. This makes the GPU throttle up much more quickly
- * (good for benchmarks and user experience, e.g. window animations),
- * but at a cost of spending more power processing the workload
- * (bad for battery). Not all clients even want their results
- * immediately and for them we should just let the GPU select its own
- * frequency to maximise efficiency. To prevent a single client from
- * forcing the clocks too high for the whole system, we only allow
- * each client to waitboost once in a busy period.
- */
- if (INTEL_INFO(req->i915)->gen >= 6)
- gen6_rps_boost(req->i915, rps, req->emitted_jiffies);
-
- /* Optimistic spin for the next ~jiffie before touching IRQs */
- if (i915_spin_request(req, state, 5))
- goto complete;
-
- set_current_state(state);
- add_wait_queue(&req->i915->gpu_error.wait_queue, &reset);
-
- intel_wait_init(&wait, req->seqno);
- if (intel_engine_add_wait(req->engine, &wait))
- /* In order to check that we haven't missed the interrupt
- * as we enabled it, we need to kick ourselves to do a
- * coherent check on the seqno before we sleep.
- */
- goto wakeup;
-
- for (;;) {
- if (signal_pending_state(state, current)) {
- ret = -ERESTARTSYS;
- break;
- }
-
- timeout_remain = io_schedule_timeout(timeout_remain);
- if (timeout_remain == 0) {
- ret = -ETIME;
- break;
- }
-
- if (intel_wait_complete(&wait))
- break;
-
- set_current_state(state);
-
-wakeup:
- /* Carefully check if the request is complete, giving time
- * for the seqno to be visible following the interrupt.
- * We also have to check in case we are kicked by the GPU
- * reset in order to drop the struct_mutex.
- */
- if (__i915_request_irq_complete(req))
- break;
-
- /* Only spin if we know the GPU is processing this request */
- if (i915_spin_request(req, state, 2))
- break;
- }
- remove_wait_queue(&req->i915->gpu_error.wait_queue, &reset);
-
- intel_engine_remove_wait(req->engine, &wait);
- __set_current_state(TASK_RUNNING);
-complete:
- trace_i915_gem_request_wait_end(req);
-
- if (timeout) {
- s64 tres = *timeout - (ktime_get_raw_ns() - before);
-
- *timeout = tres < 0 ? 0 : tres;
-
- /*
- * Apparently ktime isn't accurate enough and occasionally has a
- * bit of mismatch in the jiffies<->nsecs<->ktime loop. So patch
- * things up to make the test happy. We allow up to 1 jiffy.
- *
- * This is a regrssion from the timespec->ktime conversion.
- */
- if (ret == -ETIME && *timeout < jiffies_to_usecs(1)*1000)
- *timeout = 0;
- }
-
- if (rps && req->seqno == req->engine->last_submitted_seqno) {
- /* The GPU is now idle and this client has stalled.
- * Since no other client has submitted a request in the
- * meantime, assume that this client is the only one
- * supplying work to the GPU but is unable to keep that
- * work supplied because it is waiting. Since the GPU is
- * then never kept fully busy, RPS autoclocking will
- * keep the clocks relatively low, causing further delays.
- * Compensate by giving the synchronous client credit for
- * a waitboost next time.
- */
- spin_lock(&req->i915->rps.client_lock);
- list_del_init(&rps->link);
- spin_unlock(&req->i915->rps.client_lock);
- }
-
- return ret;
-}
-
-int i915_gem_request_add_to_client(struct drm_i915_gem_request *req,
- struct drm_file *file)
-{
- struct drm_i915_file_private *file_priv;
-
- WARN_ON(!req || !file || req->file_priv);
-
- if (!req || !file)
- return -EINVAL;
-
- if (req->file_priv)
- return -EINVAL;
-
- file_priv = file->driver_priv;
-
- spin_lock(&file_priv->mm.lock);
- req->file_priv = file_priv;
- list_add_tail(&req->client_list, &file_priv->mm.request_list);
- spin_unlock(&file_priv->mm.lock);
-
- req->pid = get_pid(task_pid(current));
-
- return 0;
-}
-
-static inline void
-i915_gem_request_remove_from_client(struct drm_i915_gem_request *request)
-{
- struct drm_i915_file_private *file_priv = request->file_priv;
-
- if (!file_priv)
- return;
-
- spin_lock(&file_priv->mm.lock);
- list_del(&request->client_list);
- request->file_priv = NULL;
- spin_unlock(&file_priv->mm.lock);
-
- put_pid(request->pid);
- request->pid = NULL;
-}
-
-static void i915_gem_request_retire(struct drm_i915_gem_request *request)
-{
- trace_i915_gem_request_retire(request);
-
- /* We know the GPU must have read the request to have
- * sent us the seqno + interrupt, so use the position
- * of tail of the request to update the last known position
- * of the GPU head.
- *
- * Note this requires that we are always called in request
- * completion order.
- */
- request->ringbuf->last_retired_head = request->postfix;
-
- list_del_init(&request->list);
- i915_gem_request_remove_from_client(request);
-
- if (request->previous_context) {
- if (i915.enable_execlists)
- intel_lr_context_unpin(request->previous_context,
- request->engine);
- }
-
- i915_gem_context_unreference(request->ctx);
- i915_gem_request_unreference(request);
-}
-
-static void
-__i915_gem_request_retire__upto(struct drm_i915_gem_request *req)
-{
- struct intel_engine_cs *engine = req->engine;
- struct drm_i915_gem_request *tmp;
-
- lockdep_assert_held(&engine->i915->drm.struct_mutex);
-
- if (list_empty(&req->list))
- return;
-
- do {
- tmp = list_first_entry(&engine->request_list,
- typeof(*tmp), list);
-
- i915_gem_request_retire(tmp);
- } while (tmp != req);
-
- WARN_ON(i915_verify_lists(engine->dev));
-}
-
-/**
- * Waits for a request to be signaled, and cleans up the
- * request and object lists appropriately for that event.
- * @req: request to wait on
- */
-int
-i915_wait_request(struct drm_i915_gem_request *req)
-{
- struct drm_i915_private *dev_priv = req->i915;
- bool interruptible;
- int ret;
-
- interruptible = dev_priv->mm.interruptible;
-
- BUG_ON(!mutex_is_locked(&dev_priv->drm.struct_mutex));
-
- ret = __i915_wait_request(req, interruptible, NULL, NULL);
- if (ret)
- return ret;
-
- /* If the GPU hung, we want to keep the requests to find the guilty. */
- if (!i915_reset_in_progress(&dev_priv->gpu_error))
- __i915_gem_request_retire__upto(req);
-
- return 0;
-}
-
-/**
- * Ensures that all rendering to the object has completed and the object is
- * safe to unbind from the GTT or access from the CPU.
- * @obj: i915 gem object
- * @readonly: waiting for read access or write
- */
-int
-i915_gem_object_wait_rendering(struct drm_i915_gem_object *obj,
- bool readonly)
-{
- int ret, i;
-
- if (!obj->active)
- return 0;
-
- if (readonly) {
- if (obj->last_write_req != NULL) {
- ret = i915_wait_request(obj->last_write_req);
- if (ret)
- return ret;
-
- i = obj->last_write_req->engine->id;
- if (obj->last_read_req[i] == obj->last_write_req)
- i915_gem_object_retire__read(obj, i);
- else
- i915_gem_object_retire__write(obj);
- }
- } else {
- for (i = 0; i < I915_NUM_ENGINES; i++) {
- if (obj->last_read_req[i] == NULL)
- continue;
-
- ret = i915_wait_request(obj->last_read_req[i]);
- if (ret)
- return ret;
-
- i915_gem_object_retire__read(obj, i);
- }
- GEM_BUG_ON(obj->active);
- }
-
- return 0;
-}
-
-static void
-i915_gem_object_retire_request(struct drm_i915_gem_object *obj,
- struct drm_i915_gem_request *req)
-{
- int ring = req->engine->id;
-
- if (obj->last_read_req[ring] == req)
- i915_gem_object_retire__read(obj, ring);
- else if (obj->last_write_req == req)
- i915_gem_object_retire__write(obj);
-
- if (!i915_reset_in_progress(&req->i915->gpu_error))
- __i915_gem_request_retire__upto(req);
-}
-
-/* A nonblocking variant of the above wait. This is a highly dangerous routine
- * as the object state may change during this call.
- */
-static __must_check int
-i915_gem_object_wait_rendering__nonblocking(struct drm_i915_gem_object *obj,
- struct intel_rps_client *rps,
- bool readonly)
-{
- struct drm_device *dev = obj->base.dev;
- struct drm_i915_private *dev_priv = to_i915(dev);
- struct drm_i915_gem_request *requests[I915_NUM_ENGINES];
- int ret, i, n = 0;
-
- BUG_ON(!mutex_is_locked(&dev->struct_mutex));
- BUG_ON(!dev_priv->mm.interruptible);
-
- if (!obj->active)
- return 0;
-
- if (readonly) {
- struct drm_i915_gem_request *req;
-
- req = obj->last_write_req;
- if (req == NULL)
- return 0;
-
- requests[n++] = i915_gem_request_reference(req);
- } else {
- for (i = 0; i < I915_NUM_ENGINES; i++) {
- struct drm_i915_gem_request *req;
-
- req = obj->last_read_req[i];
- if (req == NULL)
- continue;
-
- requests[n++] = i915_gem_request_reference(req);
- }
- }
-
- mutex_unlock(&dev->struct_mutex);
- ret = 0;
- for (i = 0; ret == 0 && i < n; i++)
- ret = __i915_wait_request(requests[i], true, NULL, rps);
- mutex_lock(&dev->struct_mutex);
-
- for (i = 0; i < n; i++) {
- if (ret == 0)
- i915_gem_object_retire_request(obj, requests[i]);
- i915_gem_request_unreference(requests[i]);
- }
-
- return ret;
-}
-
-static struct intel_rps_client *to_rps_client(struct drm_file *file)
-{
- struct drm_i915_file_private *fpriv = file->driver_priv;
- return &fpriv->rps;
-}
-
-static enum fb_op_origin
-write_origin(struct drm_i915_gem_object *obj, unsigned domain)
-{
- return domain == I915_GEM_DOMAIN_GTT && !obj->has_wc_mmap ?
- ORIGIN_GTT : ORIGIN_CPU;
+ return (domain == I915_GEM_DOMAIN_GTT ?
+ obj->frontbuffer_ggtt_origin : ORIGIN_CPU);
}
/**
int ret;
/* Only handle setting domains to types used by the CPU. */
- if (write_domain & I915_GEM_GPU_DOMAINS)
- return -EINVAL;
-
- if (read_domains & I915_GEM_GPU_DOMAINS)
+ if ((write_domain | read_domains) & I915_GEM_GPU_DOMAINS)
return -EINVAL;
/* Having something in the write domain implies it's in the read
if (write_domain != 0 && read_domains != write_domain)
return -EINVAL;
- ret = i915_mutex_lock_interruptible(dev);
- if (ret)
- return ret;
-
- obj = to_intel_bo(drm_gem_object_lookup(file, args->handle));
- if (&obj->base == NULL) {
- ret = -ENOENT;
- goto unlock;
- }
+ obj = i915_gem_object_lookup(file, args->handle);
+ if (!obj)
+ return -ENOENT;
/* Try to flush the object off the GPU without holding the lock.
* We will repeat the flush holding the lock in the normal manner
* to catch cases where we are gazumped.
*/
- ret = i915_gem_object_wait_rendering__nonblocking(obj,
- to_rps_client(file),
- !write_domain);
+ ret = __unsafe_wait_rendering(obj, to_rps_client(file), !write_domain);
+ if (ret)
+ goto err;
+
+ ret = i915_mutex_lock_interruptible(dev);
if (ret)
- goto unref;
+ goto err;
if (read_domains & I915_GEM_DOMAIN_GTT)
ret = i915_gem_object_set_to_gtt_domain(obj, write_domain != 0);
if (write_domain != 0)
intel_fb_obj_invalidate(obj, write_origin(obj, write_domain));
-unref:
- drm_gem_object_unreference(&obj->base);
-unlock:
+ i915_gem_object_put(obj);
mutex_unlock(&dev->struct_mutex);
return ret;
+
+err:
+ i915_gem_object_put_unlocked(obj);
+ return ret;
}
/**
{
struct drm_i915_gem_sw_finish *args = data;
struct drm_i915_gem_object *obj;
- int ret = 0;
+ int err = 0;
- ret = i915_mutex_lock_interruptible(dev);
- if (ret)
- return ret;
-
- obj = to_intel_bo(drm_gem_object_lookup(file, args->handle));
- if (&obj->base == NULL) {
- ret = -ENOENT;
- goto unlock;
- }
+ obj = i915_gem_object_lookup(file, args->handle);
+ if (!obj)
+ return -ENOENT;
/* Pinned buffers may be scanout, so flush the cache */
- if (obj->pin_display)
- i915_gem_object_flush_cpu_write_domain(obj);
+ if (READ_ONCE(obj->pin_display)) {
+ err = i915_mutex_lock_interruptible(dev);
+ if (!err) {
+ i915_gem_object_flush_cpu_write_domain(obj);
+ mutex_unlock(&dev->struct_mutex);
+ }
+ }
- drm_gem_object_unreference(&obj->base);
-unlock:
- mutex_unlock(&dev->struct_mutex);
- return ret;
+ i915_gem_object_put_unlocked(obj);
+ return err;
}
/**
struct drm_file *file)
{
struct drm_i915_gem_mmap *args = data;
- struct drm_gem_object *obj;
+ struct drm_i915_gem_object *obj;
unsigned long addr;
if (args->flags & ~(I915_MMAP_WC))
if (args->flags & I915_MMAP_WC && !boot_cpu_has(X86_FEATURE_PAT))
return -ENODEV;
- obj = drm_gem_object_lookup(file, args->handle);
- if (obj == NULL)
+ obj = i915_gem_object_lookup(file, args->handle);
+ if (!obj)
return -ENOENT;
/* prime objects have no backing filp to GEM mmap
* pages from.
*/
- if (!obj->filp) {
- drm_gem_object_unreference_unlocked(obj);
+ if (!obj->base.filp) {
+ i915_gem_object_put_unlocked(obj);
return -EINVAL;
}
- addr = vm_mmap(obj->filp, 0, args->size,
+ addr = vm_mmap(obj->base.filp, 0, args->size,
PROT_READ | PROT_WRITE, MAP_SHARED,
args->offset);
if (args->flags & I915_MMAP_WC) {
struct vm_area_struct *vma;
if (down_write_killable(&mm->mmap_sem)) {
- drm_gem_object_unreference_unlocked(obj);
+ i915_gem_object_put_unlocked(obj);
return -EINTR;
}
vma = find_vma(mm, addr);
up_write(&mm->mmap_sem);
/* This may race, but that's ok, it only gets set */
- WRITE_ONCE(to_intel_bo(obj)->has_wc_mmap, true);
+ WRITE_ONCE(obj->frontbuffer_ggtt_origin, ORIGIN_CPU);
}
- drm_gem_object_unreference_unlocked(obj);
+ i915_gem_object_put_unlocked(obj);
if (IS_ERR((void *)addr))
return addr;
return 0;
}
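A minimal userspace sketch of the path serviced above: requesting a write-combined CPU mapping through DRM_IOCTL_I915_GEM_MMAP with the I915_MMAP_WC flag. This is not taken from the patch; the helper name gem_mmap_wc and its minimal error handling are illustrative assumptions, and fd/handle are assumed to be an open DRM fd and an existing GEM handle.

#include <stdint.h>
#include <string.h>
#include <sys/ioctl.h>
#include <drm/i915_drm.h>

static void *gem_mmap_wc(int fd, uint32_t handle, uint64_t size)
{
	struct drm_i915_gem_mmap arg;

	memset(&arg, 0, sizeof(arg));
	arg.handle = handle;
	arg.size = size;
	arg.flags = I915_MMAP_WC;	/* rejected with -ENODEV when PAT is unavailable */

	if (ioctl(fd, DRM_IOCTL_I915_GEM_MMAP, &arg))
		return NULL;		/* errno carries the kernel's error code */

	return (void *)(uintptr_t)arg.addr_ptr;
}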
+static unsigned int tile_row_pages(struct drm_i915_gem_object *obj)
+{
+ u64 size;
+
+ size = i915_gem_object_get_stride(obj);
+ size *= i915_gem_object_get_tiling(obj) == I915_TILING_Y ? 32 : 8;
+
+ return size >> PAGE_SHIFT;
+}
+
+/**
+ * i915_gem_mmap_gtt_version - report the current feature set for GTT mmaps
+ *
+ * A history of the GTT mmap interface:
+ *
+ * 0 - Everything had to fit into the GTT. Both parties of a memcpy had to
+ * be aligned and suitable for fencing, and still fit into the available
+ * mappable space left by the pinned display objects. A classic problem
+ * we called the page-fault-of-doom where we would ping-pong between
+ * two objects that could not fit inside the GTT and so the memcpy
+ * would page one object in at the expense of the other between every
+ * single byte.
+ *
+ * 1 - Objects can be any size, and have any compatible fencing (X, Y, or none
+ * as set via i915_gem_set_tiling() [DRM_I915_GEM_SET_TILING]). If the
+ * object is too large for the available space (or simply too large
+ * for the mappable aperture!), a view is created instead and faulted
+ * into userspace. (This view is aligned and sized appropriately for
+ * fenced access.)
+ *
+ * Restrictions:
+ *
+ * * snoopable objects cannot be accessed via the GTT. It can cause machine
+ * hangs on some architectures, corruption on others. An attempt to service
+ * a GTT page fault from a snoopable object will generate a SIGBUS.
+ *
+ * * the object must be able to fit into RAM (physical memory, though not
+ * limited to the mappable aperture).
+ *
+ * Caveats:
+ *
+ * * a new GTT page fault will synchronize rendering from the GPU and flush
+ * all data to system memory. Subsequent access will not be synchronized.
+ *
+ * * all mappings are revoked on runtime device suspend.
+ *
+ * * there are only 8, 16 or 32 fence registers to share between all users
+ * (older machines require a fence register for display and blitter access
+ * as well). Contention of the fence registers will cause the previous users
+ * to be unmapped and any new access will generate new page faults.
+ *
+ * * running out of memory while servicing a fault may generate a SIGBUS,
+ * rather than the expected SIGSEGV.
+ */
+int i915_gem_mmap_gtt_version(void)
+{
+ return 1;
+}
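A hedged userspace sketch of how the feature level reported by i915_gem_mmap_gtt_version() can be discovered through the existing GETPARAM ioctl and I915_PARAM_MMAP_GTT_VERSION (mentioned in the fault-handler comment below). Treating an ioctl failure as version 0 is an assumption for kernels that predate the parameter; the helper name is invented.

#include <string.h>
#include <sys/ioctl.h>
#include <drm/i915_drm.h>

static int gtt_mmap_version(int fd)
{
	drm_i915_getparam_t gp;
	int value = 0;

	memset(&gp, 0, sizeof(gp));
	gp.param = I915_PARAM_MMAP_GTT_VERSION;
	gp.value = &value;

	if (ioctl(fd, DRM_IOCTL_I915_GETPARAM, &gp))
		return 0;	/* parameter unknown: assume the old (version 0) behaviour */

	return value;
}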
+
/**
* i915_gem_fault - fault a page into the GTT
- * @vma: VMA in question
+ * @area: CPU VMA in question
* @vmf: fault info
*
* The fault handler is set up by drm_gem_mmap() when an object is GTT mapped
* from the GTT and/or fence registers to make room. So performance may
* suffer if the GTT working set is large or there are few fence registers
* left.
+ *
+ * The current feature set supported by i915_gem_fault() and thus GTT mmaps
+ * is exposed via I915_PARAM_MMAP_GTT_VERSION (see i915_gem_mmap_gtt_version).
*/
-int i915_gem_fault(struct vm_area_struct *vma, struct vm_fault *vmf)
+int i915_gem_fault(struct vm_area_struct *area, struct vm_fault *vmf)
{
- struct drm_i915_gem_object *obj = to_intel_bo(vma->vm_private_data);
+#define MIN_CHUNK_PAGES ((1 << 20) >> PAGE_SHIFT) /* 1 MiB */
+ struct drm_i915_gem_object *obj = to_intel_bo(area->vm_private_data);
struct drm_device *dev = obj->base.dev;
struct drm_i915_private *dev_priv = to_i915(dev);
struct i915_ggtt *ggtt = &dev_priv->ggtt;
- struct i915_ggtt_view view = i915_ggtt_view_normal;
- pgoff_t page_offset;
- unsigned long pfn;
- int ret = 0;
bool write = !!(vmf->flags & FAULT_FLAG_WRITE);
-
- intel_runtime_pm_get(dev_priv);
+ struct i915_vma *vma;
+ pgoff_t page_offset;
+ unsigned int flags;
+ int ret;
/* We don't use vmf->pgoff since that has the fake offset */
- page_offset = ((unsigned long)vmf->virtual_address - vma->vm_start) >>
+ page_offset = ((unsigned long)vmf->virtual_address - area->vm_start) >>
PAGE_SHIFT;
- ret = i915_mutex_lock_interruptible(dev);
- if (ret)
- goto out;
-
trace_i915_gem_object_fault(obj, page_offset, true, write);
/* Try to flush the object off the GPU first without holding the lock.
- * Upon reacquiring the lock, we will perform our sanity checks and then
+ * Upon acquiring the lock, we will perform our sanity checks and then
* repeat the flush holding the lock in the normal manner to catch cases
* where we are gazumped.
*/
- ret = i915_gem_object_wait_rendering__nonblocking(obj, NULL, !write);
+ ret = __unsafe_wait_rendering(obj, NULL, !write);
if (ret)
- goto unlock;
+ goto err;
+
+ intel_runtime_pm_get(dev_priv);
+
+ ret = i915_mutex_lock_interruptible(dev);
+ if (ret)
+ goto err_rpm;
/* Access to snoopable pages through the GTT is incoherent. */
if (obj->cache_level != I915_CACHE_NONE && !HAS_LLC(dev)) {
ret = -EFAULT;
- goto unlock;
+ goto err_unlock;
}
- /* Use a partial view if the object is bigger than the aperture. */
- if (obj->base.size >= ggtt->mappable_end &&
- obj->tiling_mode == I915_TILING_NONE) {
- static const unsigned int chunk_size = 256; // 1 MiB
+ /* If the object is smaller than a couple of partial vma, it is
+ * not worth only creating a single partial vma - we may as well
+ * clear enough space for the full object.
+ */
+ flags = PIN_MAPPABLE;
+ if (obj->base.size > 2 * MIN_CHUNK_PAGES << PAGE_SHIFT)
+ flags |= PIN_NONBLOCK | PIN_NONFAULT;
+
+ /* Now pin it into the GTT as needed */
+ vma = i915_gem_object_ggtt_pin(obj, NULL, 0, 0, flags);
+ if (IS_ERR(vma)) {
+ struct i915_ggtt_view view;
+ unsigned int chunk_size;
+
+ /* Use a partial view if it is bigger than available space */
+ chunk_size = MIN_CHUNK_PAGES;
+ if (i915_gem_object_is_tiled(obj))
+ chunk_size = max(chunk_size, tile_row_pages(obj));
memset(&view, 0, sizeof(view));
view.type = I915_GGTT_VIEW_PARTIAL;
view.params.partial.offset = rounddown(page_offset, chunk_size);
view.params.partial.size =
- min_t(unsigned int,
- chunk_size,
- (vma->vm_end - vma->vm_start)/PAGE_SIZE -
+ min_t(unsigned int, chunk_size,
+ (area->vm_end - area->vm_start) / PAGE_SIZE -
view.params.partial.offset);
- }
- /* Now pin it into the GTT if needed */
- ret = i915_gem_object_ggtt_pin(obj, &view, 0, PIN_MAPPABLE);
- if (ret)
- goto unlock;
+ /* If the partial covers the entire object, just create a
+ * normal VMA.
+ */
+ if (chunk_size >= obj->base.size >> PAGE_SHIFT)
+ view.type = I915_GGTT_VIEW_NORMAL;
+
+ /* Userspace is now writing through an untracked VMA, abandon
+ * all hope that the hardware is able to track future writes.
+ */
+ obj->frontbuffer_ggtt_origin = ORIGIN_CPU;
+
+ vma = i915_gem_object_ggtt_pin(obj, &view, 0, 0, PIN_MAPPABLE);
+ }
+ if (IS_ERR(vma)) {
+ ret = PTR_ERR(vma);
+ goto err_unlock;
+ }
ret = i915_gem_object_set_to_gtt_domain(obj, write);
if (ret)
- goto unpin;
+ goto err_unpin;
- ret = i915_gem_object_get_fence(obj);
+ ret = i915_vma_get_fence(vma);
if (ret)
- goto unpin;
+ goto err_unpin;
/* Finally, remap it using the new GTT offset */
- pfn = ggtt->mappable_base +
- i915_gem_obj_ggtt_offset_view(obj, &view);
- pfn >>= PAGE_SHIFT;
-
- if (unlikely(view.type == I915_GGTT_VIEW_PARTIAL)) {
- /* Overriding existing pages in partial view does not cause
- * us any trouble as TLBs are still valid because the fault
- * is due to userspace losing part of the mapping or never
- * having accessed it before (at this partials' range).
- */
- unsigned long base = vma->vm_start +
- (view.params.partial.offset << PAGE_SHIFT);
- unsigned int i;
-
- for (i = 0; i < view.params.partial.size; i++) {
- ret = vm_insert_pfn(vma, base + i * PAGE_SIZE, pfn + i);
- if (ret)
- break;
- }
-
- obj->fault_mappable = true;
- } else {
- if (!obj->fault_mappable) {
- unsigned long size = min_t(unsigned long,
- vma->vm_end - vma->vm_start,
- obj->base.size);
- int i;
-
- for (i = 0; i < size >> PAGE_SHIFT; i++) {
- ret = vm_insert_pfn(vma,
- (unsigned long)vma->vm_start + i * PAGE_SIZE,
- pfn + i);
- if (ret)
- break;
- }
+ ret = remap_io_mapping(area,
+ area->vm_start + (vma->ggtt_view.params.partial.offset << PAGE_SHIFT),
+ (ggtt->mappable_base + vma->node.start) >> PAGE_SHIFT,
+ min_t(u64, vma->size, area->vm_end - area->vm_start),
+ &ggtt->mappable);
+ if (ret)
+ goto err_unpin;
- obj->fault_mappable = true;
- } else
- ret = vm_insert_pfn(vma,
- (unsigned long)vmf->virtual_address,
- pfn + page_offset);
- }
-unpin:
- i915_gem_object_ggtt_unpin_view(obj, &view);
-unlock:
+ obj->fault_mappable = true;
+err_unpin:
+ __i915_vma_unpin(vma);
+err_unlock:
mutex_unlock(&dev->struct_mutex);
-out:
+err_rpm:
+ intel_runtime_pm_put(dev_priv);
+err:
switch (ret) {
case -EIO:
/*
ret = VM_FAULT_SIGBUS;
break;
}
-
- intel_runtime_pm_put(dev_priv);
return ret;
}
i915_gem_release_mmap(obj);
}
-uint32_t
-i915_gem_get_gtt_size(struct drm_device *dev, uint32_t size, int tiling_mode)
+/**
+ * i915_gem_get_ggtt_size - return required global GTT size for an object
+ * @dev_priv: i915 device
+ * @size: object size
+ * @tiling_mode: tiling mode
+ *
+ * Return the required global GTT size for an object, taking into account
+ * potential fence register mapping.
+ */
+u64 i915_gem_get_ggtt_size(struct drm_i915_private *dev_priv,
+ u64 size, int tiling_mode)
{
- uint32_t gtt_size;
+ u64 ggtt_size;
- if (INTEL_INFO(dev)->gen >= 4 ||
+ GEM_BUG_ON(size == 0);
+
+ if (INTEL_GEN(dev_priv) >= 4 ||
tiling_mode == I915_TILING_NONE)
return size;
/* Previous chips need a power-of-two fence region when tiling */
- if (IS_GEN3(dev))
- gtt_size = 1024*1024;
+ if (IS_GEN3(dev_priv))
+ ggtt_size = 1024*1024;
else
- gtt_size = 512*1024;
+ ggtt_size = 512*1024;
- while (gtt_size < size)
- gtt_size <<= 1;
+ while (ggtt_size < size)
+ ggtt_size <<= 1;
- return gtt_size;
+ return ggtt_size;
}
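As a quick illustration of the rounding above (not drawn from the patch itself): a 1.5 MiB X-tiled object on a gen3 device starts from the 1 MiB minimum and doubles until the object fits, so i915_gem_get_ggtt_size() returns 2 MiB; the same object on gen4 or later simply gets its own size back, since no power-of-two fence region is required there.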
/**
- * i915_gem_get_gtt_alignment - return required GTT alignment for an object
- * @dev: drm device
+ * i915_gem_get_ggtt_alignment - return required global GTT alignment
+ * @dev_priv: i915 device
* @size: object size
* @tiling_mode: tiling mode
- * @fenced: is fenced alignemned required or not
+ * @fenced: is fenced alignment required or not
*
- * Return the required GTT alignment for an object, taking into account
+ * Return the required global GTT alignment for an object, taking into account
* potential fence register mapping.
*/
-uint32_t
-i915_gem_get_gtt_alignment(struct drm_device *dev, uint32_t size,
- int tiling_mode, bool fenced)
+u64 i915_gem_get_ggtt_alignment(struct drm_i915_private *dev_priv, u64 size,
+ int tiling_mode, bool fenced)
{
+ GEM_BUG_ON(size == 0);
+
/*
* Minimum alignment is 4k (GTT page size), but might be greater
* if a fence register is needed for the object.
*/
- if (INTEL_INFO(dev)->gen >= 4 || (!fenced && IS_G33(dev)) ||
+ if (INTEL_GEN(dev_priv) >= 4 || (!fenced && IS_G33(dev_priv)) ||
tiling_mode == I915_TILING_NONE)
return 4096;
* Previous chips need to be aligned to the size of the smallest
* fence register that can contain the object.
*/
- return i915_gem_get_gtt_size(dev, size, tiling_mode);
+ return i915_gem_get_ggtt_size(dev_priv, size, tiling_mode);
}
static int i915_gem_object_create_mmap_offset(struct drm_i915_gem_object *obj)
{
struct drm_i915_private *dev_priv = to_i915(obj->base.dev);
- int ret;
-
- dev_priv->mm.shrinker_no_lock_stealing = true;
+ int err;
- ret = drm_gem_create_mmap_offset(&obj->base);
- if (ret != -ENOSPC)
- goto out;
+ err = drm_gem_create_mmap_offset(&obj->base);
+ if (!err)
+ return 0;
- /* Badly fragmented mmap space? The only way we can recover
- * space is by destroying unwanted objects. We can't randomly release
- * mmap_offsets as userspace expects them to be persistent for the
- * lifetime of the objects. The closest we can is to release the
- * offsets on purgeable objects by truncating it and marking it purged,
- * which prevents userspace from ever using that object again.
+ /* We can idle the GPU locklessly to flush stale objects, but in order
+ * to claim that space for ourselves, we need to take the big
+ * struct_mutex to free the requests+objects and allocate our slot.
*/
- i915_gem_shrink(dev_priv,
- obj->base.size >> PAGE_SHIFT,
- I915_SHRINK_BOUND |
- I915_SHRINK_UNBOUND |
- I915_SHRINK_PURGEABLE);
- ret = drm_gem_create_mmap_offset(&obj->base);
- if (ret != -ENOSPC)
- goto out;
+ err = i915_gem_wait_for_idle(dev_priv, I915_WAIT_INTERRUPTIBLE);
+ if (err)
+ return err;
- i915_gem_shrink_all(dev_priv);
- ret = drm_gem_create_mmap_offset(&obj->base);
-out:
- dev_priv->mm.shrinker_no_lock_stealing = false;
+ err = i915_mutex_lock_interruptible(&dev_priv->drm);
+ if (!err) {
+ i915_gem_retire_requests(dev_priv);
+ err = drm_gem_create_mmap_offset(&obj->base);
+ mutex_unlock(&dev_priv->drm.struct_mutex);
+ }
- return ret;
+ return err;
}
static void i915_gem_object_free_mmap_offset(struct drm_i915_gem_object *obj)
struct drm_i915_gem_object *obj;
int ret;
- ret = i915_mutex_lock_interruptible(dev);
- if (ret)
- return ret;
-
- obj = to_intel_bo(drm_gem_object_lookup(file, handle));
- if (&obj->base == NULL) {
- ret = -ENOENT;
- goto unlock;
- }
-
- if (obj->madv != I915_MADV_WILLNEED) {
- DRM_DEBUG("Attempting to mmap a purgeable buffer\n");
- ret = -EFAULT;
- goto out;
- }
+ obj = i915_gem_object_lookup(file, handle);
+ if (!obj)
+ return -ENOENT;
ret = i915_gem_object_create_mmap_offset(obj);
- if (ret)
- goto out;
-
- *offset = drm_vma_node_offset_addr(&obj->base.vma_node);
+ if (ret == 0)
+ *offset = drm_vma_node_offset_addr(&obj->base.vma_node);
-out:
- drm_gem_object_unreference(&obj->base);
-unlock:
- mutex_unlock(&dev->struct_mutex);
+ i915_gem_object_put_unlocked(obj);
return ret;
}
if (obj->pages_pin_count)
return -EBUSY;
- BUG_ON(i915_gem_obj_bound_any(obj));
+ GEM_BUG_ON(obj->bind_count);
/* ->put_pages might need to allocate memory for the bit17 swizzle
* array, hence protect them from being reaped by removing them from gtt
list_del(&obj->global_list);
if (obj->mapping) {
- if (is_vmalloc_addr(obj->mapping))
- vunmap(obj->mapping);
+ void *ptr;
+
+ ptr = ptr_mask_bits(obj->mapping);
+ if (is_vmalloc_addr(ptr))
+ vunmap(ptr);
else
- kunmap(kmap_to_page(obj->mapping));
+ kunmap(kmap_to_page(ptr));
+
obj->mapping = NULL;
}
if (i915_gem_object_needs_bit17_swizzle(obj))
i915_gem_object_do_bit_17_swizzle(obj);
- if (obj->tiling_mode != I915_TILING_NONE &&
+ if (i915_gem_object_is_tiled(obj) &&
dev_priv->quirks & QUIRK_PIN_SWIZZLED_PAGES)
i915_gem_object_pin_pages(obj);
}
/* The 'mapping' part of i915_gem_object_pin_map() below */
-static void *i915_gem_object_map(const struct drm_i915_gem_object *obj)
+static void *i915_gem_object_map(const struct drm_i915_gem_object *obj,
+ enum i915_map_type type)
{
unsigned long n_pages = obj->base.size >> PAGE_SHIFT;
struct sg_table *sgt = obj->pages;
struct page *stack_pages[32];
struct page **pages = stack_pages;
unsigned long i = 0;
+ pgprot_t pgprot;
void *addr;
/* A single page can always be kmapped */
- if (n_pages == 1)
+ if (n_pages == 1 && type == I915_MAP_WB)
return kmap(sg_page(sgt->sgl));
if (n_pages > ARRAY_SIZE(stack_pages)) {
/* Check that we have the expected number of pages */
GEM_BUG_ON(i != n_pages);
- addr = vmap(pages, n_pages, 0, PAGE_KERNEL);
+ switch (type) {
+ case I915_MAP_WB:
+ pgprot = PAGE_KERNEL;
+ break;
+ case I915_MAP_WC:
+ pgprot = pgprot_writecombine(PAGE_KERNEL_IO);
+ break;
+ }
+ addr = vmap(pages, n_pages, 0, pgprot);
if (pages != stack_pages)
drm_free_large(pages);
}
/* get, pin, and map the pages of the object into kernel space */
-void *i915_gem_object_pin_map(struct drm_i915_gem_object *obj)
+void *i915_gem_object_pin_map(struct drm_i915_gem_object *obj,
+ enum i915_map_type type)
{
+ enum i915_map_type has_type;
+ bool pinned;
+ void *ptr;
int ret;
lockdep_assert_held(&obj->base.dev->struct_mutex);
+ GEM_BUG_ON(!i915_gem_object_has_struct_page(obj));
ret = i915_gem_object_get_pages(obj);
if (ret)
return ERR_PTR(ret);
i915_gem_object_pin_pages(obj);
+ pinned = obj->pages_pin_count > 1;
- if (!obj->mapping) {
- obj->mapping = i915_gem_object_map(obj);
- if (!obj->mapping) {
- i915_gem_object_unpin_pages(obj);
- return ERR_PTR(-ENOMEM);
+ ptr = ptr_unpack_bits(obj->mapping, has_type);
+ if (ptr && has_type != type) {
+ if (pinned) {
+ ret = -EBUSY;
+ goto err;
}
- }
- return obj->mapping;
-}
+ if (is_vmalloc_addr(ptr))
+ vunmap(ptr);
+ else
+ kunmap(kmap_to_page(ptr));
-void i915_vma_move_to_active(struct i915_vma *vma,
- struct drm_i915_gem_request *req)
-{
- struct drm_i915_gem_object *obj = vma->obj;
- struct intel_engine_cs *engine;
+ ptr = obj->mapping = NULL;
+ }
- engine = i915_gem_request_get_engine(req);
+ if (!ptr) {
+ ptr = i915_gem_object_map(obj, type);
+ if (!ptr) {
+ ret = -ENOMEM;
+ goto err;
+ }
- /* Add a reference if we're newly entering the active list. */
- if (obj->active == 0)
- drm_gem_object_reference(&obj->base);
- obj->active |= intel_engine_flag(engine);
+ obj->mapping = ptr_pack_bits(ptr, type);
+ }
- list_move_tail(&obj->engine_list[engine->id], &engine->active_list);
- i915_gem_request_assign(&obj->last_read_req[engine->id], req);
+ return ptr;
- list_move_tail(&vma->vm_link, &vma->vm->active_list);
+err:
+ i915_gem_object_unpin_pages(obj);
+ return ERR_PTR(ret);
}
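A minimal in-kernel sketch of a caller of the reworked typed interface above, assuming struct_mutex is already held as the lockdep annotation requires; the function name example_fill_object_wc and its purpose are invented for illustration only.

static int example_fill_object_wc(struct drm_i915_gem_object *obj, u32 value)
{
	u32 *vaddr;
	u64 i;

	/* Request a write-combined kernel mapping of the object's pages */
	vaddr = i915_gem_object_pin_map(obj, I915_MAP_WC);
	if (IS_ERR(vaddr))
		return PTR_ERR(vaddr);

	for (i = 0; i < obj->base.size / sizeof(*vaddr); i++)
		vaddr[i] = value;

	/* Drop the pages pin taken by pin_map; the cached mapping persists */
	i915_gem_object_unpin_map(obj);
	return 0;
}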
static void
-i915_gem_object_retire__write(struct drm_i915_gem_object *obj)
+i915_gem_object_retire__write(struct i915_gem_active *active,
+ struct drm_i915_gem_request *request)
{
- GEM_BUG_ON(obj->last_write_req == NULL);
- GEM_BUG_ON(!(obj->active & intel_engine_flag(obj->last_write_req->engine)));
+ struct drm_i915_gem_object *obj =
+ container_of(active, struct drm_i915_gem_object, last_write);
- i915_gem_request_assign(&obj->last_write_req, NULL);
intel_fb_obj_flush(obj, true, ORIGIN_CS);
}
static void
-i915_gem_object_retire__read(struct drm_i915_gem_object *obj, int ring)
+i915_gem_object_retire__read(struct i915_gem_active *active,
+ struct drm_i915_gem_request *request)
{
- struct i915_vma *vma;
-
- GEM_BUG_ON(obj->last_read_req[ring] == NULL);
- GEM_BUG_ON(!(obj->active & (1 << ring)));
-
- list_del_init(&obj->engine_list[ring]);
- i915_gem_request_assign(&obj->last_read_req[ring], NULL);
-
- if (obj->last_write_req && obj->last_write_req->engine->id == ring)
- i915_gem_object_retire__write(obj);
-
- obj->active &= ~(1 << ring);
- if (obj->active)
- return;
-
- /* Bump our place on the bound list to keep it roughly in LRU order
- * so that we don't steal from recently used but inactive objects
- * (unless we are forced to ofc!)
- */
- list_move_tail(&obj->global_list,
- &to_i915(obj->base.dev)->mm.bound_list);
-
- list_for_each_entry(vma, &obj->vma_list, obj_link) {
- if (!list_empty(&vma->vm_link))
- list_move_tail(&vma->vm_link, &vma->vm->inactive_list);
- }
-
- i915_gem_request_assign(&obj->last_fenced_req, NULL);
- drm_gem_object_unreference(&obj->base);
-}
-
-static int
-i915_gem_init_seqno(struct drm_i915_private *dev_priv, u32 seqno)
-{
- struct intel_engine_cs *engine;
- int ret;
-
- /* Carefully retire all requests without writing to the rings */
- for_each_engine(engine, dev_priv) {
- ret = intel_engine_idle(engine);
- if (ret)
- return ret;
- }
- i915_gem_retire_requests(dev_priv);
-
- /* If the seqno wraps around, we need to clear the breadcrumb rbtree */
- if (!i915_seqno_passed(seqno, dev_priv->next_seqno)) {
- while (intel_kick_waiters(dev_priv) ||
- intel_kick_signalers(dev_priv))
- yield();
- }
-
- /* Finally reset hw state */
- for_each_engine(engine, dev_priv)
- intel_ring_init_seqno(engine, seqno);
-
- return 0;
-}
-
-int i915_gem_set_seqno(struct drm_device *dev, u32 seqno)
-{
- struct drm_i915_private *dev_priv = to_i915(dev);
- int ret;
-
- if (seqno == 0)
- return -EINVAL;
-
- /* HWS page needs to be set less than what we
- * will inject to ring
- */
- ret = i915_gem_init_seqno(dev_priv, seqno - 1);
- if (ret)
- return ret;
-
- /* Carefully set the last_seqno value so that wrap
- * detection still works
- */
- dev_priv->next_seqno = seqno;
- dev_priv->last_seqno = seqno - 1;
- if (dev_priv->last_seqno == 0)
- dev_priv->last_seqno--;
-
- return 0;
-}
-
-int
-i915_gem_get_seqno(struct drm_i915_private *dev_priv, u32 *seqno)
-{
- /* reserve 0 for non-seqno */
- if (dev_priv->next_seqno == 0) {
- int ret = i915_gem_init_seqno(dev_priv, 0);
- if (ret)
- return ret;
-
- dev_priv->next_seqno = 1;
- }
-
- *seqno = dev_priv->last_seqno = dev_priv->next_seqno++;
- return 0;
-}
-
-static void i915_gem_mark_busy(const struct intel_engine_cs *engine)
-{
- struct drm_i915_private *dev_priv = engine->i915;
-
- dev_priv->gt.active_engines |= intel_engine_flag(engine);
- if (dev_priv->gt.awake)
- return;
-
- intel_runtime_pm_get_noresume(dev_priv);
- dev_priv->gt.awake = true;
-
- i915_update_gfx_val(dev_priv);
- if (INTEL_GEN(dev_priv) >= 6)
- gen6_rps_busy(dev_priv);
-
- queue_delayed_work(dev_priv->wq,
- &dev_priv->gt.retire_work,
- round_jiffies_up_relative(HZ));
-}
-
-/*
- * NB: This function is not allowed to fail. Doing so would mean the the
- * request is not being tracked for completion but the work itself is
- * going to happen on the hardware. This would be a Bad Thing(tm).
- */
-void __i915_add_request(struct drm_i915_gem_request *request,
- struct drm_i915_gem_object *obj,
- bool flush_caches)
-{
- struct intel_engine_cs *engine;
- struct intel_ringbuffer *ringbuf;
- u32 request_start;
- u32 reserved_tail;
- int ret;
-
- if (WARN_ON(request == NULL))
- return;
-
- engine = request->engine;
- ringbuf = request->ringbuf;
-
- /*
- * To ensure that this call will not fail, space for its emissions
- * should already have been reserved in the ring buffer. Let the ring
- * know that it is time to use that space up.
- */
- request_start = intel_ring_get_tail(ringbuf);
- reserved_tail = request->reserved_space;
- request->reserved_space = 0;
-
- /*
- * Emit any outstanding flushes - execbuf can fail to emit the flush
- * after having emitted the batchbuffer command. Hence we need to fix
- * things up similar to emitting the lazy request. The difference here
- * is that the flush _must_ happen before the next request, no matter
- * what.
- */
- if (flush_caches) {
- if (i915.enable_execlists)
- ret = logical_ring_flush_all_caches(request);
- else
- ret = intel_ring_flush_all_caches(request);
- /* Not allowed to fail! */
- WARN(ret, "*_ring_flush_all_caches failed: %d!\n", ret);
- }
-
- trace_i915_gem_request_add(request);
+ int idx = request->engine->id;
+ struct drm_i915_gem_object *obj =
+ container_of(active, struct drm_i915_gem_object, last_read[idx]);
- request->head = request_start;
+ GEM_BUG_ON(!i915_gem_object_has_active_engine(obj, idx));
- /* Whilst this request exists, batch_obj will be on the
- * active_list, and so will hold the active reference. Only when this
- * request is retired will the the batch_obj be moved onto the
- * inactive_list and lose its active reference. Hence we do not need
- * to explicitly hold another reference here.
- */
- request->batch_obj = obj;
+ i915_gem_object_clear_active(obj, idx);
+ if (i915_gem_object_is_active(obj))
+ return;
- /* Seal the request and mark it as pending execution. Note that
- * we may inspect this state, without holding any locks, during
- * hangcheck. Hence we apply the barrier to ensure that we do not
- * see a more recent value in the hws than we are tracking.
- */
- request->emitted_jiffies = jiffies;
- request->previous_seqno = engine->last_submitted_seqno;
- smp_store_mb(engine->last_submitted_seqno, request->seqno);
- list_add_tail(&request->list, &engine->request_list);
-
- /* Record the position of the start of the request so that
- * should we detect the updated seqno part-way through the
- * GPU processing the request, we never over-estimate the
- * position of the head.
+ /* Bump our place on the bound list to keep it roughly in LRU order
+ * so that we don't steal from recently used but inactive objects
+ * (unless we are forced to ofc!)
*/
- request->postfix = intel_ring_get_tail(ringbuf);
-
- if (i915.enable_execlists)
- ret = engine->emit_request(request);
- else {
- ret = engine->add_request(request);
+ if (obj->bind_count)
+ list_move_tail(&obj->global_list,
+ &request->i915->mm.bound_list);
- request->tail = intel_ring_get_tail(ringbuf);
- }
- /* Not allowed to fail! */
- WARN(ret, "emit|add_request failed: %d!\n", ret);
- /* Sanity check that the reserved size was large enough. */
- ret = intel_ring_get_tail(ringbuf) - request_start;
- if (ret < 0)
- ret += ringbuf->size;
- WARN_ONCE(ret > reserved_tail,
- "Not enough space reserved (%d bytes) "
- "for adding the request (%d bytes)\n",
- reserved_tail, ret);
-
- i915_gem_mark_busy(engine);
+ i915_gem_object_put(obj);
}
static bool i915_context_is_banned(const struct i915_gem_context *ctx)
}
}
-void i915_gem_request_free(struct kref *req_ref)
-{
- struct drm_i915_gem_request *req = container_of(req_ref,
- typeof(*req), ref);
- kmem_cache_free(req->i915->requests, req);
-}
-
-static inline int
-__i915_gem_request_alloc(struct intel_engine_cs *engine,
- struct i915_gem_context *ctx,
- struct drm_i915_gem_request **req_out)
-{
- struct drm_i915_private *dev_priv = engine->i915;
- unsigned reset_counter = i915_reset_counter(&dev_priv->gpu_error);
- struct drm_i915_gem_request *req;
- int ret;
-
- if (!req_out)
- return -EINVAL;
-
- *req_out = NULL;
-
- /* ABI: Before userspace accesses the GPU (e.g. execbuffer), report
- * EIO if the GPU is already wedged, or EAGAIN to drop the struct_mutex
- * and restart.
- */
- ret = i915_gem_check_wedge(reset_counter, dev_priv->mm.interruptible);
- if (ret)
- return ret;
-
- req = kmem_cache_zalloc(dev_priv->requests, GFP_KERNEL);
- if (req == NULL)
- return -ENOMEM;
-
- ret = i915_gem_get_seqno(engine->i915, &req->seqno);
- if (ret)
- goto err;
-
- kref_init(&req->ref);
- req->i915 = dev_priv;
- req->engine = engine;
- req->ctx = ctx;
- i915_gem_context_reference(req->ctx);
-
- /*
- * Reserve space in the ring buffer for all the commands required to
- * eventually emit this request. This is to guarantee that the
- * i915_add_request() call can't fail. Note that the reserve may need
- * to be redone if the request is not actually submitted straight
- * away, e.g. because a GPU scheduler has deferred it.
- */
- req->reserved_space = MIN_SPACE_FOR_ADD_REQUEST;
-
- if (i915.enable_execlists)
- ret = intel_logical_ring_alloc_request_extras(req);
- else
- ret = intel_ring_alloc_request_extras(req);
- if (ret)
- goto err_ctx;
-
- *req_out = req;
- return 0;
-
-err_ctx:
- i915_gem_context_unreference(ctx);
-err:
- kmem_cache_free(dev_priv->requests, req);
- return ret;
-}
-
-/**
- * i915_gem_request_alloc - allocate a request structure
- *
- * @engine: engine that we wish to issue the request on.
- * @ctx: context that the request will be associated with.
- * This can be NULL if the request is not directly related to
- * any specific user context, in which case this function will
- * choose an appropriate context to use.
- *
- * Returns a pointer to the allocated request if successful,
- * or an error code if not.
- */
-struct drm_i915_gem_request *
-i915_gem_request_alloc(struct intel_engine_cs *engine,
- struct i915_gem_context *ctx)
-{
- struct drm_i915_gem_request *req;
- int err;
-
- if (ctx == NULL)
- ctx = engine->i915->kernel_context;
- err = __i915_gem_request_alloc(engine, ctx, &req);
- return err ? ERR_PTR(err) : req;
-}
-
struct drm_i915_gem_request *
i915_gem_find_active_request(struct intel_engine_cs *engine)
{
* extra delay for a recent interrupt is pointless. Hence, we do
* not need an engine->irq_seqno_barrier() before the seqno reads.
*/
- list_for_each_entry(request, &engine->request_list, list) {
+ list_for_each_entry(request, &engine->request_list, link) {
if (i915_gem_request_completed(request))
continue;
+ if (!i915_sw_fence_done(&request->submit))
+ break;
+
return request;
}
return NULL;
}
-static void i915_gem_reset_engine_status(struct intel_engine_cs *engine)
+static void reset_request(struct drm_i915_gem_request *request)
+{
+ void *vaddr = request->ring->vaddr;
+ u32 head;
+
+ /* As this request likely depends on state from the lost
+ * context, clear out all the user operations leaving the
+ * breadcrumb at the end (so we get the fence notifications).
+ */
+ head = request->head;
+ if (request->postfix < head) {
+ memset(vaddr + head, 0, request->ring->size - head);
+ head = 0;
+ }
+ memset(vaddr + head, 0, request->postfix - head);
+}
+
+static void i915_gem_reset_engine(struct intel_engine_cs *engine)
{
struct drm_i915_gem_request *request;
+ struct i915_gem_context *incomplete_ctx;
bool ring_hung;
+ /* Ensure the irq handler finishes and is not run again. */
+ tasklet_kill(&engine->irq_tasklet);
+ if (engine->irq_seqno_barrier)
+ engine->irq_seqno_barrier(engine);
+
request = i915_gem_find_active_request(engine);
- if (request == NULL)
+ if (!request)
return;
ring_hung = engine->hangcheck.score >= HANGCHECK_SCORE_RING_HUNG;
-
i915_set_reset_status(request->ctx, ring_hung);
- list_for_each_entry_continue(request, &engine->request_list, list)
- i915_set_reset_status(request->ctx, false);
-}
-
-static void i915_gem_reset_engine_cleanup(struct intel_engine_cs *engine)
-{
- struct intel_ringbuffer *buffer;
-
- while (!list_empty(&engine->active_list)) {
- struct drm_i915_gem_object *obj;
-
- obj = list_first_entry(&engine->active_list,
- struct drm_i915_gem_object,
- engine_list[engine->id]);
-
- i915_gem_object_retire__read(obj, engine->id);
- }
-
- /*
- * Clear the execlists queue up before freeing the requests, as those
- * are the ones that keep the context and ringbuffer backing objects
- * pinned in place.
- */
-
- if (i915.enable_execlists) {
- /* Ensure irq handler finishes or is cancelled. */
- tasklet_kill(&engine->irq_tasklet);
-
- intel_execlists_cancel_requests(engine);
- }
-
- /*
- * We must free the requests after all the corresponding objects have
- * been moved off active lists. Which is the same order as the normal
- * retire_requests function does. This is important if object hold
- * implicit references on things like e.g. ppgtt address spaces through
- * the request.
- */
- while (!list_empty(&engine->request_list)) {
- struct drm_i915_gem_request *request;
+ if (!ring_hung)
+ return;
- request = list_first_entry(&engine->request_list,
- struct drm_i915_gem_request,
- list);
+ DRM_DEBUG_DRIVER("resetting %s to restart from tail of request 0x%x\n",
+ engine->name, request->fence.seqno);
- i915_gem_request_retire(request);
- }
+ /* Setup the CS to resume from the breadcrumb of the hung request */
+ engine->reset_hw(engine, request);
- /* Having flushed all requests from all queues, we know that all
- * ringbuffers must now be empty. However, since we do not reclaim
- * all space when retiring the request (to prevent HEADs colliding
- * with rapid ringbuffer wraparound) the amount of available space
- * upon reset is less than when we start. Do one more pass over
- * all the ringbuffers to reset last_retired_head.
+ /* Users of the default context do not rely on logical state
+ * preserved between batches. They have to emit full state on
+ * every batch and so it is safe to execute queued requests following
+ * the hang.
+ *
+ * Other contexts preserve state, now corrupt. We want to skip all
+ * queued requests that reference the corrupt context.
*/
- list_for_each_entry(buffer, &engine->buffers, link) {
- buffer->last_retired_head = buffer->tail;
- intel_ring_update_space(buffer);
- }
+ incomplete_ctx = request->ctx;
+ if (i915_gem_context_is_default(incomplete_ctx))
+ return;
- intel_ring_init_seqno(engine, engine->last_submitted_seqno);
+ list_for_each_entry_continue(request, &engine->request_list, link)
+ if (request->ctx == incomplete_ctx)
+ reset_request(request);
+
+ engine->i915->gt.active_engines &= ~intel_engine_flag(engine);
}
-void i915_gem_reset(struct drm_device *dev)
+void i915_gem_reset(struct drm_i915_private *dev_priv)
{
- struct drm_i915_private *dev_priv = to_i915(dev);
struct intel_engine_cs *engine;
- /*
- * Before we free the objects from the requests, we need to inspect
- * them for finding the guilty party. As the requests only borrow
- * their reference to the objects, the inspection must be done first.
- */
- for_each_engine(engine, dev_priv)
- i915_gem_reset_engine_status(engine);
+ i915_gem_retire_requests(dev_priv);
for_each_engine(engine, dev_priv)
- i915_gem_reset_engine_cleanup(engine);
+ i915_gem_reset_engine(engine);
+ mod_delayed_work(dev_priv->wq, &dev_priv->gt.idle_work, 0);
- i915_gem_context_reset(dev);
-
- i915_gem_restore_fences(dev);
+ i915_gem_restore_fences(&dev_priv->drm);
+}
- WARN_ON(i915_verify_lists(dev));
+static void nop_submit_request(struct drm_i915_gem_request *request)
+{
}
-/**
- * This function clears the request list as sequence numbers are passed.
- * @engine: engine to retire requests on
- */
-void
-i915_gem_retire_requests_ring(struct intel_engine_cs *engine)
+static void i915_gem_cleanup_engine(struct intel_engine_cs *engine)
{
- WARN_ON(i915_verify_lists(engine->dev));
+ engine->submit_request = nop_submit_request;
- /* Retire requests first as we use it above for the early return.
- * If we retire requests last, we may use a later seqno and so clear
- * the requests lists without clearing the active list, leading to
- * confusion.
+ /* Mark all pending requests as complete so that any concurrent
+ * (lockless) lookup doesn't try and wait upon the request as we
+ * reset it.
*/
- while (!list_empty(&engine->request_list)) {
- struct drm_i915_gem_request *request;
-
- request = list_first_entry(&engine->request_list,
- struct drm_i915_gem_request,
- list);
-
- if (!i915_gem_request_completed(request))
- break;
-
- i915_gem_request_retire(request);
- }
+ intel_engine_init_seqno(engine, engine->last_submitted_seqno);
- /* Move any buffers on the active list that are no longer referenced
- * by the ringbuffer to the flushing/inactive lists as appropriate,
- * before we free the context associated with the requests.
+ /*
+ * Clear the execlists queue up before freeing the requests, as those
+ * are the ones that keep the context and ringbuffer backing objects
+ * pinned in place.
*/
- while (!list_empty(&engine->active_list)) {
- struct drm_i915_gem_object *obj;
-
- obj = list_first_entry(&engine->active_list,
- struct drm_i915_gem_object,
- engine_list[engine->id]);
-
- if (!list_empty(&obj->last_read_req[engine->id]->list))
- break;
- i915_gem_object_retire__read(obj, engine->id);
+ if (i915.enable_execlists) {
+ spin_lock(&engine->execlist_lock);
+ INIT_LIST_HEAD(&engine->execlist_queue);
+ i915_gem_request_put(engine->execlist_port[0].request);
+ i915_gem_request_put(engine->execlist_port[1].request);
+ memset(engine->execlist_port, 0, sizeof(engine->execlist_port));
+ spin_unlock(&engine->execlist_lock);
}
- WARN_ON(i915_verify_lists(engine->dev));
+ engine->i915->gt.active_engines &= ~intel_engine_flag(engine);
}
-void i915_gem_retire_requests(struct drm_i915_private *dev_priv)
+void i915_gem_set_wedged(struct drm_i915_private *dev_priv)
{
struct intel_engine_cs *engine;
lockdep_assert_held(&dev_priv->drm.struct_mutex);
+ set_bit(I915_WEDGED, &dev_priv->gpu_error.flags);
- if (dev_priv->gt.active_engines == 0)
- return;
-
- GEM_BUG_ON(!dev_priv->gt.awake);
-
- for_each_engine(engine, dev_priv) {
- i915_gem_retire_requests_ring(engine);
- if (list_empty(&engine->request_list))
- dev_priv->gt.active_engines &= ~intel_engine_flag(engine);
- }
+ i915_gem_context_lost(dev_priv);
+ for_each_engine(engine, dev_priv)
+ i915_gem_cleanup_engine(engine);
+ mod_delayed_work(dev_priv->wq, &dev_priv->gt.idle_work, 0);
- if (dev_priv->gt.active_engines == 0)
- queue_delayed_work(dev_priv->wq,
- &dev_priv->gt.idle_work,
- msecs_to_jiffies(100));
+ i915_gem_retire_requests(dev_priv);
}
static void
* We do not need to do this test under locking as in the worst-case
* we queue the retire worker once too often.
*/
- if (READ_ONCE(dev_priv->gt.awake))
+ if (READ_ONCE(dev_priv->gt.awake)) {
+ i915_queue_hangcheck(dev_priv);
queue_delayed_work(dev_priv->wq,
&dev_priv->gt.retire_work,
round_jiffies_up_relative(HZ));
+ }
}
static void
container_of(work, typeof(*dev_priv), gt.idle_work.work);
struct drm_device *dev = &dev_priv->drm;
struct intel_engine_cs *engine;
- unsigned int stuck_engines;
bool rearm_hangcheck;
if (!READ_ONCE(dev_priv->gt.awake))
cancel_delayed_work_sync(&dev_priv->gpu_error.hangcheck_work);
if (!mutex_trylock(&dev->struct_mutex)) {
- /* Currently busy, come back later */
- mod_delayed_work(dev_priv->wq,
- &dev_priv->gt.idle_work,
- msecs_to_jiffies(50));
- goto out_rearm;
- }
-
- if (dev_priv->gt.active_engines)
- goto out_unlock;
-
- for_each_engine(engine, dev_priv)
- i915_gem_batch_pool_fini(&engine->batch_pool);
-
- GEM_BUG_ON(!dev_priv->gt.awake);
- dev_priv->gt.awake = false;
- rearm_hangcheck = false;
-
- stuck_engines = intel_kick_waiters(dev_priv);
- if (unlikely(stuck_engines)) {
- DRM_DEBUG_DRIVER("kicked stuck waiters...missed irq\n");
- dev_priv->gpu_error.missed_irq_rings |= stuck_engines;
- }
-
- if (INTEL_GEN(dev_priv) >= 6)
- gen6_rps_idle(dev_priv);
- intel_runtime_pm_put(dev_priv);
-out_unlock:
- mutex_unlock(&dev->struct_mutex);
-
-out_rearm:
- if (rearm_hangcheck) {
- GEM_BUG_ON(!dev_priv->gt.awake);
- i915_queue_hangcheck(dev_priv);
- }
-}
-
-/**
- * Ensures that an object will eventually get non-busy by flushing any required
- * write domains, emitting any outstanding lazy request and retiring and
- * completed requests.
- * @obj: object to flush
- */
-static int
-i915_gem_object_flush_active(struct drm_i915_gem_object *obj)
-{
- int i;
-
- if (!obj->active)
- return 0;
-
- for (i = 0; i < I915_NUM_ENGINES; i++) {
- struct drm_i915_gem_request *req;
-
- req = obj->last_read_req[i];
- if (req == NULL)
- continue;
-
- if (i915_gem_request_completed(req))
- i915_gem_object_retire__read(obj, i);
- }
-
- return 0;
-}
-
-/**
- * i915_gem_wait_ioctl - implements DRM_IOCTL_I915_GEM_WAIT
- * @dev: drm device pointer
- * @data: ioctl data blob
- * @file: drm file pointer
- *
- * Returns 0 if successful, else an error is returned with the remaining time in
- * the timeout parameter.
- * -ETIME: object is still busy after timeout
- * -ERESTARTSYS: signal interrupted the wait
- * -ENONENT: object doesn't exist
- * Also possible, but rare:
- * -EAGAIN: GPU wedged
- * -ENOMEM: damn
- * -ENODEV: Internal IRQ fail
- * -E?: The add request failed
- *
- * The wait ioctl with a timeout of 0 reimplements the busy ioctl. With any
- * non-zero timeout parameter the wait ioctl will wait for the given number of
- * nanoseconds on an object becoming unbusy. Since the wait itself does so
- * without holding struct_mutex the object may become re-busied before this
- * function completes. A similar but shorter * race condition exists in the busy
- * ioctl
- */
-int
-i915_gem_wait_ioctl(struct drm_device *dev, void *data, struct drm_file *file)
-{
- struct drm_i915_gem_wait *args = data;
- struct drm_i915_gem_object *obj;
- struct drm_i915_gem_request *req[I915_NUM_ENGINES];
- int i, n = 0;
- int ret;
-
- if (args->flags != 0)
- return -EINVAL;
-
- ret = i915_mutex_lock_interruptible(dev);
- if (ret)
- return ret;
-
- obj = to_intel_bo(drm_gem_object_lookup(file, args->bo_handle));
- if (&obj->base == NULL) {
- mutex_unlock(&dev->struct_mutex);
- return -ENOENT;
- }
-
- /* Need to make sure the object gets inactive eventually. */
- ret = i915_gem_object_flush_active(obj);
- if (ret)
- goto out;
-
- if (!obj->active)
- goto out;
-
- /* Do this after OLR check to make sure we make forward progress polling
- * on this IOCTL with a timeout == 0 (like busy ioctl)
- */
- if (args->timeout_ns == 0) {
- ret = -ETIME;
- goto out;
- }
-
- drm_gem_object_unreference(&obj->base);
-
- for (i = 0; i < I915_NUM_ENGINES; i++) {
- if (obj->last_read_req[i] == NULL)
- continue;
-
- req[n++] = i915_gem_request_reference(obj->last_read_req[i]);
- }
-
- mutex_unlock(&dev->struct_mutex);
-
- for (i = 0; i < n; i++) {
- if (ret == 0)
- ret = __i915_wait_request(req[i], true,
- args->timeout_ns > 0 ? &args->timeout_ns : NULL,
- to_rps_client(file));
- i915_gem_request_unreference(req[i]);
- }
- return ret;
-
-out:
- drm_gem_object_unreference(&obj->base);
- mutex_unlock(&dev->struct_mutex);
- return ret;
-}
-
-static int
-__i915_gem_object_sync(struct drm_i915_gem_object *obj,
- struct intel_engine_cs *to,
- struct drm_i915_gem_request *from_req,
- struct drm_i915_gem_request **to_req)
-{
- struct intel_engine_cs *from;
- int ret;
-
- from = i915_gem_request_get_engine(from_req);
- if (to == from)
- return 0;
-
- if (i915_gem_request_completed(from_req))
- return 0;
-
- if (!i915_semaphore_is_enabled(to_i915(obj->base.dev))) {
- struct drm_i915_private *i915 = to_i915(obj->base.dev);
- ret = __i915_wait_request(from_req,
- i915->mm.interruptible,
- NULL,
- &i915->rps.semaphores);
- if (ret)
- return ret;
-
- i915_gem_object_retire_request(obj, from_req);
- } else {
- int idx = intel_ring_sync_index(from, to);
- u32 seqno = i915_gem_request_get_seqno(from_req);
-
- WARN_ON(!to_req);
-
- if (seqno <= from->semaphore.sync_seqno[idx])
- return 0;
+ /* Currently busy, come back later */
+ mod_delayed_work(dev_priv->wq,
+ &dev_priv->gt.idle_work,
+ msecs_to_jiffies(50));
+ goto out_rearm;
+ }
- if (*to_req == NULL) {
- struct drm_i915_gem_request *req;
+ if (dev_priv->gt.active_engines)
+ goto out_unlock;
- req = i915_gem_request_alloc(to, NULL);
- if (IS_ERR(req))
- return PTR_ERR(req);
+ for_each_engine(engine, dev_priv)
+ i915_gem_batch_pool_fini(&engine->batch_pool);
- *to_req = req;
- }
+ GEM_BUG_ON(!dev_priv->gt.awake);
+ dev_priv->gt.awake = false;
+ rearm_hangcheck = false;
- trace_i915_gem_ring_sync_to(*to_req, from, from_req);
- ret = to->semaphore.sync_to(*to_req, from, seqno);
- if (ret)
- return ret;
+ if (INTEL_GEN(dev_priv) >= 6)
+ gen6_rps_idle(dev_priv);
+ intel_runtime_pm_put(dev_priv);
+out_unlock:
+ mutex_unlock(&dev->struct_mutex);
- /* We use last_read_req because sync_to()
- * might have just caused seqno wrap under
- * the radar.
- */
- from->semaphore.sync_seqno[idx] =
- i915_gem_request_get_seqno(obj->last_read_req[from->id]);
+out_rearm:
+ if (rearm_hangcheck) {
+ GEM_BUG_ON(!dev_priv->gt.awake);
+ i915_queue_hangcheck(dev_priv);
}
+}
- return 0;
+void i915_gem_close_object(struct drm_gem_object *gem, struct drm_file *file)
+{
+ struct drm_i915_gem_object *obj = to_intel_bo(gem);
+ struct drm_i915_file_private *fpriv = file->driver_priv;
+ struct i915_vma *vma, *vn;
+
+ mutex_lock(&obj->base.dev->struct_mutex);
+ list_for_each_entry_safe(vma, vn, &obj->vma_list, obj_link)
+ if (vma->vm->file == fpriv)
+ i915_vma_close(vma);
+ mutex_unlock(&obj->base.dev->struct_mutex);
}
/**
- * i915_gem_object_sync - sync an object to a ring.
- *
- * @obj: object which may be in use on another ring.
- * @to: ring we wish to use the object on. May be NULL.
- * @to_req: request we wish to use the object for. See below.
- * This will be allocated and returned if a request is
- * required but not passed in.
- *
- * This code is meant to abstract object synchronization with the GPU.
- * Calling with NULL implies synchronizing the object with the CPU
- * rather than a particular GPU ring. Conceptually we serialise writes
- * between engines inside the GPU. We only allow one engine to write
- * into a buffer at any time, but multiple readers. To ensure each has
- * a coherent view of memory, we must:
- *
- * - If there is an outstanding write request to the object, the new
- * request must wait for it to complete (either CPU or in hw, requests
- * on the same ring will be naturally ordered).
- *
- * - If we are a write request (pending_write_domain is set), the new
- * request must wait for outstanding read requests to complete.
+ * i915_gem_wait_ioctl - implements DRM_IOCTL_I915_GEM_WAIT
+ * @dev: drm device pointer
+ * @data: ioctl data blob
+ * @file: drm file pointer
*
- * For CPU synchronisation (NULL to) no request is required. For syncing with
- * rings to_req must be non-NULL. However, a request does not have to be
- * pre-allocated. If *to_req is NULL and sync commands will be emitted then a
- * request will be allocated automatically and returned through *to_req. Note
- * that it is not guaranteed that commands will be emitted (because the system
- * might already be idle). Hence there is no need to create a request that
- * might never have any work submitted. Note further that if a request is
- * returned in *to_req, it is the responsibility of the caller to submit
- * that request (after potentially adding more work to it).
+ * Returns 0 if successful, else an error is returned with the remaining time in
+ * the timeout parameter.
+ * -ETIME: object is still busy after timeout
+ * -ERESTARTSYS: signal interrupted the wait
+ * -ENOENT: object doesn't exist
+ * Also possible, but rare:
+ * -EAGAIN: GPU wedged
+ * -ENOMEM: damn
+ * -ENODEV: Internal IRQ fail
+ * -E?: The add request failed
*
- * Returns 0 if successful, else propagates up the lower layer error.
+ * The wait ioctl with a timeout of 0 reimplements the busy ioctl. With any
+ * non-zero timeout parameter the wait ioctl will wait for the given number of
+ * nanoseconds on an object becoming unbusy. Since the wait itself does so
+ * without holding struct_mutex the object may become re-busied before this
+ * function completes. A similar but shorter race condition exists in the busy
+ * ioctl.
*/
int
-i915_gem_object_sync(struct drm_i915_gem_object *obj,
- struct intel_engine_cs *to,
- struct drm_i915_gem_request **to_req)
+i915_gem_wait_ioctl(struct drm_device *dev, void *data, struct drm_file *file)
{
- const bool readonly = obj->base.pending_write_domain == 0;
- struct drm_i915_gem_request *req[I915_NUM_ENGINES];
- int ret, i, n;
+ struct drm_i915_gem_wait *args = data;
+ struct intel_rps_client *rps = to_rps_client(file);
+ struct drm_i915_gem_object *obj;
+ unsigned long active;
+ int idx, ret = 0;
- if (!obj->active)
- return 0;
+ if (args->flags != 0)
+ return -EINVAL;
- if (to == NULL)
- return i915_gem_object_wait_rendering(obj, readonly);
+ obj = i915_gem_object_lookup(file, args->bo_handle);
+ if (!obj)
+ return -ENOENT;
- n = 0;
- if (readonly) {
- if (obj->last_write_req)
- req[n++] = obj->last_write_req;
- } else {
- for (i = 0; i < I915_NUM_ENGINES; i++)
- if (obj->last_read_req[i])
- req[n++] = obj->last_read_req[i];
- }
- for (i = 0; i < n; i++) {
- ret = __i915_gem_object_sync(obj, to, req[i], to_req);
+ active = __I915_BO_ACTIVE(obj);
+ for_each_active(active, idx) {
+ s64 *timeout = args->timeout_ns >= 0 ? &args->timeout_ns : NULL;
+ ret = i915_gem_active_wait_unlocked(&obj->last_read[idx],
+ I915_WAIT_INTERRUPTIBLE,
+ timeout, rps);
if (ret)
- return ret;
+ break;
}
- return 0;
-}
-
-static void i915_gem_object_finish_gtt(struct drm_i915_gem_object *obj)
-{
- u32 old_write_domain, old_read_domains;
-
- /* Force a pagefault for domain tracking on next user access */
- i915_gem_release_mmap(obj);
-
- if ((obj->base.read_domains & I915_GEM_DOMAIN_GTT) == 0)
- return;
-
- old_read_domains = obj->base.read_domains;
- old_write_domain = obj->base.write_domain;
-
- obj->base.read_domains &= ~I915_GEM_DOMAIN_GTT;
- obj->base.write_domain &= ~I915_GEM_DOMAIN_GTT;
-
- trace_i915_gem_object_change_domain(obj,
- old_read_domains,
- old_write_domain);
+ i915_gem_object_put_unlocked(obj);
+ return ret;
}
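For illustration only (not part of the patch): the ABI described in the comment above can be exercised from userspace through DRM_IOCTL_I915_GEM_WAIT. The sketch below assumes libdrm's drmIoctl() and the uapi struct drm_i915_gem_wait; the helper name and error handling are illustrative, not taken from this series.

#include <errno.h>
#include <stdbool.h>
#include <stdint.h>
#include <xf86drm.h>
#include <drm/i915_drm.h>

/* Poll busyness without blocking: a zero timeout returns -ETIME while busy. */
static bool example_bo_is_busy(int fd, uint32_t handle)
{
	struct drm_i915_gem_wait wait = {
		.bo_handle = handle,
		.flags = 0,
		.timeout_ns = 0,
	};

	if (drmIoctl(fd, DRM_IOCTL_I915_GEM_WAIT, &wait) == 0)
		return false;		/* already idle */

	return errno == ETIME;		/* still busy; any other errno is a real failure */
}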
static void __i915_vma_iounmap(struct i915_vma *vma)
{
- GEM_BUG_ON(vma->pin_count);
+ GEM_BUG_ON(i915_vma_is_pinned(vma));
if (vma->iomap == NULL)
return;
vma->iomap = NULL;
}
-static int __i915_vma_unbind(struct i915_vma *vma, bool wait)
+int i915_vma_unbind(struct i915_vma *vma)
{
struct drm_i915_gem_object *obj = vma->obj;
- struct drm_i915_private *dev_priv = to_i915(obj->base.dev);
+ unsigned long active;
int ret;
- if (list_empty(&vma->obj_link))
- return 0;
-
- if (!drm_mm_node_allocated(&vma->node)) {
- i915_gem_vma_destroy(vma);
- return 0;
- }
-
- if (vma->pin_count)
- return -EBUSY;
+ /* First wait upon any activity as retiring the request may
+ * have side-effects such as unpinning or even unbinding this vma.
+ */
+ active = i915_vma_get_active(vma);
+ if (active) {
+ int idx;
+
+ /* When a closed VMA is retired, it is unbound - eek.
+ * In order to prevent it from being recursively closed,
+ * take a pin on the vma so that the second unbind is
+ * aborted.
+ */
+ __i915_vma_pin(vma);
- BUG_ON(obj->pages == NULL);
+ for_each_active(active, idx) {
+ ret = i915_gem_active_retire(&vma->last_read[idx],
+ &vma->vm->dev->struct_mutex);
+ if (ret)
+ break;
+ }
- if (wait) {
- ret = i915_gem_object_wait_rendering(obj, false);
+ __i915_vma_unpin(vma);
if (ret)
return ret;
+
+ GEM_BUG_ON(i915_vma_is_active(vma));
}
- if (vma->is_ggtt && vma->ggtt_view.type == I915_GGTT_VIEW_NORMAL) {
- i915_gem_object_finish_gtt(obj);
+ if (i915_vma_is_pinned(vma))
+ return -EBUSY;
+
+ if (!drm_mm_node_allocated(&vma->node))
+ goto destroy;
+ GEM_BUG_ON(obj->bind_count == 0);
+ GEM_BUG_ON(!obj->pages);
+
+ if (i915_vma_is_map_and_fenceable(vma)) {
/* release the fence reg _after_ flushing */
- ret = i915_gem_object_put_fence(obj);
+ ret = i915_vma_put_fence(vma);
if (ret)
return ret;
+ /* Force a pagefault for domain tracking on next user access */
+ i915_gem_release_mmap(obj);
+
__i915_vma_iounmap(vma);
+ vma->flags &= ~I915_VMA_CAN_FENCE;
}
- trace_i915_vma_unbind(vma);
-
- vma->vm->unbind_vma(vma);
- vma->bound = 0;
-
- list_del_init(&vma->vm_link);
- if (vma->is_ggtt) {
- if (vma->ggtt_view.type == I915_GGTT_VIEW_NORMAL) {
- obj->map_and_fenceable = false;
- } else if (vma->ggtt_view.pages) {
- sg_free_table(vma->ggtt_view.pages);
- kfree(vma->ggtt_view.pages);
- }
- vma->ggtt_view.pages = NULL;
+ if (likely(!vma->vm->closed)) {
+ trace_i915_vma_unbind(vma);
+ vma->vm->unbind_vma(vma);
}
+ vma->flags &= ~(I915_VMA_GLOBAL_BIND | I915_VMA_LOCAL_BIND);
drm_mm_remove_node(&vma->node);
- i915_gem_vma_destroy(vma);
+ list_move_tail(&vma->vm_link, &vma->vm->unbound_list);
+
+ if (vma->pages != obj->pages) {
+ GEM_BUG_ON(!vma->pages);
+ sg_free_table(vma->pages);
+ kfree(vma->pages);
+ }
+ vma->pages = NULL;
/* Since the unbound list is global, only move to that list if
* no more VMAs exist. */
- if (list_empty(&obj->vma_list))
- list_move_tail(&obj->global_list, &dev_priv->mm.unbound_list);
+ if (--obj->bind_count == 0)
+ list_move_tail(&obj->global_list,
+ &to_i915(obj->base.dev)->mm.unbound_list);
/* And finally now the object is completely decoupled from this vma,
* we can drop its hold on the backing storage and allow it to be
*/
i915_gem_object_unpin_pages(obj);
- return 0;
-}
-
-int i915_vma_unbind(struct i915_vma *vma)
-{
- return __i915_vma_unbind(vma, true);
-}
+destroy:
+ if (unlikely(i915_vma_is_closed(vma)))
+ i915_vma_destroy(vma);
-int __i915_vma_unbind_no_wait(struct i915_vma *vma)
-{
- return __i915_vma_unbind(vma, false);
+ return 0;
}
-int i915_gem_wait_for_idle(struct drm_i915_private *dev_priv)
+int i915_gem_wait_for_idle(struct drm_i915_private *dev_priv,
+ unsigned int flags)
{
struct intel_engine_cs *engine;
int ret;
- lockdep_assert_held(&dev_priv->drm.struct_mutex);
-
for_each_engine(engine, dev_priv) {
if (engine->last_context == NULL)
continue;
- ret = intel_engine_idle(engine);
+ ret = intel_engine_idle(engine, flags);
if (ret)
return ret;
}
- WARN_ON(i915_verify_lists(dev));
return 0;
}
}
/**
- * Finds free space in the GTT aperture and binds the object or a view of it
- * there.
- * @obj: object to bind
- * @vm: address space to bind into
- * @ggtt_view: global gtt view if applicable
- * @alignment: requested alignment
+ * i915_vma_insert - finds a slot for the vma in its address space
+ * @vma: the vma
+ * @size: requested size in bytes (can be larger than the VMA)
+ * @alignment: required alignment
* @flags: mask of PIN_* flags to use
+ *
+ * First we try to allocate some free space that meets the requirements for
+ * the VMA. Failing that, if the flags permit, it will evict an old VMA,
+ * preferably the oldest idle entry to make room for the new VMA.
+ *
+ * Returns:
+ * 0 on success, negative error code otherwise.
*/
-static struct i915_vma *
-i915_gem_object_bind_to_vm(struct drm_i915_gem_object *obj,
- struct i915_address_space *vm,
- const struct i915_ggtt_view *ggtt_view,
- unsigned alignment,
- uint64_t flags)
+static int
+i915_vma_insert(struct i915_vma *vma, u64 size, u64 alignment, u64 flags)
{
- struct drm_device *dev = obj->base.dev;
- struct drm_i915_private *dev_priv = to_i915(dev);
- struct i915_ggtt *ggtt = &dev_priv->ggtt;
- u32 fence_alignment, unfenced_alignment;
- u32 search_flag, alloc_flag;
+ struct drm_i915_private *dev_priv = to_i915(vma->vm->dev);
+ struct drm_i915_gem_object *obj = vma->obj;
u64 start, end;
- u64 size, fence_size;
- struct i915_vma *vma;
int ret;
- if (i915_is_ggtt(vm)) {
- u32 view_size;
-
- if (WARN_ON(!ggtt_view))
- return ERR_PTR(-EINVAL);
-
- view_size = i915_ggtt_view_size(obj, ggtt_view);
-
- fence_size = i915_gem_get_gtt_size(dev,
- view_size,
- obj->tiling_mode);
- fence_alignment = i915_gem_get_gtt_alignment(dev,
- view_size,
- obj->tiling_mode,
- true);
- unfenced_alignment = i915_gem_get_gtt_alignment(dev,
- view_size,
- obj->tiling_mode,
- false);
- size = flags & PIN_MAPPABLE ? fence_size : view_size;
- } else {
- fence_size = i915_gem_get_gtt_size(dev,
- obj->base.size,
- obj->tiling_mode);
- fence_alignment = i915_gem_get_gtt_alignment(dev,
- obj->base.size,
- obj->tiling_mode,
- true);
- unfenced_alignment =
- i915_gem_get_gtt_alignment(dev,
- obj->base.size,
- obj->tiling_mode,
- false);
- size = flags & PIN_MAPPABLE ? fence_size : obj->base.size;
- }
+ GEM_BUG_ON(vma->flags & (I915_VMA_GLOBAL_BIND | I915_VMA_LOCAL_BIND));
+ GEM_BUG_ON(drm_mm_node_allocated(&vma->node));
+
+ size = max(size, vma->size);
+ if (flags & PIN_MAPPABLE)
+ size = i915_gem_get_ggtt_size(dev_priv, size,
+ i915_gem_object_get_tiling(obj));
+
+ alignment = max(max(alignment, vma->display_alignment),
+ i915_gem_get_ggtt_alignment(dev_priv, size,
+ i915_gem_object_get_tiling(obj),
+ flags & PIN_MAPPABLE));
start = flags & PIN_OFFSET_BIAS ? flags & PIN_OFFSET_MASK : 0;
- end = vm->total;
+
+ end = vma->vm->total;
if (flags & PIN_MAPPABLE)
- end = min_t(u64, end, ggtt->mappable_end);
+ end = min_t(u64, end, dev_priv->ggtt.mappable_end);
if (flags & PIN_ZONE_4G)
end = min_t(u64, end, (1ULL << 32) - PAGE_SIZE);
- if (alignment == 0)
- alignment = flags & PIN_MAPPABLE ? fence_alignment :
- unfenced_alignment;
- if (flags & PIN_MAPPABLE && alignment & (fence_alignment - 1)) {
- DRM_DEBUG("Invalid object (view type=%u) alignment requested %u\n",
- ggtt_view ? ggtt_view->type : 0,
- alignment);
- return ERR_PTR(-EINVAL);
- }
-
/* If binding the object/GGTT view requires more space than the entire
* aperture has, reject it early before evicting everything in a vain
* attempt to find space.
*/
if (size > end) {
- DRM_DEBUG("Attempting to bind an object (view type=%u) larger than the aperture: size=%llu > %s aperture=%llu\n",
- ggtt_view ? ggtt_view->type : 0,
- size,
+ DRM_DEBUG("Attempting to bind an object larger than the aperture: request=%llu [object=%zd] > %s aperture=%llu\n",
+ size, obj->base.size,
flags & PIN_MAPPABLE ? "mappable" : "total",
end);
- return ERR_PTR(-E2BIG);
+ return -E2BIG;
}
ret = i915_gem_object_get_pages(obj);
if (ret)
- return ERR_PTR(ret);
+ return ret;
i915_gem_object_pin_pages(obj);
- vma = ggtt_view ? i915_gem_obj_lookup_or_create_ggtt_vma(obj, ggtt_view) :
- i915_gem_obj_lookup_or_create_vma(obj, vm);
-
- if (IS_ERR(vma))
- goto err_unpin;
-
if (flags & PIN_OFFSET_FIXED) {
- uint64_t offset = flags & PIN_OFFSET_MASK;
-
- if (offset & (alignment - 1) || offset + size > end) {
+ u64 offset = flags & PIN_OFFSET_MASK;
+ if (offset & (alignment - 1) || offset > end - size) {
ret = -EINVAL;
- goto err_free_vma;
+ goto err_unpin;
}
+
vma->node.start = offset;
vma->node.size = size;
vma->node.color = obj->cache_level;
- ret = drm_mm_reserve_node(&vm->mm, &vma->node);
+ ret = drm_mm_reserve_node(&vma->vm->mm, &vma->node);
if (ret) {
ret = i915_gem_evict_for_vma(vma);
if (ret == 0)
- ret = drm_mm_reserve_node(&vm->mm, &vma->node);
+ ret = drm_mm_reserve_node(&vma->vm->mm, &vma->node);
+ if (ret)
+ goto err_unpin;
}
- if (ret)
- goto err_free_vma;
} else {
+ u32 search_flag, alloc_flag;
+
if (flags & PIN_HIGH) {
search_flag = DRM_MM_SEARCH_BELOW;
alloc_flag = DRM_MM_CREATE_TOP;
alloc_flag = DRM_MM_CREATE_DEFAULT;
}
+ /* We only allocate in PAGE_SIZE/GTT_PAGE_SIZE (4096) chunks,
+ * so we know that we always have a minimum alignment of 4096.
+ * The drm_mm range manager is optimised to return results
+ * with zero alignment, so where possible use the optimal
+ * path.
+ */
+ if (alignment <= 4096)
+ alignment = 0;
+
search_free:
- ret = drm_mm_insert_node_in_range_generic(&vm->mm, &vma->node,
+ ret = drm_mm_insert_node_in_range_generic(&vma->vm->mm,
+ &vma->node,
size, alignment,
obj->cache_level,
start, end,
search_flag,
alloc_flag);
if (ret) {
- ret = i915_gem_evict_something(dev, vm, size, alignment,
+ ret = i915_gem_evict_something(vma->vm, size, alignment,
obj->cache_level,
start, end,
flags);
if (ret == 0)
goto search_free;
- goto err_free_vma;
+ goto err_unpin;
}
}
- if (WARN_ON(!i915_gem_valid_gtt_space(vma, obj->cache_level))) {
- ret = -EINVAL;
- goto err_remove_node;
- }
-
- trace_i915_vma_bind(vma, flags);
- ret = i915_vma_bind(vma, obj->cache_level, flags);
- if (ret)
- goto err_remove_node;
+ GEM_BUG_ON(!i915_gem_valid_gtt_space(vma, obj->cache_level));
list_move_tail(&obj->global_list, &dev_priv->mm.bound_list);
- list_add_tail(&vma->vm_link, &vm->inactive_list);
+ list_move_tail(&vma->vm_link, &vma->vm->inactive_list);
+ obj->bind_count++;
- return vma;
+ return 0;
-err_remove_node:
- drm_mm_remove_node(&vma->node);
-err_free_vma:
- i915_gem_vma_destroy(vma);
- vma = ERR_PTR(ret);
err_unpin:
i915_gem_object_unpin_pages(obj);
- return vma;
+ return ret;
}
bool
static void
i915_gem_object_flush_gtt_write_domain(struct drm_i915_gem_object *obj)
{
- uint32_t old_write_domain;
+ struct drm_i915_private *dev_priv = to_i915(obj->base.dev);
if (obj->base.write_domain != I915_GEM_DOMAIN_GTT)
return;
/* No actual flushing is required for the GTT write domain. Writes
- * to it immediately go to main memory as far as we know, so there's
+ * to it "immediately" go to main memory as far as we know, so there's
* no chipset flush. It also doesn't land in render cache.
*
* However, we do have to enforce the order so that all writes through
* the GTT land before any writes to the device, such as updates to
* the GATT itself.
+ *
+ * We also have to wait a bit for the writes to land from the GTT.
+ * An uncached read (i.e. mmio) seems to be ideal for the round-trip
+ * timing. This issue has only been observed when switching quickly
+ * between GTT writes and CPU reads from inside the kernel on recent hw,
+ * and it appears to only affect discrete GTT blocks (i.e. on LLC
+ * system agents we cannot reproduce this behaviour).
*/
wmb();
+ if (INTEL_GEN(dev_priv) >= 6 && !HAS_LLC(dev_priv))
+ POSTING_READ(RING_ACTHD(dev_priv->engine[RCS].mmio_base));
- old_write_domain = obj->base.write_domain;
- obj->base.write_domain = 0;
-
- intel_fb_obj_flush(obj, false, ORIGIN_GTT);
+ intel_fb_obj_flush(obj, false, write_origin(obj, I915_GEM_DOMAIN_GTT));
+ obj->base.write_domain = 0;
trace_i915_gem_object_change_domain(obj,
obj->base.read_domains,
- old_write_domain);
+ I915_GEM_DOMAIN_GTT);
}
/** Flushes the CPU write domain for the object if it's dirty. */
static void
i915_gem_object_flush_cpu_write_domain(struct drm_i915_gem_object *obj)
{
- uint32_t old_write_domain;
-
if (obj->base.write_domain != I915_GEM_DOMAIN_CPU)
return;
if (i915_gem_clflush_object(obj, obj->pin_display))
i915_gem_chipset_flush(to_i915(obj->base.dev));
- old_write_domain = obj->base.write_domain;
- obj->base.write_domain = 0;
-
intel_fb_obj_flush(obj, false, ORIGIN_CPU);
+ obj->base.write_domain = 0;
trace_i915_gem_object_change_domain(obj,
obj->base.read_domains,
- old_write_domain);
+ I915_GEM_DOMAIN_CPU);
+}
+
+static void i915_gem_object_bump_inactive_ggtt(struct drm_i915_gem_object *obj)
+{
+ struct i915_vma *vma;
+
+ list_for_each_entry(vma, &obj->vma_list, obj_link) {
+ if (!i915_vma_is_ggtt(vma))
+ continue;
+
+ if (i915_vma_is_active(vma))
+ continue;
+
+ if (!drm_mm_node_allocated(&vma->node))
+ continue;
+
+ list_move_tail(&vma->vm_link, &vma->vm->inactive_list);
+ }
}
/**
int
i915_gem_object_set_to_gtt_domain(struct drm_i915_gem_object *obj, bool write)
{
- struct drm_device *dev = obj->base.dev;
- struct drm_i915_private *dev_priv = to_i915(dev);
- struct i915_ggtt *ggtt = &dev_priv->ggtt;
uint32_t old_write_domain, old_read_domains;
- struct i915_vma *vma;
int ret;
- if (obj->base.write_domain == I915_GEM_DOMAIN_GTT)
- return 0;
-
ret = i915_gem_object_wait_rendering(obj, !write);
if (ret)
return ret;
+ if (obj->base.write_domain == I915_GEM_DOMAIN_GTT)
+ return 0;
+
/* Flush and acquire obj->pages so that we are coherent through
* direct access in memory with previous cached writes through
* shmemfs and that our cache domain tracking remains valid.
old_write_domain);
/* And bump the LRU for this access */
- vma = i915_gem_obj_to_ggtt(obj);
- if (vma && drm_mm_node_allocated(&vma->node) && !obj->active)
- list_move_tail(&vma->vm_link,
- &ggtt->base.inactive_list);
+ i915_gem_object_bump_inactive_ggtt(obj);
return 0;
}
int i915_gem_object_set_cache_level(struct drm_i915_gem_object *obj,
enum i915_cache_level cache_level)
{
- struct drm_device *dev = obj->base.dev;
- struct i915_vma *vma, *next;
- bool bound = false;
+ struct i915_vma *vma;
int ret = 0;
if (obj->cache_level == cache_level)
* catch the issue of the CS prefetch crossing page boundaries and
* reading an invalid PTE on older architectures.
*/
- list_for_each_entry_safe(vma, next, &obj->vma_list, obj_link) {
+restart:
+ list_for_each_entry(vma, &obj->vma_list, obj_link) {
if (!drm_mm_node_allocated(&vma->node))
continue;
- if (vma->pin_count) {
+ if (i915_vma_is_pinned(vma)) {
DRM_DEBUG("can not change the cache level of pinned objects\n");
return -EBUSY;
}
- if (!i915_gem_valid_gtt_space(vma, cache_level)) {
- ret = i915_vma_unbind(vma);
- if (ret)
- return ret;
- } else
- bound = true;
+ if (i915_gem_valid_gtt_space(vma, cache_level))
+ continue;
+
+ ret = i915_vma_unbind(vma);
+ if (ret)
+ return ret;
+
+ /* As unbinding may affect other elements in the
+ * obj->vma_list (due to side-effects from retiring
+ * an active vma), play safe and restart the iterator.
+ */
+ goto restart;
}
/* We can reuse the existing drm_mm nodes but need to change the
* rewrite the PTE in the belief that doing so tramples upon less
* state and so involves less work.
*/
- if (bound) {
+ if (obj->bind_count) {
/* Before we change the PTE, the GPU must not be accessing it.
* If we wait upon the object, we know that all the bound
* VMA are no longer active.
if (ret)
return ret;
- if (!HAS_LLC(dev) && cache_level != I915_CACHE_NONE) {
+ if (!HAS_LLC(obj->base.dev) && cache_level != I915_CACHE_NONE) {
/* Access to snoopable pages through the GTT is
* incoherent and on some machines causes a hard
* lockup. Relinquish the CPU mmaping to force
* dropped the fence as all snoopable access is
* supposed to be linear.
*/
- ret = i915_gem_object_put_fence(obj);
- if (ret)
- return ret;
+ list_for_each_entry(vma, &obj->vma_list, obj_link) {
+ ret = i915_vma_put_fence(vma);
+ if (ret)
+ return ret;
+ }
} else {
/* We either have incoherent backing store and
* so no GTT access or the architecture is fully
struct drm_i915_gem_caching *args = data;
struct drm_i915_gem_object *obj;
- obj = to_intel_bo(drm_gem_object_lookup(file, args->handle));
- if (&obj->base == NULL)
+ obj = i915_gem_object_lookup(file, args->handle);
+ if (!obj)
return -ENOENT;
switch (obj->cache_level) {
break;
}
- drm_gem_object_unreference_unlocked(&obj->base);
+ i915_gem_object_put_unlocked(obj);
return 0;
}
if (ret)
goto rpm_put;
- obj = to_intel_bo(drm_gem_object_lookup(file, args->handle));
- if (&obj->base == NULL) {
+ obj = i915_gem_object_lookup(file, args->handle);
+ if (!obj) {
ret = -ENOENT;
goto unlock;
}
ret = i915_gem_object_set_cache_level(obj, level);
- drm_gem_object_unreference(&obj->base);
+ i915_gem_object_put(obj);
unlock:
mutex_unlock(&dev->struct_mutex);
rpm_put:
* Can be called from an uninterruptible phase (modesetting) and allows
* any flushes to be pipelined (for pageflips).
*/
-int
+struct i915_vma *
i915_gem_object_pin_to_display_plane(struct drm_i915_gem_object *obj,
u32 alignment,
const struct i915_ggtt_view *view)
{
+ struct i915_vma *vma;
u32 old_read_domains, old_write_domain;
int ret;
*/
ret = i915_gem_object_set_cache_level(obj,
HAS_WT(obj->base.dev) ? I915_CACHE_WT : I915_CACHE_NONE);
- if (ret)
+ if (ret) {
+ vma = ERR_PTR(ret);
goto err_unpin_display;
+ }
/* As the user may map the buffer once pinned in the display plane
* (e.g. libkms for the bootup splash), we have to ensure that we
- * always use map_and_fenceable for all scanout buffers.
+ * always use map_and_fenceable for all scanout buffers. However,
+ * it may simply be too big to fit into mappable, in which case
+ * put it anyway and hope that userspace can cope (but always first
+ * try to preserve the existing ABI).
*/
- ret = i915_gem_object_ggtt_pin(obj, view, alignment,
- view->type == I915_GGTT_VIEW_NORMAL ?
- PIN_MAPPABLE : 0);
- if (ret)
+ vma = ERR_PTR(-ENOSPC);
+ if (view->type == I915_GGTT_VIEW_NORMAL)
+ vma = i915_gem_object_ggtt_pin(obj, view, 0, alignment,
+ PIN_MAPPABLE | PIN_NONBLOCK);
+ if (IS_ERR(vma))
+ vma = i915_gem_object_ggtt_pin(obj, view, 0, alignment, 0);
+ if (IS_ERR(vma))
goto err_unpin_display;
+ vma->display_alignment = max_t(u64, vma->display_alignment, alignment);
+
+ WARN_ON(obj->pin_display > i915_vma_pin_count(vma));
+
i915_gem_object_flush_cpu_write_domain(obj);
old_write_domain = obj->base.write_domain;
old_read_domains,
old_write_domain);
- return 0;
+ return vma;
err_unpin_display:
obj->pin_display--;
- return ret;
+ return vma;
}
void
-i915_gem_object_unpin_from_display_plane(struct drm_i915_gem_object *obj,
- const struct i915_ggtt_view *view)
+i915_gem_object_unpin_from_display_plane(struct i915_vma *vma)
{
- if (WARN_ON(obj->pin_display == 0))
+ if (WARN_ON(vma->obj->pin_display == 0))
return;
- i915_gem_object_ggtt_unpin_view(obj, view);
+ if (--vma->obj->pin_display == 0)
+ vma->display_alignment = 0;
- obj->pin_display--;
+ /* Bump the LRU to try and avoid premature eviction whilst flipping */
+ if (!i915_vma_is_active(vma))
+ list_move_tail(&vma->vm_link, &vma->vm->inactive_list);
+
+ i915_vma_unpin(vma);
+ WARN_ON(vma->obj->pin_display > i915_vma_pin_count(vma));
}
/**
uint32_t old_write_domain, old_read_domains;
int ret;
- if (obj->base.write_domain == I915_GEM_DOMAIN_CPU)
- return 0;
-
ret = i915_gem_object_wait_rendering(obj, !write);
if (ret)
return ret;
+ if (obj->base.write_domain == I915_GEM_DOMAIN_CPU)
+ return 0;
+
i915_gem_object_flush_gtt_write_domain(obj);
old_write_domain = obj->base.write_domain;
target = request;
}
if (target)
- i915_gem_request_reference(target);
+ i915_gem_request_get(target);
spin_unlock(&file_priv->mm.lock);
if (target == NULL)
return 0;
- ret = __i915_wait_request(target, true, NULL, NULL);
- i915_gem_request_unreference(target);
+ ret = i915_wait_request(target, I915_WAIT_INTERRUPTIBLE, NULL, NULL);
+ i915_gem_request_put(target);
return ret;
}
static bool
-i915_vma_misplaced(struct i915_vma *vma, uint32_t alignment, uint64_t flags)
+i915_vma_misplaced(struct i915_vma *vma, u64 size, u64 alignment, u64 flags)
{
- struct drm_i915_gem_object *obj = vma->obj;
+ if (!drm_mm_node_allocated(&vma->node))
+ return false;
+
+ if (vma->node.size < size)
+ return true;
- if (alignment &&
- vma->node.start & (alignment - 1))
+ if (alignment && vma->node.start & (alignment - 1))
return true;
- if (flags & PIN_MAPPABLE && !obj->map_and_fenceable)
+ if (flags & PIN_MAPPABLE && !i915_vma_is_map_and_fenceable(vma))
return true;
if (flags & PIN_OFFSET_BIAS &&
void __i915_vma_set_map_and_fenceable(struct i915_vma *vma)
{
struct drm_i915_gem_object *obj = vma->obj;
+ struct drm_i915_private *dev_priv = to_i915(obj->base.dev);
bool mappable, fenceable;
u32 fence_size, fence_alignment;
- fence_size = i915_gem_get_gtt_size(obj->base.dev,
- obj->base.size,
- obj->tiling_mode);
- fence_alignment = i915_gem_get_gtt_alignment(obj->base.dev,
- obj->base.size,
- obj->tiling_mode,
- true);
+ fence_size = i915_gem_get_ggtt_size(dev_priv,
+ vma->size,
+ i915_gem_object_get_tiling(obj));
+ fence_alignment = i915_gem_get_ggtt_alignment(dev_priv,
+ vma->size,
+ i915_gem_object_get_tiling(obj),
+ true);
fenceable = (vma->node.size == fence_size &&
(vma->node.start & (fence_alignment - 1)) == 0);
mappable = (vma->node.start + fence_size <=
- to_i915(obj->base.dev)->ggtt.mappable_end);
+ dev_priv->ggtt.mappable_end);
- obj->map_and_fenceable = mappable && fenceable;
+ if (mappable && fenceable)
+ vma->flags |= I915_VMA_CAN_FENCE;
+ else
+ vma->flags &= ~I915_VMA_CAN_FENCE;
}
-static int
-i915_gem_object_do_pin(struct drm_i915_gem_object *obj,
- struct i915_address_space *vm,
- const struct i915_ggtt_view *ggtt_view,
- uint32_t alignment,
- uint64_t flags)
+int __i915_vma_do_pin(struct i915_vma *vma,
+ u64 size, u64 alignment, u64 flags)
{
- struct drm_i915_private *dev_priv = to_i915(obj->base.dev);
- struct i915_vma *vma;
- unsigned bound;
+ unsigned int bound = vma->flags;
int ret;
- if (WARN_ON(vm == &dev_priv->mm.aliasing_ppgtt->base))
- return -ENODEV;
-
- if (WARN_ON(flags & (PIN_GLOBAL | PIN_MAPPABLE) && !i915_is_ggtt(vm)))
- return -EINVAL;
-
- if (WARN_ON((flags & (PIN_MAPPABLE | PIN_GLOBAL)) == PIN_MAPPABLE))
- return -EINVAL;
-
- if (WARN_ON(i915_is_ggtt(vm) != !!ggtt_view))
- return -EINVAL;
-
- vma = ggtt_view ? i915_gem_obj_to_ggtt_view(obj, ggtt_view) :
- i915_gem_obj_to_vma(obj, vm);
-
- if (vma) {
- if (WARN_ON(vma->pin_count == DRM_I915_GEM_OBJECT_MAX_PIN_COUNT))
- return -EBUSY;
+ GEM_BUG_ON((flags & (PIN_GLOBAL | PIN_USER)) == 0);
+ GEM_BUG_ON((flags & PIN_GLOBAL) && !i915_vma_is_ggtt(vma));
- if (i915_vma_misplaced(vma, alignment, flags)) {
- WARN(vma->pin_count,
- "bo is already pinned in %s with incorrect alignment:"
- " offset=%08x %08x, req.alignment=%x, req.map_and_fenceable=%d,"
- " obj->map_and_fenceable=%d\n",
- ggtt_view ? "ggtt" : "ppgtt",
- upper_32_bits(vma->node.start),
- lower_32_bits(vma->node.start),
- alignment,
- !!(flags & PIN_MAPPABLE),
- obj->map_and_fenceable);
- ret = i915_vma_unbind(vma);
- if (ret)
- return ret;
-
- vma = NULL;
- }
+ if (WARN_ON(bound & I915_VMA_PIN_OVERFLOW)) {
+ ret = -EBUSY;
+ goto err;
}
- bound = vma ? vma->bound : 0;
- if (vma == NULL || !drm_mm_node_allocated(&vma->node)) {
- vma = i915_gem_object_bind_to_vm(obj, vm, ggtt_view, alignment,
- flags);
- if (IS_ERR(vma))
- return PTR_ERR(vma);
- } else {
- ret = i915_vma_bind(vma, obj->cache_level, flags);
+ if ((bound & I915_VMA_BIND_MASK) == 0) {
+ ret = i915_vma_insert(vma, size, alignment, flags);
if (ret)
- return ret;
+ goto err;
}
- if (ggtt_view && ggtt_view->type == I915_GGTT_VIEW_NORMAL &&
- (bound ^ vma->bound) & GLOBAL_BIND) {
+ ret = i915_vma_bind(vma, vma->obj->cache_level, flags);
+ if (ret)
+ goto err;
+
+ if ((bound ^ vma->flags) & I915_VMA_GLOBAL_BIND)
__i915_vma_set_map_and_fenceable(vma);
- WARN_ON(flags & PIN_MAPPABLE && !obj->map_and_fenceable);
- }
- vma->pin_count++;
+ GEM_BUG_ON(i915_vma_misplaced(vma, size, alignment, flags));
return 0;
-}
-int
-i915_gem_object_pin(struct drm_i915_gem_object *obj,
- struct i915_address_space *vm,
- uint32_t alignment,
- uint64_t flags)
-{
- return i915_gem_object_do_pin(obj, vm,
- i915_is_ggtt(vm) ? &i915_ggtt_view_normal : NULL,
- alignment, flags);
+err:
+ __i915_vma_unpin(vma);
+ return ret;
}
-int
+struct i915_vma *
i915_gem_object_ggtt_pin(struct drm_i915_gem_object *obj,
const struct i915_ggtt_view *view,
- uint32_t alignment,
- uint64_t flags)
+ u64 size,
+ u64 alignment,
+ u64 flags)
{
- struct drm_device *dev = obj->base.dev;
- struct drm_i915_private *dev_priv = to_i915(dev);
- struct i915_ggtt *ggtt = &dev_priv->ggtt;
+ struct i915_address_space *vm = &to_i915(obj->base.dev)->ggtt.base;
+ struct i915_vma *vma;
+ int ret;
+
+ vma = i915_gem_obj_lookup_or_create_vma(obj, vm, view);
+ if (IS_ERR(vma))
+ return vma;
+
+ if (i915_vma_misplaced(vma, size, alignment, flags)) {
+ if (flags & PIN_NONBLOCK &&
+ (i915_vma_is_pinned(vma) || i915_vma_is_active(vma)))
+ return ERR_PTR(-ENOSPC);
+
+ WARN(i915_vma_is_pinned(vma),
+ "bo is already pinned in ggtt with incorrect alignment:"
+ " offset=%08x, req.alignment=%llx,"
+ " req.map_and_fenceable=%d, vma->map_and_fenceable=%d\n",
+ i915_ggtt_offset(vma), alignment,
+ !!(flags & PIN_MAPPABLE),
+ i915_vma_is_map_and_fenceable(vma));
+ ret = i915_vma_unbind(vma);
+ if (ret)
+ return ERR_PTR(ret);
+ }
+
+ ret = i915_vma_pin(vma, size, alignment, flags | PIN_GLOBAL);
+ if (ret)
+ return ERR_PTR(ret);
- BUG_ON(!view);
+ return vma;
+}
- return i915_gem_object_do_pin(obj, &ggtt->base, view,
- alignment, flags | PIN_GLOBAL);
+static __always_inline unsigned int __busy_read_flag(unsigned int id)
+{
+ /* Note that we could alias engines in the execbuf API, but
+ * that would be very unwise as it prevents userspace from
+ * fine control over engine selection. Ahem.
+ *
+ * This should be something like EXEC_MAX_ENGINE instead of
+ * I915_NUM_ENGINES.
+ */
+ BUILD_BUG_ON(I915_NUM_ENGINES > 16);
+ return 0x10000 << id;
}
-void
-i915_gem_object_ggtt_unpin_view(struct drm_i915_gem_object *obj,
- const struct i915_ggtt_view *view)
+static __always_inline unsigned int __busy_write_id(unsigned int id)
{
- struct i915_vma *vma = i915_gem_obj_to_ggtt_view(obj, view);
+ /* The uABI guarantees an active writer is also amongst the read
+ * engines. This would be true if we accessed the activity tracking
+ * under the lock, but as we perform the lookup of the object and
+ * its activity locklessly we can not guarantee that the last_write
+ * being active implies that we have set the same engine flag from
+ * last_read - hence we always set both read and write busy for
+ * last_write.
+ */
+ return id | __busy_read_flag(id);
+}
+
+static __always_inline unsigned int
+__busy_set_if_active(const struct i915_gem_active *active,
+ unsigned int (*flag)(unsigned int id))
+{
+ struct drm_i915_gem_request *request;
+
+ request = rcu_dereference(active->request);
+ if (!request || i915_gem_request_completed(request))
+ return 0;
+
+ /* This is racy. See __i915_gem_active_get_rcu() for a detailed
+ * discussion of how to handle the race correctly, but for reporting
+ * the busy state we err on the side of potentially reporting the
+ * wrong engine as being busy (but we guarantee that the result
+ * is at least self-consistent).
+ *
+ * As we use SLAB_DESTROY_BY_RCU, the request may be reallocated
+ * whilst we are inspecting it, even under the RCU read lock (as we are here).
+ * This means that there is a small window for the engine and/or the
+ * seqno to have been overwritten. The seqno will always be in the
+ * future compared to the intended, and so we know that if that
+ * seqno is idle (on whatever engine) our request is idle and the
+ * return 0 above is correct.
+ *
+ * The issue is that if the engine is switched, it is just as likely
+ * to report that it is busy (but since the switch happened, we know
+ * the request should be idle). So there is a small chance that a busy
+ * result is actually the wrong engine.
+ *
+ * So why don't we care?
+ *
+ * For starters, the busy ioctl is a heuristic that is by definition
+ * racy. Even with perfect serialisation in the driver, the hardware
+ * state is constantly advancing - the state we report to the user
+ * is stale.
+ *
+ * The critical information for the busy-ioctl is whether the object
+ * is idle as userspace relies on that to detect whether its next
+ * access will stall, or if it has missed submitting commands to
+ * the hardware allowing the GPU to stall. We never generate a
+ * false-positive for idleness, thus busy-ioctl is reliable at the
+ * most fundamental level, and we maintain the guarantee that a
+ * busy object left to itself will eventually become idle (and stay
+ * idle!).
+ *
+ * We allow ourselves the leeway of potentially misreporting the busy
+ * state because that is an optimisation heuristic that is constantly
+ * in flux. Being quickly able to detect the busy/idle state is much
+ * more important than accurate logging of exactly which engines were
+ * busy.
+ *
+ * For accuracy in reporting the engine, we could use
+ *
+ * result = 0;
+ * request = __i915_gem_active_get_rcu(active);
+ * if (request) {
+ * if (!i915_gem_request_completed(request))
+ * result = flag(request->engine->exec_id);
+ * i915_gem_request_put(request);
+ * }
+ *
+ * but that still remains susceptible to both hardware and userspace
+ * races. So we accept making the result of that race slightly worse,
+ * given the rarity of the race and its low impact on the result.
+ */
+ return flag(READ_ONCE(request->engine->exec_id));
+}
- WARN_ON(vma->pin_count == 0);
- WARN_ON(!i915_gem_obj_ggtt_bound_view(obj, view));
+static __always_inline unsigned int
+busy_check_reader(const struct i915_gem_active *active)
+{
+ return __busy_set_if_active(active, __busy_read_flag);
+}
- --vma->pin_count;
+static __always_inline unsigned int
+busy_check_writer(const struct i915_gem_active *active)
+{
+ return __busy_set_if_active(active, __busy_write_id);
}
int
{
struct drm_i915_gem_busy *args = data;
struct drm_i915_gem_object *obj;
- int ret;
+ unsigned long active;
- ret = i915_mutex_lock_interruptible(dev);
- if (ret)
- return ret;
+ obj = i915_gem_object_lookup(file, args->handle);
+ if (!obj)
+ return -ENOENT;
- obj = to_intel_bo(drm_gem_object_lookup(file, args->handle));
- if (&obj->base == NULL) {
- ret = -ENOENT;
- goto unlock;
- }
+ args->busy = 0;
+ active = __I915_BO_ACTIVE(obj);
+ if (active) {
+ int idx;
- /* Count all active objects as busy, even if they are currently not used
- * by the gpu. Users of this interface expect objects to eventually
- * become non-busy without any further actions, therefore emit any
- * necessary flushes here.
- */
- ret = i915_gem_object_flush_active(obj);
- if (ret)
- goto unref;
+ /* Yes, the lookups are intentionally racy.
+ *
+ * First, we cannot simply rely on __I915_BO_ACTIVE. We have
+ * to regard the value as stale and as our ABI guarantees
+ * forward progress, we confirm the status of each active
+ * request with the hardware.
+ *
+ * Even though we guard the pointer lookup by RCU, that only
+ * guarantees that the pointer and its contents remain
+ * dereferenceable and does *not* mean that the request we
+ * have is the same as the one being tracked by the object.
+ *
+ * Consider that we lookup the request just as it is being
+ * retired and freed. We take a local copy of the pointer,
+ * but before we add its engine into the busy set, the other
+ * thread reallocates it and assigns it to a task on another
+ * engine with a fresh and incomplete seqno. Guarding against
+ * that requires careful serialisation and reference counting,
+ * i.e. using __i915_gem_active_get_request_rcu(). We don't,
+ * instead we expect that if the result is busy, which engines
+ * are busy is not completely reliable - we only guarantee
+ * that the object was busy.
+ */
+ rcu_read_lock();
- args->busy = 0;
- if (obj->active) {
- int i;
+ for_each_active(active, idx)
+ args->busy |= busy_check_reader(&obj->last_read[idx]);
- for (i = 0; i < I915_NUM_ENGINES; i++) {
- struct drm_i915_gem_request *req;
+ /* For ABI sanity, we only care that the write engine is in
+ * the set of read engines. This should be ensured by the
+ * ordering of setting last_read/last_write in
+ * i915_vma_move_to_active(), and then in reverse in retire.
+ * However, for good measure, we always report the last_write
+ * request as a busy read as well as being a busy write.
+ *
+ * We don't care that the set of active read/write engines
+ * may change during construction of the result, as it is
+ * equally liable to change before userspace can inspect
+ * the result.
+ */
+ args->busy |= busy_check_writer(&obj->last_write);
- req = obj->last_read_req[i];
- if (req)
- args->busy |= 1 << (16 + req->engine->exec_id);
- }
- if (obj->last_write_req)
- args->busy |= obj->last_write_req->engine->exec_id;
+ rcu_read_unlock();
}
-unref:
- drm_gem_object_unreference(&obj->base);
-unlock:
- mutex_unlock(&dev->struct_mutex);
- return ret;
+ i915_gem_object_put_unlocked(obj);
+ return 0;
}
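Illustrative sketch (not introduced by this patch): decoding args->busy as produced above, where busy_check_writer() places the last writer's exec_id in the low 16 bits and each active reader sets bit 16 + exec_id via __busy_read_flag(). The helper names below are invented for the example.

static inline bool example_busy_any(u32 busy)
{
	return busy != 0;		/* object has outstanding work somewhere */
}

static inline u32 example_busy_readers(u32 busy)
{
	return busy >> 16;		/* one bit per engine exec_id with reads outstanding */
}

static inline u32 example_busy_writer(u32 busy)
{
	return busy & 0xffff;		/* exec_id of the last writer, 0 when no write is outstanding */
}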
int
if (ret)
return ret;
- obj = to_intel_bo(drm_gem_object_lookup(file_priv, args->handle));
- if (&obj->base == NULL) {
+ obj = i915_gem_object_lookup(file_priv, args->handle);
+ if (!obj) {
ret = -ENOENT;
goto unlock;
}
- if (i915_gem_obj_is_pinned(obj)) {
- ret = -EINVAL;
- goto out;
- }
-
if (obj->pages &&
- obj->tiling_mode != I915_TILING_NONE &&
+ i915_gem_object_is_tiled(obj) &&
dev_priv->quirks & QUIRK_PIN_SWIZZLED_PAGES) {
if (obj->madv == I915_MADV_WILLNEED)
i915_gem_object_unpin_pages(obj);
args->retained = obj->madv != __I915_MADV_PURGED;
-out:
- drm_gem_object_unreference(&obj->base);
+ i915_gem_object_put(obj);
unlock:
mutex_unlock(&dev->struct_mutex);
return ret;
INIT_LIST_HEAD(&obj->global_list);
for (i = 0; i < I915_NUM_ENGINES; i++)
- INIT_LIST_HEAD(&obj->engine_list[i]);
+ init_request_active(&obj->last_read[i],
+ i915_gem_object_retire__read);
+ init_request_active(&obj->last_write,
+ i915_gem_object_retire__write);
INIT_LIST_HEAD(&obj->obj_exec_link);
INIT_LIST_HEAD(&obj->vma_list);
INIT_LIST_HEAD(&obj->batch_pool_link);
obj->ops = ops;
- obj->fence_reg = I915_FENCE_REG_NONE;
+ obj->frontbuffer_ggtt_origin = ORIGIN_GTT;
obj->madv = I915_MADV_WILLNEED;
i915_gem_info_add_obj(to_i915(obj->base.dev), obj->base.size);
trace_i915_gem_object_destroy(obj);
+ /* All file-owned VMA should have been released by this point through
+ * i915_gem_close_object(), or earlier by i915_gem_context_close().
+ * However, the object may also be bound into the global GTT (e.g.
+ * older GPUs without per-process support, or for direct access through
+ * the GTT either for the user or for scanout). Those VMA still need to
+ * be unbound now.
+ */
list_for_each_entry_safe(vma, next, &obj->vma_list, obj_link) {
- int ret;
-
- vma->pin_count = 0;
- ret = i915_vma_unbind(vma);
- if (WARN_ON(ret == -ERESTARTSYS)) {
- bool was_interruptible;
-
- was_interruptible = dev_priv->mm.interruptible;
- dev_priv->mm.interruptible = false;
-
- WARN_ON(i915_vma_unbind(vma));
-
- dev_priv->mm.interruptible = was_interruptible;
- }
+ GEM_BUG_ON(!i915_vma_is_ggtt(vma));
+ GEM_BUG_ON(i915_vma_is_active(vma));
+ vma->flags &= ~I915_VMA_PIN_MASK;
+ i915_vma_close(vma);
}
+ GEM_BUG_ON(obj->bind_count);
/* Stolen objects don't hold a ref, but do hold pin count. Fix that up
* before progressing. */
if (obj->stolen)
i915_gem_object_unpin_pages(obj);
- WARN_ON(obj->frontbuffer_bits);
+ WARN_ON(atomic_read(&obj->frontbuffer_bits));
if (obj->pages && obj->madv == I915_MADV_WILLNEED &&
dev_priv->quirks & QUIRK_PIN_SWIZZLED_PAGES &&
- obj->tiling_mode != I915_TILING_NONE)
+ i915_gem_object_is_tiled(obj))
i915_gem_object_unpin_pages(obj);
if (WARN_ON(obj->pages_pin_count))
if (discard_backing_storage(obj))
obj->madv = I915_MADV_DONTNEED;
i915_gem_object_put_pages(obj);
- i915_gem_object_free_mmap_offset(obj);
BUG_ON(obj->pages);
intel_runtime_pm_put(dev_priv);
}
-struct i915_vma *i915_gem_obj_to_vma(struct drm_i915_gem_object *obj,
- struct i915_address_space *vm)
-{
- struct i915_vma *vma;
- list_for_each_entry(vma, &obj->vma_list, obj_link) {
- if (vma->ggtt_view.type == I915_GGTT_VIEW_NORMAL &&
- vma->vm == vm)
- return vma;
- }
- return NULL;
-}
-
-struct i915_vma *i915_gem_obj_to_ggtt_view(struct drm_i915_gem_object *obj,
- const struct i915_ggtt_view *view)
-{
- struct i915_vma *vma;
-
- GEM_BUG_ON(!view);
-
- list_for_each_entry(vma, &obj->vma_list, obj_link)
- if (vma->is_ggtt && i915_ggtt_view_equal(&vma->ggtt_view, view))
- return vma;
- return NULL;
-}
-
-void i915_gem_vma_destroy(struct i915_vma *vma)
-{
- WARN_ON(vma->node.allocated);
-
- /* Keep the vma as a placeholder in the execbuffer reservation lists */
- if (!list_empty(&vma->exec_list))
- return;
-
- if (!vma->is_ggtt)
- i915_ppgtt_put(i915_vm_to_ppgtt(vma->vm));
-
- list_del(&vma->obj_link);
-
- kmem_cache_free(to_i915(vma->obj->base.dev)->vmas, vma);
-}
-
-static void
-i915_gem_stop_engines(struct drm_device *dev)
+int i915_gem_suspend(struct drm_device *dev)
{
struct drm_i915_private *dev_priv = to_i915(dev);
- struct intel_engine_cs *engine;
-
- for_each_engine(engine, dev_priv)
- dev_priv->gt.stop_engine(engine);
-}
+ int ret;
-int
-i915_gem_suspend(struct drm_device *dev)
-{
- struct drm_i915_private *dev_priv = to_i915(dev);
- int ret = 0;
+ intel_suspend_gt_powersave(dev_priv);
mutex_lock(&dev->struct_mutex);
- ret = i915_gem_wait_for_idle(dev_priv);
+
+ /* We have to flush all the executing contexts to main memory so
+ * that they can be saved in the hibernation image. To ensure the last
+ * context image is coherent, we have to switch away from it. That
+ * leaves the dev_priv->kernel_context still active when
+ * we actually suspend, and its image in memory may not match the GPU
+ * state. Fortunately, the kernel_context is disposable and we do
+ * not rely on its state.
+ */
+ ret = i915_gem_switch_to_kernel_context(dev_priv);
+ if (ret)
+ goto err;
+
+ ret = i915_gem_wait_for_idle(dev_priv,
+ I915_WAIT_INTERRUPTIBLE |
+ I915_WAIT_LOCKED);
if (ret)
goto err;
i915_gem_retire_requests(dev_priv);
- i915_gem_stop_engines(dev);
i915_gem_context_lost(dev_priv);
mutex_unlock(&dev->struct_mutex);
return ret;
}
+void i915_gem_resume(struct drm_device *dev)
+{
+ struct drm_i915_private *dev_priv = to_i915(dev);
+
+ mutex_lock(&dev->struct_mutex);
+ i915_gem_restore_gtt_mappings(dev);
+
+ /* As we didn't flush the kernel context before suspend, we cannot
+ * guarantee that the context image is complete. So let's just reset
+ * it and start again.
+ */
+ dev_priv->gt.resume(dev_priv);
+
+ mutex_unlock(&dev->struct_mutex);
+}
+
void i915_gem_init_swizzling(struct drm_device *dev)
{
struct drm_i915_private *dev_priv = to_i915(dev);
}
}
-int i915_gem_init_engines(struct drm_device *dev)
-{
- struct drm_i915_private *dev_priv = to_i915(dev);
- int ret;
-
- ret = intel_init_render_ring_buffer(dev);
- if (ret)
- return ret;
-
- if (HAS_BSD(dev)) {
- ret = intel_init_bsd_ring_buffer(dev);
- if (ret)
- goto cleanup_render_ring;
- }
-
- if (HAS_BLT(dev)) {
- ret = intel_init_blt_ring_buffer(dev);
- if (ret)
- goto cleanup_bsd_ring;
- }
-
- if (HAS_VEBOX(dev)) {
- ret = intel_init_vebox_ring_buffer(dev);
- if (ret)
- goto cleanup_blt_ring;
- }
-
- if (HAS_BSD2(dev)) {
- ret = intel_init_bsd2_ring_buffer(dev);
- if (ret)
- goto cleanup_vebox_ring;
- }
-
- return 0;
-
-cleanup_vebox_ring:
- intel_cleanup_engine(&dev_priv->engine[VECS]);
-cleanup_blt_ring:
- intel_cleanup_engine(&dev_priv->engine[BCS]);
-cleanup_bsd_ring:
- intel_cleanup_engine(&dev_priv->engine[VCS]);
-cleanup_render_ring:
- intel_cleanup_engine(&dev_priv->engine[RCS]);
-
- return ret;
-}
-
int
i915_gem_init_hw(struct drm_device *dev)
{
return ret;
}
+bool intel_sanitize_semaphores(struct drm_i915_private *dev_priv, int value)
+{
+ if (INTEL_INFO(dev_priv)->gen < 6)
+ return false;
+
+ /* TODO: make semaphores and Execlists play nicely together */
+ if (i915.enable_execlists)
+ return false;
+
+ if (value >= 0)
+ return value;
+
+#ifdef CONFIG_INTEL_IOMMU
+ /* Enable semaphores on SNB when IO remapping is off */
+ if (INTEL_INFO(dev_priv)->gen == 6 && intel_iommu_gfx_mapped)
+ return false;
+#endif
+
+ return true;
+}
+
int i915_gem_init(struct drm_device *dev)
{
struct drm_i915_private *dev_priv = to_i915(dev);
mutex_lock(&dev->struct_mutex);
if (!i915.enable_execlists) {
- dev_priv->gt.execbuf_submit = i915_gem_ringbuffer_submission;
- dev_priv->gt.init_engines = i915_gem_init_engines;
- dev_priv->gt.cleanup_engine = intel_cleanup_engine;
- dev_priv->gt.stop_engine = intel_stop_engine;
+ dev_priv->gt.resume = intel_legacy_submission_resume;
+ dev_priv->gt.cleanup_engine = intel_engine_cleanup;
} else {
- dev_priv->gt.execbuf_submit = intel_execlists_submission;
- dev_priv->gt.init_engines = intel_logical_rings_init;
+ dev_priv->gt.resume = intel_lr_context_resume;
dev_priv->gt.cleanup_engine = intel_logical_ring_cleanup;
- dev_priv->gt.stop_engine = intel_logical_ring_stop;
}
/* This is just a security blanket to placate dragons.
intel_uncore_forcewake_get(dev_priv, FORCEWAKE_ALL);
i915_gem_init_userptr(dev_priv);
- i915_gem_init_ggtt(dev);
+
+ ret = i915_gem_init_ggtt(dev_priv);
+ if (ret)
+ goto out_unlock;
ret = i915_gem_context_init(dev);
if (ret)
goto out_unlock;
- ret = dev_priv->gt.init_engines(dev);
+ ret = intel_engines_init(dev);
if (ret)
goto out_unlock;
ret = i915_gem_init_hw(dev);
if (ret == -EIO) {
- /* Allow ring initialisation to fail by marking the GPU as
+ /* Allow engine initialisation to fail by marking the GPU as
* wedged. But we only want to do this where the GPU is angry,
* for all other failure, such as an allocation failure, bail.
*/
DRM_ERROR("Failed to initialize GPU, declaring it wedged\n");
- atomic_or(I915_WEDGED, &dev_priv->gpu_error.reset_counter);
+ i915_gem_set_wedged(dev_priv);
ret = 0;
}
static void
init_engine_lists(struct intel_engine_cs *engine)
{
- INIT_LIST_HEAD(&engine->active_list);
INIT_LIST_HEAD(&engine->request_list);
}
i915_gem_load_init_fences(struct drm_i915_private *dev_priv)
{
struct drm_device *dev = &dev_priv->drm;
+ int i;
if (INTEL_INFO(dev_priv)->gen >= 7 && !IS_VALLEYVIEW(dev_priv) &&
!IS_CHERRYVIEW(dev_priv))
I915_READ(vgtif_reg(avail_rs.fence_num));
/* Initialize fence registers to zero */
+ for (i = 0; i < dev_priv->num_fence_regs; i++) {
+ struct drm_i915_fence_reg *fence = &dev_priv->fence_regs[i];
+
+ fence->i915 = dev_priv;
+ fence->id = i;
+ list_add_tail(&fence->link, &dev_priv->mm.fence_list);
+ }
i915_gem_restore_fences(dev);
i915_gem_detect_bit_6_swizzle(dev);
dev_priv->requests =
kmem_cache_create("i915_gem_request",
sizeof(struct drm_i915_gem_request), 0,
- SLAB_HWCACHE_ALIGN,
+ SLAB_HWCACHE_ALIGN |
+ SLAB_RECLAIM_ACCOUNT |
+ SLAB_DESTROY_BY_RCU,
NULL);
- INIT_LIST_HEAD(&dev_priv->vm_list);
INIT_LIST_HEAD(&dev_priv->context_list);
INIT_LIST_HEAD(&dev_priv->mm.unbound_list);
INIT_LIST_HEAD(&dev_priv->mm.bound_list);
INIT_LIST_HEAD(&dev_priv->mm.fence_list);
for (i = 0; i < I915_NUM_ENGINES; i++)
init_engine_lists(&dev_priv->engine[i]);
- for (i = 0; i < I915_MAX_NUM_FENCES; i++)
- INIT_LIST_HEAD(&dev_priv->fence_regs[i].lru_list);
INIT_DELAYED_WORK(&dev_priv->gt.retire_work,
i915_gem_retire_work_handler);
INIT_DELAYED_WORK(&dev_priv->gt.idle_work,
dev_priv->relative_constants_mode = I915_EXEC_CONSTANTS_REL_GENERAL;
- INIT_LIST_HEAD(&dev_priv->mm.fence_list);
-
init_waitqueue_head(&dev_priv->pending_flip_queue);
dev_priv->mm.interruptible = true;
- mutex_init(&dev_priv->fb_tracking.lock);
+ atomic_set(&dev_priv->mm.bsd_engine_dispatch_index, 0);
+
+ spin_lock_init(&dev_priv->fb_tracking.lock);
}
void i915_gem_load_cleanup(struct drm_device *dev)
kmem_cache_destroy(dev_priv->requests);
kmem_cache_destroy(dev_priv->vmas);
kmem_cache_destroy(dev_priv->objects);
+
+ /* And ensure that our DESTROY_BY_RCU slabs are truly destroyed */
+ rcu_barrier();
}
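A minimal sketch (not part of the patch) of the lockless lookup discipline that SLAB_DESTROY_BY_RCU imposes on the request cache created above, and which the rcu_barrier() in the cleanup path pairs with: under rcu_read_lock() the memory always remains a request, but it may be recycled as a different request at any moment, so readers (such as __busy_set_if_active() earlier in this series) must tolerate stale or recycled requests rather than assume identity.

static bool example_active_is_busy(const struct i915_gem_active *active)
{
	struct drm_i915_gem_request *request;
	bool busy = false;

	rcu_read_lock();
	request = rcu_dereference(active->request); /* never freed inside the read section, but may be recycled */
	if (request && !i915_gem_request_completed(request))
		busy = true;	/* worst case a recycled request reads as busy; never a use-after-free */
	rcu_read_unlock();

	return busy;
}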
int i915_gem_freeze_late(struct drm_i915_private *dev_priv)
{
struct drm_i915_gem_object *obj;
+ struct list_head *phases[] = {
+ &dev_priv->mm.unbound_list,
+ &dev_priv->mm.bound_list,
+ NULL
+ }, **p;
/* Called just before we write the hibernation image.
*
*
* To make sure the hibernation image contains the latest state,
* we update that state just before writing out the image.
+ *
+ * To try and reduce the hibernation image, we manually shrink
+ * the objects as well.
*/
- list_for_each_entry(obj, &dev_priv->mm.unbound_list, global_list) {
- obj->base.read_domains = I915_GEM_DOMAIN_CPU;
- obj->base.write_domain = I915_GEM_DOMAIN_CPU;
- }
+ i915_gem_shrink_all(dev_priv);
- list_for_each_entry(obj, &dev_priv->mm.bound_list, global_list) {
- obj->base.read_domains = I915_GEM_DOMAIN_CPU;
- obj->base.write_domain = I915_GEM_DOMAIN_CPU;
+ for (p = phases; *p; p++) {
+ list_for_each_entry(obj, *p, global_list) {
+ obj->base.read_domains = I915_GEM_DOMAIN_CPU;
+ obj->base.write_domain = I915_GEM_DOMAIN_CPU;
+ }
}
return 0;
void i915_gem_release(struct drm_device *dev, struct drm_file *file)
{
struct drm_i915_file_private *file_priv = file->driver_priv;
+ struct drm_i915_gem_request *request;
/* Clean up our request list when the client is going away, so that
* later retire_requests won't dereference our soon-to-be-gone
* file_priv.
*/
spin_lock(&file_priv->mm.lock);
- while (!list_empty(&file_priv->mm.request_list)) {
- struct drm_i915_gem_request *request;
-
- request = list_first_entry(&file_priv->mm.request_list,
- struct drm_i915_gem_request,
- client_list);
- list_del(&request->client_list);
+ list_for_each_entry(request, &file_priv->mm.request_list, client_list)
request->file_priv = NULL;
- }
spin_unlock(&file_priv->mm.lock);
if (!list_empty(&file_priv->rps.link)) {
spin_lock_init(&file_priv->mm.lock);
INIT_LIST_HEAD(&file_priv->mm.request_list);
- file_priv->bsd_ring = -1;
+ file_priv->bsd_engine = -1;
ret = i915_gem_context_open(dev, file);
if (ret)
struct drm_i915_gem_object *new,
unsigned frontbuffer_bits)
{
+ /* Control of individual bits within the mask is guarded by
+ * the owning plane->mutex, i.e. we can never see concurrent
+ * manipulation of individual bits. But since the bitfield as a whole
+ * is updated using RMW, we need to use atomics in order to update
+ * the bits.
+ */
+ BUILD_BUG_ON(INTEL_FRONTBUFFER_BITS_PER_PIPE * I915_MAX_PIPES >
+ sizeof(atomic_t) * BITS_PER_BYTE);
+
if (old) {
- WARN_ON(!mutex_is_locked(&old->base.dev->struct_mutex));
- WARN_ON(!(old->frontbuffer_bits & frontbuffer_bits));
- old->frontbuffer_bits &= ~frontbuffer_bits;
+ WARN_ON(!(atomic_read(&old->frontbuffer_bits) & frontbuffer_bits));
+ atomic_andnot(frontbuffer_bits, &old->frontbuffer_bits);
}
if (new) {
- WARN_ON(!mutex_is_locked(&new->base.dev->struct_mutex));
- WARN_ON(new->frontbuffer_bits & frontbuffer_bits);
- new->frontbuffer_bits |= frontbuffer_bits;
- }
-}
-
-/* All the new VM stuff */
-u64 i915_gem_obj_offset(struct drm_i915_gem_object *o,
- struct i915_address_space *vm)
-{
- struct drm_i915_private *dev_priv = to_i915(o->base.dev);
- struct i915_vma *vma;
-
- WARN_ON(vm == &dev_priv->mm.aliasing_ppgtt->base);
-
- list_for_each_entry(vma, &o->vma_list, obj_link) {
- if (vma->is_ggtt &&
- vma->ggtt_view.type != I915_GGTT_VIEW_NORMAL)
- continue;
- if (vma->vm == vm)
- return vma->node.start;
- }
-
- WARN(1, "%s vma for this object not found.\n",
- i915_is_ggtt(vm) ? "global" : "ppgtt");
- return -1;
-}
-
-u64 i915_gem_obj_ggtt_offset_view(struct drm_i915_gem_object *o,
- const struct i915_ggtt_view *view)
-{
- struct i915_vma *vma;
-
- list_for_each_entry(vma, &o->vma_list, obj_link)
- if (vma->is_ggtt && i915_ggtt_view_equal(&vma->ggtt_view, view))
- return vma->node.start;
-
- WARN(1, "global vma for this object not found. (view=%u)\n", view->type);
- return -1;
-}
-
-bool i915_gem_obj_bound(struct drm_i915_gem_object *o,
- struct i915_address_space *vm)
-{
- struct i915_vma *vma;
-
- list_for_each_entry(vma, &o->vma_list, obj_link) {
- if (vma->is_ggtt &&
- vma->ggtt_view.type != I915_GGTT_VIEW_NORMAL)
- continue;
- if (vma->vm == vm && drm_mm_node_allocated(&vma->node))
- return true;
+ WARN_ON(atomic_read(&new->frontbuffer_bits) & frontbuffer_bits);
+ atomic_or(frontbuffer_bits, &new->frontbuffer_bits);
}
-
- return false;
-}
-
-bool i915_gem_obj_ggtt_bound_view(struct drm_i915_gem_object *o,
- const struct i915_ggtt_view *view)
-{
- struct i915_vma *vma;
-
- list_for_each_entry(vma, &o->vma_list, obj_link)
- if (vma->is_ggtt &&
- i915_ggtt_view_equal(&vma->ggtt_view, view) &&
- drm_mm_node_allocated(&vma->node))
- return true;
-
- return false;
-}
-
-bool i915_gem_obj_bound_any(struct drm_i915_gem_object *o)
-{
- struct i915_vma *vma;
-
- list_for_each_entry(vma, &o->vma_list, obj_link)
- if (drm_mm_node_allocated(&vma->node))
- return true;
-
- return false;
-}
-
-unsigned long i915_gem_obj_ggtt_size(struct drm_i915_gem_object *o)
-{
- struct i915_vma *vma;
-
- GEM_BUG_ON(list_empty(&o->vma_list));
-
- list_for_each_entry(vma, &o->vma_list, obj_link) {
- if (vma->is_ggtt &&
- vma->ggtt_view.type == I915_GGTT_VIEW_NORMAL)
- return vma->node.size;
- }
-
- return 0;
-}
-
-bool i915_gem_obj_is_pinned(struct drm_i915_gem_object *obj)
-{
- struct i915_vma *vma;
- list_for_each_entry(vma, &obj->vma_list, obj_link)
- if (vma->pin_count > 0)
- return true;
-
- return false;
}
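For reference, the lockless tracking above is nothing more than a read-modify-write on a shared bitmask; a minimal stand-alone C11 sketch (hypothetical plane bits and helper name, not the driver's API) shows why atomic or/and-not suffice once every plane owns disjoint bits:

#include <stdatomic.h>
#include <stdio.h>

/* Hypothetical per-plane bits; the driver derives these per pipe and plane. */
#define PLANE_A_BIT (1u << 0)
#define PLANE_B_BIT (1u << 1)

static atomic_uint frontbuffer_bits;

/* Move ownership of the bits from the old object to the new one, lock-free. */
static void track_fb(unsigned int old_bits, unsigned int new_bits)
{
	if (old_bits)
		atomic_fetch_and(&frontbuffer_bits, ~old_bits); /* clear old owner */
	if (new_bits)
		atomic_fetch_or(&frontbuffer_bits, new_bits);   /* mark new owner */
}

int main(void)
{
	track_fb(0, PLANE_A_BIT);            /* plane A starts scanning out */
	track_fb(PLANE_A_BIT, PLANE_B_BIT);  /* flip: A released, B takes over */
	printf("bits = 0x%x\n", atomic_load(&frontbuffer_bits)); /* prints 0x2 */
	return 0;
}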
/* Like i915_gem_object_get_page(), but mark the returned page dirty */
return obj;
fail:
- drm_gem_object_unreference(&obj->base);
+ i915_gem_object_put(obj);
return ERR_PTR(ret);
}
#include <drm/drm_edid.h>
#include <drm/drmP.h>
#include "intel_drv.h"
+#include "intel_frontbuffer.h"
#include <drm/i915_drm.h>
#include "i915_drv.h"
#include "i915_gem_dmabuf.h"
if (HAS_PCH_SPLIT(dev)) {
u32 port_sel;
- pp_reg = PCH_PP_CONTROL;
- port_sel = I915_READ(PCH_PP_ON_DELAYS) & PANEL_PORT_SELECT_MASK;
+ pp_reg = PP_CONTROL(0);
+ port_sel = I915_READ(PP_ON_DELAYS(0)) & PANEL_PORT_SELECT_MASK;
if (port_sel == PANEL_PORT_SELECT_LVDS &&
I915_READ(PCH_LVDS) & LVDS_PIPEB_SELECT)
/* XXX: else fix for eDP */
} else if (IS_VALLEYVIEW(dev) || IS_CHERRYVIEW(dev)) {
/* presumably write lock depends on pipe, not port select */
- pp_reg = VLV_PIPE_PP_CONTROL(pipe);
+ pp_reg = PP_CONTROL(pipe);
panel_pipe = pipe;
} else {
- pp_reg = PP_CONTROL;
+ pp_reg = PP_CONTROL(0);
if (I915_READ(LVDS) & LVDS_PIPEB_SELECT)
panel_pipe = PIPE_B;
}
}
}
-static void lpt_disable_pch_transcoder(struct drm_i915_private *dev_priv)
+void lpt_disable_pch_transcoder(struct drm_i915_private *dev_priv)
{
u32 val;
* a plane. On ILK+ the pipe PLLs are integrated, so we don't
* need the check.
*/
- if (HAS_GMCH_DISPLAY(dev_priv))
+ if (HAS_GMCH_DISPLAY(dev_priv)) {
if (intel_crtc_has_type(crtc->config, INTEL_OUTPUT_DSI))
assert_dsi_pll_enabled(dev_priv);
else
assert_pll_enabled(dev_priv, pipe);
- else {
+ } else {
if (crtc->config->has_pch_encoder) {
/* if driving the PCH, we need FDI enabled */
assert_fdi_rx_pll_enabled(dev_priv, pch_transcoder);
}
}
-static void
-intel_fill_fb_info(struct drm_i915_private *dev_priv,
- struct drm_framebuffer *fb)
-{
- struct intel_rotation_info *info = &to_intel_framebuffer(fb)->rot_info;
- unsigned int tile_size, tile_width, tile_height, cpp;
-
- tile_size = intel_tile_size(dev_priv);
-
- cpp = drm_format_plane_cpp(fb->pixel_format, 0);
- intel_tile_dims(dev_priv, &tile_width, &tile_height,
- fb->modifier[0], cpp);
-
- info->plane[0].width = DIV_ROUND_UP(fb->pitches[0], tile_width * cpp);
- info->plane[0].height = DIV_ROUND_UP(fb->height, tile_height);
-
- if (info->pixel_format == DRM_FORMAT_NV12) {
- cpp = drm_format_plane_cpp(fb->pixel_format, 1);
- intel_tile_dims(dev_priv, &tile_width, &tile_height,
- fb->modifier[1], cpp);
-
- info->uv_offset = fb->offsets[1];
- info->plane[1].width = DIV_ROUND_UP(fb->pitches[1], tile_width * cpp);
- info->plane[1].height = DIV_ROUND_UP(fb->height / 2, tile_height);
- }
-}
-
static unsigned int intel_linear_alignment(const struct drm_i915_private *dev_priv)
{
if (INTEL_INFO(dev_priv)->gen >= 9)
}
}
-int
-intel_pin_and_fence_fb_obj(struct drm_framebuffer *fb,
- unsigned int rotation)
+struct i915_vma *
+intel_pin_and_fence_fb_obj(struct drm_framebuffer *fb, unsigned int rotation)
{
struct drm_device *dev = fb->dev;
struct drm_i915_private *dev_priv = to_i915(dev);
struct drm_i915_gem_object *obj = intel_fb_obj(fb);
struct i915_ggtt_view view;
+ struct i915_vma *vma;
u32 alignment;
- int ret;
WARN_ON(!mutex_is_locked(&dev->struct_mutex));
*/
intel_runtime_pm_get(dev_priv);
- ret = i915_gem_object_pin_to_display_plane(obj, alignment,
- &view);
- if (ret)
- goto err_pm;
-
- /* Install a fence for tiled scan-out. Pre-i965 always needs a
- * fence, whereas 965+ only requires a fence if using
- * framebuffer compression. For simplicity, we always install
- * a fence as the cost is not that onerous.
- */
- if (view.type == I915_GGTT_VIEW_NORMAL) {
- ret = i915_gem_object_get_fence(obj);
- if (ret == -EDEADLK) {
- /*
- * -EDEADLK means there are no free fences
- * no pending flips.
- *
- * This is propagated to atomic, but it uses
- * -EDEADLK to force a locking recovery, so
- * change the returned error to -EBUSY.
- */
- ret = -EBUSY;
- goto err_unpin;
- } else if (ret)
- goto err_unpin;
+ vma = i915_gem_object_pin_to_display_plane(obj, alignment, &view);
+ if (IS_ERR(vma))
+ goto err;
- i915_gem_object_pin_fence(obj);
+ if (i915_vma_is_map_and_fenceable(vma)) {
+ /* Install a fence for tiled scan-out. Pre-i965 always needs a
+ * fence, whereas 965+ only requires a fence if using
+ * framebuffer compression. For simplicity, we always, when
+ * possible, install a fence as the cost is not that onerous.
+ *
+ * If we fail to fence the tiled scanout, then either the
+ * modeset will reject the change (which is highly unlikely as
+ * the affected systems, all but one, do not have unmappable
+ * space) or we will not be able to enable full powersaving
+ * techniques (also likely not to apply due to various limits
+ * FBC and the like impose on the size of the buffer, which
+ * presumably we violated anyway with this unmappable buffer).
+ * Anyway, it is presumably better to stumble onwards with
+ * something and try to run the system in a "less than optimal"
+ * mode that matches the user configuration.
+ */
+ if (i915_vma_get_fence(vma) == 0)
+ i915_vma_pin_fence(vma);
}
+err:
intel_runtime_pm_put(dev_priv);
- return 0;
-
-err_unpin:
- i915_gem_object_unpin_from_display_plane(obj, &view);
-err_pm:
- intel_runtime_pm_put(dev_priv);
- return ret;
+ return vma;
}
void intel_unpin_fb_obj(struct drm_framebuffer *fb, unsigned int rotation)
{
struct drm_i915_gem_object *obj = intel_fb_obj(fb);
struct i915_ggtt_view view;
+ struct i915_vma *vma;
WARN_ON(!mutex_is_locked(&obj->base.dev->struct_mutex));
intel_fill_fb_ggtt_view(&view, fb, rotation);
+ vma = i915_gem_object_to_ggtt(obj, &view);
- if (view.type == I915_GGTT_VIEW_NORMAL)
- i915_gem_object_unpin_fence(obj);
+ i915_vma_unpin_fence(vma);
+ i915_gem_object_unpin_from_display_plane(vma);
+}
- i915_gem_object_unpin_from_display_plane(obj, &view);
+static int intel_fb_pitch(const struct drm_framebuffer *fb, int plane,
+ unsigned int rotation)
+{
+ if (intel_rotation_90_or_270(rotation))
+ return to_intel_framebuffer(fb)->rotated[plane].pitch;
+ else
+ return fb->pitches[plane];
+}
+
+/*
+ * Convert the x/y offsets into a linear offset.
+ * Only valid with 0/180 degree rotation, which is fine since linear
+ * offset is only used with linear buffers on pre-hsw and tiled buffers
+ * with gen2/3, and 90/270 degree rotations aren't supported on any of them.
+ */
+u32 intel_fb_xy_to_linear(int x, int y,
+ const struct intel_plane_state *state,
+ int plane)
+{
+ const struct drm_framebuffer *fb = state->base.fb;
+ unsigned int cpp = drm_format_plane_cpp(fb->pixel_format, plane);
+ unsigned int pitch = fb->pitches[plane];
+
+ return y * pitch + x * cpp;
+}
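For concreteness, a tiny sketch of the same y * pitch + x * cpp mapping with made-up numbers (a 1920x1080 XRGB8888 buffer, so pitch = 7680 bytes and cpp = 4; the helper name is ours, not the driver's):

/* Byte offset of pixel (x, y) in a linear, unrotated buffer. */
static unsigned int xy_to_linear(int x, int y, unsigned int pitch, unsigned int cpp)
{
	return y * pitch + x * cpp;
}

/* Example: pitch = 1920 * 4 = 7680, cpp = 4, pixel (x = 3, y = 2)
 * -> 2 * 7680 + 3 * 4 = 15372 bytes from the start of the mapping. */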
+
+/*
+ * Add the x/y offsets derived from fb->offsets[] to the user
+ * specified plane src x/y offsets. The resulting x/y offsets
+ * specify the start of scanout from the beginning of the gtt mapping.
+ */
+void intel_add_fb_offsets(int *x, int *y,
+ const struct intel_plane_state *state,
+ int plane)
+
+{
+ const struct intel_framebuffer *intel_fb = to_intel_framebuffer(state->base.fb);
+ unsigned int rotation = state->base.rotation;
+
+ if (intel_rotation_90_or_270(rotation)) {
+ *x += intel_fb->rotated[plane].x;
+ *y += intel_fb->rotated[plane].y;
+ } else {
+ *x += intel_fb->normal[plane].x;
+ *y += intel_fb->normal[plane].y;
+ }
}
/*
- * Adjust the tile offset by moving the difference into
- * the x/y offsets.
- *
* Input tile dimensions and pitch must already be
* rotated to match x and y, and in pixel units.
*/
-static u32 intel_adjust_tile_offset(int *x, int *y,
- unsigned int tile_width,
- unsigned int tile_height,
- unsigned int tile_size,
- unsigned int pitch_tiles,
- u32 old_offset,
- u32 new_offset)
-{
+static u32 _intel_adjust_tile_offset(int *x, int *y,
+ unsigned int tile_width,
+ unsigned int tile_height,
+ unsigned int tile_size,
+ unsigned int pitch_tiles,
+ u32 old_offset,
+ u32 new_offset)
+{
+ unsigned int pitch_pixels = pitch_tiles * tile_width;
unsigned int tiles;
WARN_ON(old_offset & (tile_size - 1));
*y += tiles / pitch_tiles * tile_height;
*x += tiles % pitch_tiles * tile_width;
+ /* minimize x in case it got needlessly big */
+ *y += *x / pitch_pixels * tile_height;
+ *x %= pitch_pixels;
+
+ return new_offset;
+}
+
+/*
+ * Adjust the tile offset by moving the difference into
+ * the x/y offsets.
+ */
+static u32 intel_adjust_tile_offset(int *x, int *y,
+ const struct intel_plane_state *state, int plane,
+ u32 old_offset, u32 new_offset)
+{
+ const struct drm_i915_private *dev_priv = to_i915(state->base.plane->dev);
+ const struct drm_framebuffer *fb = state->base.fb;
+ unsigned int cpp = drm_format_plane_cpp(fb->pixel_format, plane);
+ unsigned int rotation = state->base.rotation;
+ unsigned int pitch = intel_fb_pitch(fb, plane, rotation);
+
+ WARN_ON(new_offset > old_offset);
+
+ if (fb->modifier[plane] != DRM_FORMAT_MOD_NONE) {
+ unsigned int tile_size, tile_width, tile_height;
+ unsigned int pitch_tiles;
+
+ tile_size = intel_tile_size(dev_priv);
+ intel_tile_dims(dev_priv, &tile_width, &tile_height,
+ fb->modifier[plane], cpp);
+
+ if (intel_rotation_90_or_270(rotation)) {
+ pitch_tiles = pitch / tile_height;
+ swap(tile_width, tile_height);
+ } else {
+ pitch_tiles = pitch / (tile_width * cpp);
+ }
+
+ _intel_adjust_tile_offset(x, y, tile_width, tile_height,
+ tile_size, pitch_tiles,
+ old_offset, new_offset);
+ } else {
+ old_offset += *y * pitch + *x * cpp;
+
+ *y = (old_offset - new_offset) / pitch;
+ *x = ((old_offset - new_offset) - *y * pitch) / cpp;
+ }
+
return new_offset;
}
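As a rough illustration of the linear (non-tiled) branch above, this sketch (assumed values, helper name ours) folds the byte difference between two base offsets back into the x/y offsets:

/* Fold (old_offset - new_offset) bytes into the x/y offsets of a linear fb. */
static void adjust_linear_offset(int *x, int *y,
				 unsigned int pitch, unsigned int cpp,
				 unsigned int old_offset, unsigned int new_offset)
{
	old_offset += *y * pitch + *x * cpp;       /* absolute byte address */
	*y = (old_offset - new_offset) / pitch;
	*x = (old_offset - new_offset) % pitch / cpp;
}

/* With pitch = 7680, cpp = 4, x = y = 0, old_offset = 15360 and new_offset = 0,
 * the same pixel is addressed as x = 0, y = 2 relative to the new base. */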
* In the 90/270 rotated case, x and y are assumed
* to be already rotated to match the rotated GTT view, and
* pitch is the tile_height aligned framebuffer height.
+ *
+ * This function is used when computing the derived information
+ * under intel_framebuffer, so using any of that information
+ * here is not allowed. Anything under drm_framebuffer can be
+ * used. This is why the user has to pass in the pitch since it
+ * is specified in the rotated orientation.
*/
-u32 intel_compute_tile_offset(int *x, int *y,
- const struct drm_framebuffer *fb, int plane,
- unsigned int pitch,
- unsigned int rotation)
+static u32 _intel_compute_tile_offset(const struct drm_i915_private *dev_priv,
+ int *x, int *y,
+ const struct drm_framebuffer *fb, int plane,
+ unsigned int pitch,
+ unsigned int rotation,
+ u32 alignment)
{
- const struct drm_i915_private *dev_priv = to_i915(fb->dev);
uint64_t fb_modifier = fb->modifier[plane];
unsigned int cpp = drm_format_plane_cpp(fb->pixel_format, plane);
- u32 offset, offset_aligned, alignment;
+ u32 offset, offset_aligned;
- alignment = intel_surf_alignment(dev_priv, fb_modifier);
if (alignment)
alignment--;
offset = (tile_rows * pitch_tiles + tiles) * tile_size;
offset_aligned = offset & ~alignment;
- intel_adjust_tile_offset(x, y, tile_width, tile_height,
- tile_size, pitch_tiles,
- offset, offset_aligned);
+ _intel_adjust_tile_offset(x, y, tile_width, tile_height,
+ tile_size, pitch_tiles,
+ offset, offset_aligned);
} else {
offset = *y * pitch + *x * cpp;
offset_aligned = offset & ~alignment;
return offset_aligned;
}
+u32 intel_compute_tile_offset(int *x, int *y,
+ const struct intel_plane_state *state,
+ int plane)
+{
+ const struct drm_i915_private *dev_priv = to_i915(state->base.plane->dev);
+ const struct drm_framebuffer *fb = state->base.fb;
+ unsigned int rotation = state->base.rotation;
+ int pitch = intel_fb_pitch(fb, plane, rotation);
+ u32 alignment;
+
+ /* AUX_DIST needs only 4K alignment */
+ if (fb->pixel_format == DRM_FORMAT_NV12 && plane == 1)
+ alignment = 4096;
+ else
+ alignment = intel_surf_alignment(dev_priv, fb->modifier[plane]);
+
+ return _intel_compute_tile_offset(dev_priv, x, y, fb, plane, pitch,
+ rotation, alignment);
+}
+
+/* Convert the fb->offset[] linear offset into x/y offsets */
+static void intel_fb_offset_to_xy(int *x, int *y,
+ const struct drm_framebuffer *fb, int plane)
+{
+ unsigned int cpp = drm_format_plane_cpp(fb->pixel_format, plane);
+ unsigned int pitch = fb->pitches[plane];
+ u32 linear_offset = fb->offsets[plane];
+
+ *y = linear_offset / pitch;
+ *x = linear_offset % pitch / cpp;
+}
+
+static unsigned int intel_fb_modifier_to_tiling(uint64_t fb_modifier)
+{
+ switch (fb_modifier) {
+ case I915_FORMAT_MOD_X_TILED:
+ return I915_TILING_X;
+ case I915_FORMAT_MOD_Y_TILED:
+ return I915_TILING_Y;
+ default:
+ return I915_TILING_NONE;
+ }
+}
+
+static int
+intel_fill_fb_info(struct drm_i915_private *dev_priv,
+ struct drm_framebuffer *fb)
+{
+ struct intel_framebuffer *intel_fb = to_intel_framebuffer(fb);
+ struct intel_rotation_info *rot_info = &intel_fb->rot_info;
+ u32 gtt_offset_rotated = 0;
+ unsigned int max_size = 0;
+ uint32_t format = fb->pixel_format;
+ int i, num_planes = drm_format_num_planes(format);
+ unsigned int tile_size = intel_tile_size(dev_priv);
+
+ for (i = 0; i < num_planes; i++) {
+ unsigned int width, height;
+ unsigned int cpp, size;
+ u32 offset;
+ int x, y;
+
+ cpp = drm_format_plane_cpp(format, i);
+ width = drm_format_plane_width(fb->width, format, i);
+ height = drm_format_plane_height(fb->height, format, i);
+
+ intel_fb_offset_to_xy(&x, &y, fb, i);
+
+ /*
+ * The fence (if used) is aligned to the start of the object
+ * so having the framebuffer wrap around across the edge of the
+ * fenced region doesn't really work. We have no API to configure
+ * the fence start offset within the object (nor could we probably
+ * on gen2/3). So it's just easier if we just require that the
+ * fb layout agrees with the fence layout. We already check that the
+ * fb stride matches the fence stride elsewhere.
+ */
+ if (i915_gem_object_is_tiled(intel_fb->obj) &&
+ (x + width) * cpp > fb->pitches[i]) {
+ DRM_DEBUG("bad fb plane %d offset: 0x%x\n",
+ i, fb->offsets[i]);
+ return -EINVAL;
+ }
+
+ /*
+ * First pixel of the framebuffer from
+ * the start of the normal gtt mapping.
+ */
+ intel_fb->normal[i].x = x;
+ intel_fb->normal[i].y = y;
+
+ offset = _intel_compute_tile_offset(dev_priv, &x, &y,
+ fb, 0, fb->pitches[i],
+ DRM_ROTATE_0, tile_size);
+ offset /= tile_size;
+
+ if (fb->modifier[i] != DRM_FORMAT_MOD_NONE) {
+ unsigned int tile_width, tile_height;
+ unsigned int pitch_tiles;
+ struct drm_rect r;
+
+ intel_tile_dims(dev_priv, &tile_width, &tile_height,
+ fb->modifier[i], cpp);
+
+ rot_info->plane[i].offset = offset;
+ rot_info->plane[i].stride = DIV_ROUND_UP(fb->pitches[i], tile_width * cpp);
+ rot_info->plane[i].width = DIV_ROUND_UP(x + width, tile_width);
+ rot_info->plane[i].height = DIV_ROUND_UP(y + height, tile_height);
+
+ intel_fb->rotated[i].pitch =
+ rot_info->plane[i].height * tile_height;
+
+ /* how many tiles does this plane need */
+ size = rot_info->plane[i].stride * rot_info->plane[i].height;
+ /*
+ * If the plane isn't horizontally tile aligned,
+ * we need one more tile.
+ */
+ if (x != 0)
+ size++;
+
+ /* rotate the x/y offsets to match the GTT view */
+ r.x1 = x;
+ r.y1 = y;
+ r.x2 = x + width;
+ r.y2 = y + height;
+ drm_rect_rotate(&r,
+ rot_info->plane[i].width * tile_width,
+ rot_info->plane[i].height * tile_height,
+ DRM_ROTATE_270);
+ x = r.x1;
+ y = r.y1;
+
+ /* rotate the tile dimensions to match the GTT view */
+ pitch_tiles = intel_fb->rotated[i].pitch / tile_height;
+ swap(tile_width, tile_height);
+
+ /*
+ * We only keep the x/y offsets, so push all of the
+ * gtt offset into the x/y offsets.
+ */
+ _intel_adjust_tile_offset(&x, &y, tile_width, tile_height,
+ tile_size, pitch_tiles,
+ gtt_offset_rotated * tile_size, 0);
+
+ gtt_offset_rotated += rot_info->plane[i].width * rot_info->plane[i].height;
+
+ /*
+ * First pixel of the framebuffer from
+ * the start of the rotated gtt mapping.
+ */
+ intel_fb->rotated[i].x = x;
+ intel_fb->rotated[i].y = y;
+ } else {
+ size = DIV_ROUND_UP((y + height) * fb->pitches[i] +
+ x * cpp, tile_size);
+ }
+
+ /* how many tiles in total needed in the bo */
+ max_size = max(max_size, offset + size);
+ }
+
+ if (max_size * tile_size > to_intel_framebuffer(fb)->obj->base.size) {
+ DRM_DEBUG("fb too big for bo (need %u bytes, have %zu bytes)\n",
+ max_size * tile_size, to_intel_framebuffer(fb)->obj->base.size);
+ return -EINVAL;
+ }
+
+ return 0;
+}
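To make the tile bookkeeping above concrete, a worked example with assumed numbers (a 1920x1080 XRGB8888 framebuffer, Y-tiled, taking a 4 KiB tile of 128 bytes by 32 rows, i.e. 32x32 pixels at cpp = 4, and x = y = 0):

/*
 * tile_width  = 128 / 4    = 32 pixels,  tile_height = 4096 / 128 = 32 rows
 * plane width  in tiles    = DIV_ROUND_UP(0 + 1920, 32) = 60
 * plane height in tiles    = DIV_ROUND_UP(0 + 1080, 32) = 34
 * rotated pitch            = 34 * 32 = 1088 pixels
 * tiles needed by the plane = 60 * 34 = 2040  (x == 0, so no extra tile)
 */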
+
static int i9xx_format_to_fourcc(int format)
{
switch (format) {
return false;
}
- obj->tiling_mode = plane_config->tiling;
- if (obj->tiling_mode == I915_TILING_X)
- obj->stride = fb->pitches[0];
+ if (plane_config->tiling == I915_TILING_X)
+ obj->tiling_and_stride = fb->pitches[0] | I915_TILING_X;
mode_cmd.pixel_format = fb->pixel_format;
mode_cmd.width = fb->width;
return true;
out_unref_obj:
- drm_gem_object_unreference(&obj->base);
+ i915_gem_object_put(obj);
mutex_unlock(&dev->struct_mutex);
return false;
}
continue;
obj = intel_fb_obj(fb);
- if (i915_gem_obj_ggtt_offset(obj) == plane_config->base) {
+ if (i915_gem_object_ggtt_offset(obj, NULL) == plane_config->base) {
drm_framebuffer_reference(fb);
goto valid_fb;
}
* simplest solution is to just disable the primary plane now and
* pretend the BIOS never had it enabled.
*/
- to_intel_plane_state(plane_state)->visible = false;
+ to_intel_plane_state(plane_state)->base.visible = false;
crtc_state->plane_mask &= ~(1 << drm_plane_index(primary));
intel_pre_disable_primary_noatomic(&intel_crtc->base);
intel_plane->disable_plane(primary, &intel_crtc->base);
plane_state->crtc_w = fb->width;
plane_state->crtc_h = fb->height;
- intel_state->src.x1 = plane_state->src_x;
- intel_state->src.y1 = plane_state->src_y;
- intel_state->src.x2 = plane_state->src_x + plane_state->src_w;
- intel_state->src.y2 = plane_state->src_y + plane_state->src_h;
- intel_state->dst.x1 = plane_state->crtc_x;
- intel_state->dst.y1 = plane_state->crtc_y;
- intel_state->dst.x2 = plane_state->crtc_x + plane_state->crtc_w;
- intel_state->dst.y2 = plane_state->crtc_y + plane_state->crtc_h;
+ intel_state->base.src.x1 = plane_state->src_x;
+ intel_state->base.src.y1 = plane_state->src_y;
+ intel_state->base.src.x2 = plane_state->src_x + plane_state->src_w;
+ intel_state->base.src.y2 = plane_state->src_y + plane_state->src_h;
+ intel_state->base.dst.x1 = plane_state->crtc_x;
+ intel_state->base.dst.y1 = plane_state->crtc_y;
+ intel_state->base.dst.x2 = plane_state->crtc_x + plane_state->crtc_w;
+ intel_state->base.dst.y2 = plane_state->crtc_y + plane_state->crtc_h;
obj = intel_fb_obj(fb);
- if (obj->tiling_mode != I915_TILING_NONE)
+ if (i915_gem_object_is_tiled(obj))
dev_priv->preserve_bios_swizzle = true;
drm_framebuffer_reference(fb);
primary->fb = primary->state->fb = fb;
primary->crtc = primary->state->crtc = &intel_crtc->base;
intel_crtc->base.state->plane_mask |= (1 << drm_plane_index(primary));
- obj->frontbuffer_bits |= to_intel_plane(primary)->frontbuffer_bit;
+ atomic_or(to_intel_plane(primary)->frontbuffer_bit,
+ &obj->frontbuffer_bits);
+}
+
+static int skl_max_plane_width(const struct drm_framebuffer *fb, int plane,
+ unsigned int rotation)
+{
+ int cpp = drm_format_plane_cpp(fb->pixel_format, plane);
+
+ switch (fb->modifier[plane]) {
+ case DRM_FORMAT_MOD_NONE:
+ case I915_FORMAT_MOD_X_TILED:
+ switch (cpp) {
+ case 8:
+ return 4096;
+ case 4:
+ case 2:
+ case 1:
+ return 8192;
+ default:
+ MISSING_CASE(cpp);
+ break;
+ }
+ break;
+ case I915_FORMAT_MOD_Y_TILED:
+ case I915_FORMAT_MOD_Yf_TILED:
+ switch (cpp) {
+ case 8:
+ return 2048;
+ case 4:
+ return 4096;
+ case 2:
+ case 1:
+ return 8192;
+ default:
+ MISSING_CASE(cpp);
+ break;
+ }
+ break;
+ default:
+ MISSING_CASE(fb->modifier[plane]);
+ }
+
+ return 2048;
+}
+
+static int skl_check_main_surface(struct intel_plane_state *plane_state)
+{
+ const struct drm_i915_private *dev_priv = to_i915(plane_state->base.plane->dev);
+ const struct drm_framebuffer *fb = plane_state->base.fb;
+ unsigned int rotation = plane_state->base.rotation;
+ int x = plane_state->base.src.x1 >> 16;
+ int y = plane_state->base.src.y1 >> 16;
+ int w = drm_rect_width(&plane_state->base.src) >> 16;
+ int h = drm_rect_height(&plane_state->base.src) >> 16;
+ int max_width = skl_max_plane_width(fb, 0, rotation);
+ int max_height = 4096;
+ u32 alignment, offset, aux_offset = plane_state->aux.offset;
+
+ if (w > max_width || h > max_height) {
+ DRM_DEBUG_KMS("requested Y/RGB source size %dx%d too big (limit %dx%d)\n",
+ w, h, max_width, max_height);
+ return -EINVAL;
+ }
+
+ intel_add_fb_offsets(&x, &y, plane_state, 0);
+ offset = intel_compute_tile_offset(&x, &y, plane_state, 0);
+
+ alignment = intel_surf_alignment(dev_priv, fb->modifier[0]);
+
+ /*
+ * AUX surface offset is specified as the distance from the
+ * main surface offset, and it must be non-negative. Make
+ * sure that is what we will get.
+ */
+ if (offset > aux_offset)
+ offset = intel_adjust_tile_offset(&x, &y, plane_state, 0,
+ offset, aux_offset & ~(alignment - 1));
+
+ /*
+ * When using an X-tiled surface, the plane blows up
+ * if the x offset + width exceeds the stride.
+ *
+ * TODO: linear and Y-tiled seem fine, Yf untested.
+ */
+ if (fb->modifier[0] == I915_FORMAT_MOD_X_TILED) {
+ int cpp = drm_format_plane_cpp(fb->pixel_format, 0);
+
+ while ((x + w) * cpp > fb->pitches[0]) {
+ if (offset == 0) {
+ DRM_DEBUG_KMS("Unable to find suitable display surface offset\n");
+ return -EINVAL;
+ }
+
+ offset = intel_adjust_tile_offset(&x, &y, plane_state, 0,
+ offset, offset - alignment);
+ }
+ }
+
+ plane_state->main.offset = offset;
+ plane_state->main.x = x;
+ plane_state->main.y = y;
+
+ return 0;
+}
+
+static int skl_check_nv12_aux_surface(struct intel_plane_state *plane_state)
+{
+ const struct drm_framebuffer *fb = plane_state->base.fb;
+ unsigned int rotation = plane_state->base.rotation;
+ int max_width = skl_max_plane_width(fb, 1, rotation);
+ int max_height = 4096;
+ int x = plane_state->base.src.x1 >> 17;
+ int y = plane_state->base.src.y1 >> 17;
+ int w = drm_rect_width(&plane_state->base.src) >> 17;
+ int h = drm_rect_height(&plane_state->base.src) >> 17;
+ u32 offset;
+
+ intel_add_fb_offsets(&x, &y, plane_state, 1);
+ offset = intel_compute_tile_offset(&x, &y, plane_state, 1);
+
+ /* FIXME not quite sure how/if these apply to the chroma plane */
+ if (w > max_width || h > max_height) {
+ DRM_DEBUG_KMS("CbCr source size %dx%d too big (limit %dx%d)\n",
+ w, h, max_width, max_height);
+ return -EINVAL;
+ }
+
+ plane_state->aux.offset = offset;
+ plane_state->aux.x = x;
+ plane_state->aux.y = y;
+
+ return 0;
+}
+
+int skl_check_plane_surface(struct intel_plane_state *plane_state)
+{
+ const struct drm_framebuffer *fb = plane_state->base.fb;
+ unsigned int rotation = plane_state->base.rotation;
+ int ret;
+
+ /* Rotate src coordinates to match rotated GTT view */
+ if (intel_rotation_90_or_270(rotation))
+ drm_rect_rotate(&plane_state->base.src,
+ fb->width, fb->height, DRM_ROTATE_270);
+
+ /*
+ * Handle the AUX surface first since
+ * the main surface setup depends on it.
+ */
+ if (fb->pixel_format == DRM_FORMAT_NV12) {
+ ret = skl_check_nv12_aux_surface(plane_state);
+ if (ret)
+ return ret;
+ } else {
+ plane_state->aux.offset = ~0xfff;
+ plane_state->aux.x = 0;
+ plane_state->aux.y = 0;
+ }
+
+ ret = skl_check_main_surface(plane_state);
+ if (ret)
+ return ret;
+
+ return 0;
}
static void i9xx_update_primary_plane(struct drm_plane *primary,
u32 dspcntr;
i915_reg_t reg = DSPCNTR(plane);
unsigned int rotation = plane_state->base.rotation;
- int cpp = drm_format_plane_cpp(fb->pixel_format, 0);
- int x = plane_state->src.x1 >> 16;
- int y = plane_state->src.y1 >> 16;
+ int x = plane_state->base.src.x1 >> 16;
+ int y = plane_state->base.src.y1 >> 16;
dspcntr = DISPPLANE_GAMMA_ENABLE;
BUG();
}
- if (INTEL_INFO(dev)->gen >= 4 &&
- obj->tiling_mode != I915_TILING_NONE)
+ if (INTEL_GEN(dev_priv) >= 4 &&
+ fb->modifier[0] == I915_FORMAT_MOD_X_TILED)
dspcntr |= DISPPLANE_TILED;
if (IS_G4X(dev))
dspcntr |= DISPPLANE_TRICKLE_FEED_DISABLE;
- linear_offset = y * fb->pitches[0] + x * cpp;
+ intel_add_fb_offsets(&x, &y, plane_state, 0);
- if (INTEL_INFO(dev)->gen >= 4) {
+ if (INTEL_INFO(dev)->gen >= 4)
intel_crtc->dspaddr_offset =
- intel_compute_tile_offset(&x, &y, fb, 0,
- fb->pitches[0], rotation);
- linear_offset -= intel_crtc->dspaddr_offset;
- } else {
- intel_crtc->dspaddr_offset = linear_offset;
- }
+ intel_compute_tile_offset(&x, &y, plane_state, 0);
- if (rotation == BIT(DRM_ROTATE_180)) {
+ if (rotation == DRM_ROTATE_180) {
dspcntr |= DISPPLANE_ROTATE_180;
x += (crtc_state->pipe_src_w - 1);
y += (crtc_state->pipe_src_h - 1);
-
- /* Finding the last pixel of the last line of the display
- data and adding to linear_offset*/
- linear_offset +=
- (crtc_state->pipe_src_h - 1) * fb->pitches[0] +
- (crtc_state->pipe_src_w - 1) * cpp;
}
+ linear_offset = intel_fb_xy_to_linear(x, y, plane_state, 0);
+
+ if (INTEL_INFO(dev)->gen < 4)
+ intel_crtc->dspaddr_offset = linear_offset;
+
intel_crtc->adjusted_x = x;
intel_crtc->adjusted_y = y;
I915_WRITE(DSPSTRIDE(plane), fb->pitches[0]);
if (INTEL_INFO(dev)->gen >= 4) {
I915_WRITE(DSPSURF(plane),
- i915_gem_obj_ggtt_offset(obj) + intel_crtc->dspaddr_offset);
+ intel_fb_gtt_offset(fb, rotation) +
+ intel_crtc->dspaddr_offset);
I915_WRITE(DSPTILEOFF(plane), (y << 16) | x);
I915_WRITE(DSPLINOFF(plane), linear_offset);
} else
- I915_WRITE(DSPADDR(plane), i915_gem_obj_ggtt_offset(obj) + linear_offset);
+ I915_WRITE(DSPADDR(plane), i915_gem_object_ggtt_offset(obj, NULL) + linear_offset);
POSTING_READ(reg);
}
struct drm_i915_private *dev_priv = to_i915(dev);
struct intel_crtc *intel_crtc = to_intel_crtc(crtc_state->base.crtc);
struct drm_framebuffer *fb = plane_state->base.fb;
- struct drm_i915_gem_object *obj = intel_fb_obj(fb);
int plane = intel_crtc->plane;
u32 linear_offset;
u32 dspcntr;
i915_reg_t reg = DSPCNTR(plane);
unsigned int rotation = plane_state->base.rotation;
- int cpp = drm_format_plane_cpp(fb->pixel_format, 0);
- int x = plane_state->src.x1 >> 16;
- int y = plane_state->src.y1 >> 16;
+ int x = plane_state->base.src.x1 >> 16;
+ int y = plane_state->base.src.y1 >> 16;
dspcntr = DISPPLANE_GAMMA_ENABLE;
dspcntr |= DISPLAY_PLANE_ENABLE;
BUG();
}
- if (obj->tiling_mode != I915_TILING_NONE)
+ if (fb->modifier[0] == I915_FORMAT_MOD_X_TILED)
dspcntr |= DISPPLANE_TILED;
if (!IS_HASWELL(dev) && !IS_BROADWELL(dev))
dspcntr |= DISPPLANE_TRICKLE_FEED_DISABLE;
- linear_offset = y * fb->pitches[0] + x * cpp;
+ intel_add_fb_offsets(&x, &y, plane_state, 0);
+
intel_crtc->dspaddr_offset =
- intel_compute_tile_offset(&x, &y, fb, 0,
- fb->pitches[0], rotation);
- linear_offset -= intel_crtc->dspaddr_offset;
- if (rotation == BIT(DRM_ROTATE_180)) {
+ intel_compute_tile_offset(&x, &y, plane_state, 0);
+
+ if (rotation == DRM_ROTATE_180) {
dspcntr |= DISPPLANE_ROTATE_180;
if (!IS_HASWELL(dev) && !IS_BROADWELL(dev)) {
x += (crtc_state->pipe_src_w - 1);
y += (crtc_state->pipe_src_h - 1);
-
- /* Finding the last pixel of the last line of the display
- data and adding to linear_offset*/
- linear_offset +=
- (crtc_state->pipe_src_h - 1) * fb->pitches[0] +
- (crtc_state->pipe_src_w - 1) * cpp;
}
}
+ linear_offset = intel_fb_xy_to_linear(x, y, plane_state, 0);
+
intel_crtc->adjusted_x = x;
intel_crtc->adjusted_y = y;
I915_WRITE(DSPSTRIDE(plane), fb->pitches[0]);
I915_WRITE(DSPSURF(plane),
- i915_gem_obj_ggtt_offset(obj) + intel_crtc->dspaddr_offset);
+ intel_fb_gtt_offset(fb, rotation) +
+ intel_crtc->dspaddr_offset);
if (IS_HASWELL(dev) || IS_BROADWELL(dev)) {
I915_WRITE(DSPOFFSET(plane), (y << 16) | x);
} else {
}
}
-u32 intel_plane_obj_offset(struct intel_plane *intel_plane,
- struct drm_i915_gem_object *obj,
- unsigned int plane)
+u32 intel_fb_gtt_offset(struct drm_framebuffer *fb,
+ unsigned int rotation)
{
+ struct drm_i915_gem_object *obj = intel_fb_obj(fb);
struct i915_ggtt_view view;
struct i915_vma *vma;
- u64 offset;
- intel_fill_fb_ggtt_view(&view, intel_plane->base.state->fb,
- intel_plane->base.state->rotation);
+ intel_fill_fb_ggtt_view(&view, fb, rotation);
- vma = i915_gem_obj_to_ggtt_view(obj, &view);
+ vma = i915_gem_object_to_ggtt(obj, &view);
if (WARN(!vma, "ggtt vma for display object not found! (view=%u)\n",
- view.type))
+ view.type))
return -1;
- offset = vma->node.start;
-
- if (plane == 1) {
- offset += vma->ggtt_view.params.rotated.uv_start_page *
- PAGE_SIZE;
- }
-
- WARN_ON(upper_32_bits(offset));
-
- return lower_32_bits(offset);
+ return i915_ggtt_offset(vma);
}
static void skl_detach_scaler(struct intel_crtc *intel_crtc, int id)
}
}
+u32 skl_plane_stride(const struct drm_framebuffer *fb, int plane,
+ unsigned int rotation)
+{
+ const struct drm_i915_private *dev_priv = to_i915(fb->dev);
+ u32 stride = intel_fb_pitch(fb, plane, rotation);
+
+ /*
+ * The stride is either expressed as a multiple of 64-byte chunks for
+ * linear buffers or as a number of tiles for tiled buffers.
+ */
+ if (intel_rotation_90_or_270(rotation)) {
+ int cpp = drm_format_plane_cpp(fb->pixel_format, plane);
+
+ stride /= intel_tile_height(dev_priv, fb->modifier[0], cpp);
+ } else {
+ stride /= intel_fb_stride_alignment(dev_priv, fb->modifier[0],
+ fb->pixel_format);
+ }
+
+ return stride;
+}
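A short worked example of the unit conversion above, under stated assumptions (pitch = 7680 bytes for a 1920-pixel-wide XRGB8888 fb, a 64-byte linear stride unit and an X tile that is 512 bytes wide; unrotated case only):

/*
 * linear  fb: PLANE_STRIDE = 7680 / 64  = 120  (in 64-byte units)
 * X-tiled fb: PLANE_STRIDE = 7680 / 512 = 15   (in tiles)
 */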
+
u32 skl_plane_ctl_format(uint32_t pixel_format)
{
switch (pixel_format) {
u32 skl_plane_ctl_rotation(unsigned int rotation)
{
switch (rotation) {
- case BIT(DRM_ROTATE_0):
+ case DRM_ROTATE_0:
break;
/*
* DRM_ROTATE_ is counter clockwise to stay compatible with Xrandr
* while i915 HW rotation is clockwise, hence the swapping here.
*/
- case BIT(DRM_ROTATE_90):
+ case DRM_ROTATE_90:
return PLANE_CTL_ROTATE_270;
- case BIT(DRM_ROTATE_180):
+ case DRM_ROTATE_180:
return PLANE_CTL_ROTATE_180;
- case BIT(DRM_ROTATE_270):
+ case DRM_ROTATE_270:
return PLANE_CTL_ROTATE_90;
default:
MISSING_CASE(rotation);
struct drm_i915_private *dev_priv = to_i915(dev);
struct intel_crtc *intel_crtc = to_intel_crtc(crtc_state->base.crtc);
struct drm_framebuffer *fb = plane_state->base.fb;
- struct drm_i915_gem_object *obj = intel_fb_obj(fb);
+ const struct skl_wm_values *wm = &dev_priv->wm.skl_results;
int pipe = intel_crtc->pipe;
- u32 plane_ctl, stride_div, stride;
- u32 tile_height, plane_offset, plane_size;
+ u32 plane_ctl;
unsigned int rotation = plane_state->base.rotation;
- int x_offset, y_offset;
- u32 surf_addr;
+ u32 stride = skl_plane_stride(fb, 0, rotation);
+ u32 surf_addr = plane_state->main.offset;
int scaler_id = plane_state->scaler_id;
- int src_x = plane_state->src.x1 >> 16;
- int src_y = plane_state->src.y1 >> 16;
- int src_w = drm_rect_width(&plane_state->src) >> 16;
- int src_h = drm_rect_height(&plane_state->src) >> 16;
- int dst_x = plane_state->dst.x1;
- int dst_y = plane_state->dst.y1;
- int dst_w = drm_rect_width(&plane_state->dst);
- int dst_h = drm_rect_height(&plane_state->dst);
+ int src_x = plane_state->main.x;
+ int src_y = plane_state->main.y;
+ int src_w = drm_rect_width(&plane_state->base.src) >> 16;
+ int src_h = drm_rect_height(&plane_state->base.src) >> 16;
+ int dst_x = plane_state->base.dst.x1;
+ int dst_y = plane_state->base.dst.y1;
+ int dst_w = drm_rect_width(&plane_state->base.dst);
+ int dst_h = drm_rect_height(&plane_state->base.dst);
plane_ctl = PLANE_CTL_ENABLE |
PLANE_CTL_PIPE_GAMMA_ENABLE |
plane_ctl |= PLANE_CTL_PLANE_GAMMA_DISABLE;
plane_ctl |= skl_plane_ctl_rotation(rotation);
- stride_div = intel_fb_stride_alignment(dev_priv, fb->modifier[0],
- fb->pixel_format);
- surf_addr = intel_plane_obj_offset(to_intel_plane(plane), obj, 0);
+ /* Sizes are 0 based */
+ src_w--;
+ src_h--;
+ dst_w--;
+ dst_h--;
- WARN_ON(drm_rect_width(&plane_state->src) == 0);
+ intel_crtc->adjusted_x = src_x;
+ intel_crtc->adjusted_y = src_y;
- if (intel_rotation_90_or_270(rotation)) {
- int cpp = drm_format_plane_cpp(fb->pixel_format, 0);
-
- /* stride = Surface height in tiles */
- tile_height = intel_tile_height(dev_priv, fb->modifier[0], cpp);
- stride = DIV_ROUND_UP(fb->height, tile_height);
- x_offset = stride * tile_height - src_y - src_h;
- y_offset = src_x;
- plane_size = (src_w - 1) << 16 | (src_h - 1);
- } else {
- stride = fb->pitches[0] / stride_div;
- x_offset = src_x;
- y_offset = src_y;
- plane_size = (src_h - 1) << 16 | (src_w - 1);
- }
- plane_offset = y_offset << 16 | x_offset;
-
- intel_crtc->adjusted_x = x_offset;
- intel_crtc->adjusted_y = y_offset;
+ if (wm->dirty_pipes & drm_crtc_mask(&intel_crtc->base))
+ skl_write_plane_wm(intel_crtc, wm, 0);
I915_WRITE(PLANE_CTL(pipe, 0), plane_ctl);
- I915_WRITE(PLANE_OFFSET(pipe, 0), plane_offset);
- I915_WRITE(PLANE_SIZE(pipe, 0), plane_size);
+ I915_WRITE(PLANE_OFFSET(pipe, 0), (src_y << 16) | src_x);
I915_WRITE(PLANE_STRIDE(pipe, 0), stride);
+ I915_WRITE(PLANE_SIZE(pipe, 0), (src_h << 16) | src_w);
if (scaler_id >= 0) {
uint32_t ps_ctrl = 0;
I915_WRITE(PLANE_POS(pipe, 0), (dst_y << 16) | dst_x);
}
- I915_WRITE(PLANE_SURF(pipe, 0), surf_addr);
+ I915_WRITE(PLANE_SURF(pipe, 0),
+ intel_fb_gtt_offset(fb, rotation) + surf_addr);
POSTING_READ(PLANE_SURF(pipe, 0));
}
{
struct drm_device *dev = crtc->dev;
struct drm_i915_private *dev_priv = to_i915(dev);
- int pipe = to_intel_crtc(crtc)->pipe;
+ struct intel_crtc *intel_crtc = to_intel_crtc(crtc);
+ int pipe = intel_crtc->pipe;
+
+ /*
+ * We only populate skl_results on watermark updates, and if the
+ * plane's visibility isn't actually changing, neither are its watermarks.
+ */
+ if (!crtc->primary->state->visible)
+ skl_write_plane_wm(intel_crtc, &dev_priv->wm.skl_results, 0);
I915_WRITE(PLANE_CTL(pipe, 0), 0);
I915_WRITE(PLANE_SURF(pipe, 0), 0);
struct intel_plane_state *plane_state =
to_intel_plane_state(plane->base.state);
- if (plane_state->visible)
+ if (plane_state->base.visible)
plane->update_plane(&plane->base,
to_intel_crtc_state(crtc->state),
plane_state);
return ret;
}
+static bool gpu_reset_clobbers_display(struct drm_i915_private *dev_priv)
+{
+ return intel_has_gpu_reset(dev_priv) &&
+ INTEL_GEN(dev_priv) < 5 && !IS_G4X(dev_priv);
+}
+
void intel_prepare_reset(struct drm_i915_private *dev_priv)
{
struct drm_device *dev = &dev_priv->drm;
struct drm_atomic_state *state;
int ret;
- /* no reset support for gen2 */
- if (IS_GEN2(dev_priv))
- return;
-
/*
* Need mode_config.mutex so that we don't
* trample ongoing ->detect() and whatnot.
}
/* reset doesn't touch the display, but flips might get nuked anyway, */
- if (INTEL_GEN(dev_priv) >= 5 || IS_G4X(dev_priv))
+ if (!i915.force_reset_modeset_test &&
+ !gpu_reset_clobbers_display(dev_priv))
return;
/*
*/
intel_complete_page_flips(dev_priv);
- /* no reset support for gen2 */
- if (IS_GEN2(dev_priv))
- return;
+ dev_priv->modeset_restore_state = NULL;
+
/* reset doesn't touch the display */
- if (INTEL_GEN(dev_priv) >= 5 || IS_G4X(dev_priv)) {
- /*
- * Flips in the rings have been nuked by the reset,
- * so update the base address of all primary
- * planes to the the last fb to make sure we're
- * showing the correct fb after a reset.
- *
- * FIXME: Atomic will make this obsolete since we won't schedule
- * CS-based flips (which might get lost in gpu resets) any more.
- */
- intel_update_primary_planes(dev);
+ if (!gpu_reset_clobbers_display(dev_priv)) {
+ if (!state) {
+ /*
+ * Flips in the rings have been nuked by the reset,
+ * so update the base address of all primary
+ * planes to the last fb to make sure we're
+ * showing the correct fb after a reset.
+ *
+ * FIXME: Atomic will make this obsolete since we won't schedule
+ * CS-based flips (which might get lost in gpu resets) any more.
+ */
+ intel_update_primary_planes(dev);
+ } else {
+ ret = __intel_display_resume(dev, state);
+ if (ret)
+ DRM_ERROR("Restoring old state failed with %i\n", ret);
+ }
} else {
/*
* The display has been reset as well,
mutex_unlock(&dev->mode_config.mutex);
}
+static bool abort_flip_on_reset(struct intel_crtc *crtc)
+{
+ struct i915_gpu_error *error = &to_i915(crtc->base.dev)->gpu_error;
+
+ if (i915_reset_in_progress(error))
+ return true;
+
+ if (crtc->reset_count != i915_reset_count(error))
+ return true;
+
+ return false;
+}
+
static bool intel_crtc_has_pending_flip(struct drm_crtc *crtc)
{
struct drm_device *dev = crtc->dev;
struct intel_crtc *intel_crtc = to_intel_crtc(crtc);
- unsigned reset_counter;
bool pending;
- reset_counter = i915_reset_counter(&to_i915(dev)->gpu_error);
- if (intel_crtc->reset_counter != reset_counter)
+ if (abort_flip_on_reset(intel_crtc))
return false;
spin_lock_irq(&dev->event_lock);
return 0;
}
-static void lpt_disable_iclkip(struct drm_i915_private *dev_priv)
+void lpt_disable_iclkip(struct drm_i915_private *dev_priv)
{
u32 temp;
intel_crtc->pipe, SKL_CRTC_INDEX);
return skl_update_scaler(state, !state->base.active, SKL_CRTC_INDEX,
- &state->scaler_state.scaler_id, BIT(DRM_ROTATE_0),
+ &state->scaler_state.scaler_id, DRM_ROTATE_0,
state->pipe_src_w, state->pipe_src_h,
adjusted_mode->crtc_hdisplay, adjusted_mode->crtc_vdisplay);
}
struct drm_framebuffer *fb = plane_state->base.fb;
int ret;
- bool force_detach = !fb || !plane_state->visible;
+ bool force_detach = !fb || !plane_state->base.visible;
DRM_DEBUG_KMS("Updating scaler for [PLANE:%d:%s] scaler_user index %u.%u\n",
intel_plane->base.base.id, intel_plane->base.name,
drm_plane_index(&intel_plane->base),
&plane_state->scaler_id,
plane_state->base.rotation,
- drm_rect_width(&plane_state->src) >> 16,
- drm_rect_height(&plane_state->src) >> 16,
- drm_rect_width(&plane_state->dst),
- drm_rect_height(&plane_state->dst));
+ drm_rect_width(&plane_state->base.src) >> 16,
+ drm_rect_height(&plane_state->base.src) >> 16,
+ drm_rect_width(&plane_state->base.dst),
+ drm_rect_height(&plane_state->base.dst));
if (ret || plane_state->scaler_id < 0)
return ret;
struct drm_atomic_state *old_state = old_crtc_state->base.state;
struct intel_crtc_state *pipe_config =
to_intel_crtc_state(crtc->base.state);
- struct drm_device *dev = crtc->base.dev;
struct drm_plane *primary = crtc->base.primary;
struct drm_plane_state *old_pri_state =
drm_atomic_get_existing_plane_state(old_state, primary);
- intel_frontbuffer_flip(dev, pipe_config->fb_bits);
+ intel_frontbuffer_flip(to_i915(crtc->base.dev), pipe_config->fb_bits);
crtc->wm.cxsr_allowed = true;
intel_fbc_post_update(crtc);
- if (primary_state->visible &&
+ if (primary_state->base.visible &&
(needs_modeset(&pipe_config->base) ||
- !old_primary_state->visible))
+ !old_primary_state->base.visible))
intel_post_enable_primary(&crtc->base);
}
}
intel_fbc_pre_update(crtc, pipe_config, primary_state);
- if (old_primary_state->visible &&
- (modeset || !primary_state->visible))
+ if (old_primary_state->base.visible &&
+ (modeset || !primary_state->base.visible))
intel_pre_disable_primary(&crtc->base);
}
* to compute the mask of flip planes precisely. For the time being
* consider this a flip to a NULL plane.
*/
- intel_frontbuffer_flip(dev, INTEL_FRONTBUFFER_ALL_MASK(pipe));
+ intel_frontbuffer_flip(to_i915(dev), INTEL_FRONTBUFFER_ALL_MASK(pipe));
+}
+
+static void intel_encoders_pre_pll_enable(struct drm_crtc *crtc,
+ struct intel_crtc_state *crtc_state,
+ struct drm_atomic_state *old_state)
+{
+ struct drm_connector_state *old_conn_state;
+ struct drm_connector *conn;
+ int i;
+
+ for_each_connector_in_state(old_state, conn, old_conn_state, i) {
+ struct drm_connector_state *conn_state = conn->state;
+ struct intel_encoder *encoder =
+ to_intel_encoder(conn_state->best_encoder);
+
+ if (conn_state->crtc != crtc)
+ continue;
+
+ if (encoder->pre_pll_enable)
+ encoder->pre_pll_enable(encoder, crtc_state, conn_state);
+ }
+}
+
+static void intel_encoders_pre_enable(struct drm_crtc *crtc,
+ struct intel_crtc_state *crtc_state,
+ struct drm_atomic_state *old_state)
+{
+ struct drm_connector_state *old_conn_state;
+ struct drm_connector *conn;
+ int i;
+
+ for_each_connector_in_state(old_state, conn, old_conn_state, i) {
+ struct drm_connector_state *conn_state = conn->state;
+ struct intel_encoder *encoder =
+ to_intel_encoder(conn_state->best_encoder);
+
+ if (conn_state->crtc != crtc)
+ continue;
+
+ if (encoder->pre_enable)
+ encoder->pre_enable(encoder, crtc_state, conn_state);
+ }
+}
+
+static void intel_encoders_enable(struct drm_crtc *crtc,
+ struct intel_crtc_state *crtc_state,
+ struct drm_atomic_state *old_state)
+{
+ struct drm_connector_state *old_conn_state;
+ struct drm_connector *conn;
+ int i;
+
+ for_each_connector_in_state(old_state, conn, old_conn_state, i) {
+ struct drm_connector_state *conn_state = conn->state;
+ struct intel_encoder *encoder =
+ to_intel_encoder(conn_state->best_encoder);
+
+ if (conn_state->crtc != crtc)
+ continue;
+
+ encoder->enable(encoder, crtc_state, conn_state);
+ intel_opregion_notify_encoder(encoder, true);
+ }
+}
+
+static void intel_encoders_disable(struct drm_crtc *crtc,
+ struct intel_crtc_state *old_crtc_state,
+ struct drm_atomic_state *old_state)
+{
+ struct drm_connector_state *old_conn_state;
+ struct drm_connector *conn;
+ int i;
+
+ for_each_connector_in_state(old_state, conn, old_conn_state, i) {
+ struct intel_encoder *encoder =
+ to_intel_encoder(old_conn_state->best_encoder);
+
+ if (old_conn_state->crtc != crtc)
+ continue;
+
+ intel_opregion_notify_encoder(encoder, false);
+ encoder->disable(encoder, old_crtc_state, old_conn_state);
+ }
+}
+
+static void intel_encoders_post_disable(struct drm_crtc *crtc,
+ struct intel_crtc_state *old_crtc_state,
+ struct drm_atomic_state *old_state)
+{
+ struct drm_connector_state *old_conn_state;
+ struct drm_connector *conn;
+ int i;
+
+ for_each_connector_in_state(old_state, conn, old_conn_state, i) {
+ struct intel_encoder *encoder =
+ to_intel_encoder(old_conn_state->best_encoder);
+
+ if (old_conn_state->crtc != crtc)
+ continue;
+
+ if (encoder->post_disable)
+ encoder->post_disable(encoder, old_crtc_state, old_conn_state);
+ }
+}
+
+static void intel_encoders_post_pll_disable(struct drm_crtc *crtc,
+ struct intel_crtc_state *old_crtc_state,
+ struct drm_atomic_state *old_state)
+{
+ struct drm_connector_state *old_conn_state;
+ struct drm_connector *conn;
+ int i;
+
+ for_each_connector_in_state(old_state, conn, old_conn_state, i) {
+ struct intel_encoder *encoder =
+ to_intel_encoder(old_conn_state->best_encoder);
+
+ if (old_conn_state->crtc != crtc)
+ continue;
+
+ if (encoder->post_pll_disable)
+ encoder->post_pll_disable(encoder, old_crtc_state, old_conn_state);
+ }
}
-static void ironlake_crtc_enable(struct drm_crtc *crtc)
+static void ironlake_crtc_enable(struct intel_crtc_state *pipe_config,
+ struct drm_atomic_state *old_state)
{
+ struct drm_crtc *crtc = pipe_config->base.crtc;
struct drm_device *dev = crtc->dev;
struct drm_i915_private *dev_priv = to_i915(dev);
struct intel_crtc *intel_crtc = to_intel_crtc(crtc);
- struct intel_encoder *encoder;
int pipe = intel_crtc->pipe;
- struct intel_crtc_state *pipe_config =
- to_intel_crtc_state(crtc->state);
if (WARN_ON(intel_crtc->active))
return;
intel_crtc->active = true;
- for_each_encoder_on_crtc(dev, crtc, encoder)
- if (encoder->pre_enable)
- encoder->pre_enable(encoder);
+ intel_encoders_pre_enable(crtc, pipe_config, old_state);
if (intel_crtc->config->has_pch_encoder) {
/* Note: FDI PLL enabling _must_ be done before we enable the
assert_vblank_disabled(crtc);
drm_crtc_vblank_on(crtc);
- for_each_encoder_on_crtc(dev, crtc, encoder)
- encoder->enable(encoder);
+ intel_encoders_enable(crtc, pipe_config, old_state);
if (HAS_PCH_CPT(dev))
cpt_verify_modeset(dev, intel_crtc->pipe);
return HAS_IPS(crtc->base.dev) && crtc->pipe == PIPE_A;
}
-static void haswell_crtc_enable(struct drm_crtc *crtc)
+static void haswell_crtc_enable(struct intel_crtc_state *pipe_config,
+ struct drm_atomic_state *old_state)
{
+ struct drm_crtc *crtc = pipe_config->base.crtc;
struct drm_device *dev = crtc->dev;
struct drm_i915_private *dev_priv = to_i915(dev);
struct intel_crtc *intel_crtc = to_intel_crtc(crtc);
- struct intel_encoder *encoder;
int pipe = intel_crtc->pipe, hsw_workaround_pipe;
enum transcoder cpu_transcoder = intel_crtc->config->cpu_transcoder;
- struct intel_crtc_state *pipe_config =
- to_intel_crtc_state(crtc->state);
if (WARN_ON(intel_crtc->active))
return;
intel_set_pch_fifo_underrun_reporting(dev_priv, TRANSCODER_A,
false);
- for_each_encoder_on_crtc(dev, crtc, encoder)
- if (encoder->pre_pll_enable)
- encoder->pre_pll_enable(encoder);
+ intel_encoders_pre_pll_enable(crtc, pipe_config, old_state);
if (intel_crtc->config->shared_dpll)
intel_enable_shared_dpll(intel_crtc);
else
intel_set_cpu_fifo_underrun_reporting(dev_priv, pipe, true);
- for_each_encoder_on_crtc(dev, crtc, encoder) {
- if (encoder->pre_enable)
- encoder->pre_enable(encoder);
- }
+ intel_encoders_pre_enable(crtc, pipe_config, old_state);
if (intel_crtc->config->has_pch_encoder)
dev_priv->display.fdi_link_train(crtc);
assert_vblank_disabled(crtc);
drm_crtc_vblank_on(crtc);
- for_each_encoder_on_crtc(dev, crtc, encoder) {
- encoder->enable(encoder);
- intel_opregion_notify_encoder(encoder, true);
- }
+ intel_encoders_enable(crtc, pipe_config, old_state);
if (intel_crtc->config->has_pch_encoder) {
intel_wait_for_vblank(dev, pipe);
}
}
-static void ironlake_crtc_disable(struct drm_crtc *crtc)
+static void ironlake_crtc_disable(struct intel_crtc_state *old_crtc_state,
+ struct drm_atomic_state *old_state)
{
+ struct drm_crtc *crtc = old_crtc_state->base.crtc;
struct drm_device *dev = crtc->dev;
struct drm_i915_private *dev_priv = to_i915(dev);
struct intel_crtc *intel_crtc = to_intel_crtc(crtc);
- struct intel_encoder *encoder;
int pipe = intel_crtc->pipe;
/*
intel_set_pch_fifo_underrun_reporting(dev_priv, pipe, false);
}
- for_each_encoder_on_crtc(dev, crtc, encoder)
- encoder->disable(encoder);
+ intel_encoders_disable(crtc, old_crtc_state, old_state);
drm_crtc_vblank_off(crtc);
assert_vblank_disabled(crtc);
if (intel_crtc->config->has_pch_encoder)
ironlake_fdi_disable(crtc);
- for_each_encoder_on_crtc(dev, crtc, encoder)
- if (encoder->post_disable)
- encoder->post_disable(encoder);
+ intel_encoders_post_disable(crtc, old_crtc_state, old_state);
if (intel_crtc->config->has_pch_encoder) {
ironlake_disable_pch_transcoder(dev_priv, pipe);
intel_set_pch_fifo_underrun_reporting(dev_priv, pipe, true);
}
-static void haswell_crtc_disable(struct drm_crtc *crtc)
+static void haswell_crtc_disable(struct intel_crtc_state *old_crtc_state,
+ struct drm_atomic_state *old_state)
{
+ struct drm_crtc *crtc = old_crtc_state->base.crtc;
struct drm_device *dev = crtc->dev;
struct drm_i915_private *dev_priv = to_i915(dev);
struct intel_crtc *intel_crtc = to_intel_crtc(crtc);
- struct intel_encoder *encoder;
enum transcoder cpu_transcoder = intel_crtc->config->cpu_transcoder;
if (intel_crtc->config->has_pch_encoder)
intel_set_pch_fifo_underrun_reporting(dev_priv, TRANSCODER_A,
false);
- for_each_encoder_on_crtc(dev, crtc, encoder) {
- intel_opregion_notify_encoder(encoder, false);
- encoder->disable(encoder);
- }
+ intel_encoders_disable(crtc, old_crtc_state, old_state);
drm_crtc_vblank_off(crtc);
assert_vblank_disabled(crtc);
if (!transcoder_is_dsi(cpu_transcoder))
intel_ddi_disable_pipe_clock(intel_crtc);
- for_each_encoder_on_crtc(dev, crtc, encoder)
- if (encoder->post_disable)
- encoder->post_disable(encoder);
-
- if (intel_crtc->config->has_pch_encoder) {
- lpt_disable_pch_transcoder(dev_priv);
- lpt_disable_iclkip(dev_priv);
- intel_ddi_fdi_disable(crtc);
+ intel_encoders_post_disable(crtc, old_crtc_state, old_state);
+ if (old_crtc_state->has_pch_encoder)
intel_set_pch_fifo_underrun_reporting(dev_priv, TRANSCODER_A,
true);
- }
}
static void i9xx_pfit_enable(struct intel_crtc *crtc)
intel_display_power_put(dev_priv, POWER_DOMAIN_PIPE_A);
}
-static void valleyview_crtc_enable(struct drm_crtc *crtc)
+static void valleyview_crtc_enable(struct intel_crtc_state *pipe_config,
+ struct drm_atomic_state *old_state)
{
+ struct drm_crtc *crtc = pipe_config->base.crtc;
struct drm_device *dev = crtc->dev;
struct drm_i915_private *dev_priv = to_i915(dev);
struct intel_crtc *intel_crtc = to_intel_crtc(crtc);
- struct intel_encoder *encoder;
- struct intel_crtc_state *pipe_config =
- to_intel_crtc_state(crtc->state);
int pipe = intel_crtc->pipe;
if (WARN_ON(intel_crtc->active))
intel_set_cpu_fifo_underrun_reporting(dev_priv, pipe, true);
- for_each_encoder_on_crtc(dev, crtc, encoder)
- if (encoder->pre_pll_enable)
- encoder->pre_pll_enable(encoder);
+ intel_encoders_pre_pll_enable(crtc, pipe_config, old_state);
if (IS_CHERRYVIEW(dev)) {
chv_prepare_pll(intel_crtc, intel_crtc->config);
vlv_enable_pll(intel_crtc, intel_crtc->config);
}
- for_each_encoder_on_crtc(dev, crtc, encoder)
- if (encoder->pre_enable)
- encoder->pre_enable(encoder);
+ intel_encoders_pre_enable(crtc, pipe_config, old_state);
i9xx_pfit_enable(intel_crtc);
assert_vblank_disabled(crtc);
drm_crtc_vblank_on(crtc);
- for_each_encoder_on_crtc(dev, crtc, encoder)
- encoder->enable(encoder);
+ intel_encoders_enable(crtc, pipe_config, old_state);
}
static void i9xx_set_pll_dividers(struct intel_crtc *crtc)
I915_WRITE(FP1(crtc->pipe), crtc->config->dpll_hw_state.fp1);
}
-static void i9xx_crtc_enable(struct drm_crtc *crtc)
+static void i9xx_crtc_enable(struct intel_crtc_state *pipe_config,
+ struct drm_atomic_state *old_state)
{
+ struct drm_crtc *crtc = pipe_config->base.crtc;
struct drm_device *dev = crtc->dev;
struct drm_i915_private *dev_priv = to_i915(dev);
struct intel_crtc *intel_crtc = to_intel_crtc(crtc);
- struct intel_encoder *encoder;
- struct intel_crtc_state *pipe_config =
- to_intel_crtc_state(crtc->state);
enum pipe pipe = intel_crtc->pipe;
if (WARN_ON(intel_crtc->active))
if (!IS_GEN2(dev))
intel_set_cpu_fifo_underrun_reporting(dev_priv, pipe, true);
- for_each_encoder_on_crtc(dev, crtc, encoder)
- if (encoder->pre_enable)
- encoder->pre_enable(encoder);
+ intel_encoders_pre_enable(crtc, pipe_config, old_state);
i9xx_enable_pll(intel_crtc);
assert_vblank_disabled(crtc);
drm_crtc_vblank_on(crtc);
- for_each_encoder_on_crtc(dev, crtc, encoder)
- encoder->enable(encoder);
+ intel_encoders_enable(crtc, pipe_config, old_state);
}
static void i9xx_pfit_disable(struct intel_crtc *crtc)
I915_WRITE(PFIT_CONTROL, 0);
}
-static void i9xx_crtc_disable(struct drm_crtc *crtc)
+static void i9xx_crtc_disable(struct intel_crtc_state *old_crtc_state,
+ struct drm_atomic_state *old_state)
{
+ struct drm_crtc *crtc = old_crtc_state->base.crtc;
struct drm_device *dev = crtc->dev;
struct drm_i915_private *dev_priv = to_i915(dev);
struct intel_crtc *intel_crtc = to_intel_crtc(crtc);
- struct intel_encoder *encoder;
int pipe = intel_crtc->pipe;
/*
if (IS_GEN2(dev))
intel_wait_for_vblank(dev, pipe);
- for_each_encoder_on_crtc(dev, crtc, encoder)
- encoder->disable(encoder);
+ intel_encoders_disable(crtc, old_crtc_state, old_state);
drm_crtc_vblank_off(crtc);
assert_vblank_disabled(crtc);
i9xx_pfit_disable(intel_crtc);
- for_each_encoder_on_crtc(dev, crtc, encoder)
- if (encoder->post_disable)
- encoder->post_disable(encoder);
+ intel_encoders_post_disable(crtc, old_crtc_state, old_state);
if (!intel_crtc_has_type(intel_crtc->config, INTEL_OUTPUT_DSI)) {
if (IS_CHERRYVIEW(dev))
i9xx_disable_pll(intel_crtc);
}
- for_each_encoder_on_crtc(dev, crtc, encoder)
- if (encoder->post_pll_disable)
- encoder->post_pll_disable(encoder);
+ intel_encoders_post_pll_disable(crtc, old_crtc_state, old_state);
if (!IS_GEN2(dev))
intel_set_cpu_fifo_underrun_reporting(dev_priv, pipe, false);
struct drm_i915_private *dev_priv = to_i915(crtc->dev);
enum intel_display_power_domain domain;
unsigned long domains;
+ struct drm_atomic_state *state;
+ struct intel_crtc_state *crtc_state;
+ int ret;
if (!intel_crtc->active)
return;
- if (to_intel_plane_state(crtc->primary->state)->visible) {
+ if (to_intel_plane_state(crtc->primary->state)->base.visible) {
WARN_ON(intel_crtc->flip_work);
intel_pre_disable_primary_noatomic(crtc);
intel_crtc_disable_planes(crtc, 1 << drm_plane_index(crtc->primary));
- to_intel_plane_state(crtc->primary->state)->visible = false;
+ to_intel_plane_state(crtc->primary->state)->base.visible = false;
}
- dev_priv->display.crtc_disable(crtc);
+ state = drm_atomic_state_alloc(crtc->dev);
+ state->acquire_ctx = crtc->dev->mode_config.acquire_ctx;
+
+ /* Everything's already locked, -EDEADLK can't happen. */
+ crtc_state = intel_atomic_get_crtc_state(state, intel_crtc);
+ ret = drm_atomic_add_affected_connectors(state, crtc);
+
+ WARN_ON(IS_ERR(crtc_state) || ret);
+
+ dev_priv->display.crtc_disable(crtc_state, state);
+
+ drm_atomic_state_free(state);
DRM_DEBUG_KMS("[CRTC:%d:%s] hw state adjusted, was enabled, now disabled\n",
crtc->base.id, crtc->name);
static int pnv_get_display_clock_speed(struct drm_device *dev)
{
+ struct pci_dev *pdev = dev->pdev;
u16 gcfgc = 0;
- pci_read_config_word(dev->pdev, GCFGC, &gcfgc);
+ pci_read_config_word(pdev, GCFGC, &gcfgc);
switch (gcfgc & GC_DISPLAY_CLOCK_MASK) {
case GC_DISPLAY_CLOCK_267_MHZ_PNV:
static int i915gm_get_display_clock_speed(struct drm_device *dev)
{
+ struct pci_dev *pdev = dev->pdev;
u16 gcfgc = 0;
- pci_read_config_word(dev->pdev, GCFGC, &gcfgc);
+ pci_read_config_word(pdev, GCFGC, &gcfgc);
if (gcfgc & GC_LOW_FREQUENCY_ENABLE)
return 133333;
static int i85x_get_display_clock_speed(struct drm_device *dev)
{
+ struct pci_dev *pdev = dev->pdev;
u16 hpllcc = 0;
/*
* encoding is different :(
* FIXME is this the right way to detect 852GM/852GMV?
*/
- if (dev->pdev->revision == 0x1)
+ if (pdev->revision == 0x1)
return 133333;
- pci_bus_read_config_word(dev->pdev->bus,
+ pci_bus_read_config_word(pdev->bus,
PCI_DEVFN(0, 3), HPLLCC, &hpllcc);
/* Assume that the hardware is in the high speed state. This
static int gm45_get_display_clock_speed(struct drm_device *dev)
{
+ struct pci_dev *pdev = dev->pdev;
unsigned int cdclk_sel, vco = intel_hpll_vco(dev);
uint16_t tmp = 0;
- pci_read_config_word(dev->pdev, GCFGC, &tmp);
+ pci_read_config_word(pdev, GCFGC, &tmp);
cdclk_sel = (tmp >> 12) & 0x1;
static int i965gm_get_display_clock_speed(struct drm_device *dev)
{
+ struct pci_dev *pdev = dev->pdev;
static const uint8_t div_3200[] = { 16, 10, 8 };
static const uint8_t div_4000[] = { 20, 12, 10 };
static const uint8_t div_5333[] = { 24, 16, 14 };
unsigned int cdclk_sel, vco = intel_hpll_vco(dev);
uint16_t tmp = 0;
- pci_read_config_word(dev->pdev, GCFGC, &tmp);
+ pci_read_config_word(pdev, GCFGC, &tmp);
cdclk_sel = ((tmp >> 8) & 0x1f) - 1;
static int g33_get_display_clock_speed(struct drm_device *dev)
{
+ struct pci_dev *pdev = dev->pdev;
static const uint8_t div_3200[] = { 12, 10, 8, 7, 5, 16 };
static const uint8_t div_4000[] = { 14, 12, 10, 8, 6, 20 };
static const uint8_t div_4800[] = { 20, 14, 12, 10, 8, 24 };
unsigned int cdclk_sel, vco = intel_hpll_vco(dev);
uint16_t tmp = 0;
- pci_read_config_word(dev->pdev, GCFGC, &tmp);
+ pci_read_config_word(pdev, GCFGC, &tmp);
cdclk_sel = (tmp >> 4) & 0x7;
return;
error:
- kfree(fb);
+ kfree(intel_fb);
}
static void ironlake_get_pfit_config(struct intel_crtc *crtc,
I915_STATE_WARN(I915_READ(SPLL_CTL) & SPLL_PLL_ENABLE, "SPLL enabled\n");
I915_STATE_WARN(I915_READ(WRPLL_CTL(0)) & WRPLL_PLL_ENABLE, "WRPLL1 enabled\n");
I915_STATE_WARN(I915_READ(WRPLL_CTL(1)) & WRPLL_PLL_ENABLE, "WRPLL2 enabled\n");
- I915_STATE_WARN(I915_READ(PCH_PP_STATUS) & PP_ON, "Panel power on\n");
+ I915_STATE_WARN(I915_READ(PP_STATUS(0)) & PP_ON, "Panel power on\n");
I915_STATE_WARN(I915_READ(BLC_PWM_CPU_CTL2) & BLM_PWM_ENABLE,
"CPU PWM1 enabled\n");
if (IS_HASWELL(dev))
mutex_lock(&dev_priv->rps.hw_lock);
if (sandybridge_pcode_write(dev_priv, GEN6_PCODE_WRITE_D_COMP,
val))
- DRM_ERROR("Failed to write to D_COMP\n");
+ DRM_DEBUG_KMS("Failed to write to D_COMP\n");
mutex_unlock(&dev_priv->rps.hw_lock);
} else {
I915_WRITE(D_COMP_BDW, val);
switch (port) {
case PORT_A:
- pipe_config->ddi_pll_sel = SKL_DPLL0;
id = DPLL_ID_SKL_DPLL0;
break;
case PORT_B:
- pipe_config->ddi_pll_sel = SKL_DPLL1;
id = DPLL_ID_SKL_DPLL1;
break;
case PORT_C:
- pipe_config->ddi_pll_sel = SKL_DPLL2;
id = DPLL_ID_SKL_DPLL2;
break;
default:
u32 temp;
temp = I915_READ(DPLL_CTRL2) & DPLL_CTRL2_DDI_CLK_SEL_MASK(port);
- pipe_config->ddi_pll_sel = temp >> (port * 3 + 1);
+ id = temp >> (port * 3 + 1);
- switch (pipe_config->ddi_pll_sel) {
- case SKL_DPLL0:
- id = DPLL_ID_SKL_DPLL0;
- break;
- case SKL_DPLL1:
- id = DPLL_ID_SKL_DPLL1;
- break;
- case SKL_DPLL2:
- id = DPLL_ID_SKL_DPLL2;
- break;
- case SKL_DPLL3:
- id = DPLL_ID_SKL_DPLL3;
- break;
- default:
- MISSING_CASE(pipe_config->ddi_pll_sel);
+ if (WARN_ON(id < SKL_DPLL0 || id > SKL_DPLL3))
return;
- }
pipe_config->shared_dpll = intel_get_shared_dpll_by_id(dev_priv, id);
}
struct intel_crtc_state *pipe_config)
{
enum intel_dpll_id id;
+ uint32_t ddi_pll_sel = I915_READ(PORT_CLK_SEL(port));
- pipe_config->ddi_pll_sel = I915_READ(PORT_CLK_SEL(port));
-
- switch (pipe_config->ddi_pll_sel) {
+ switch (ddi_pll_sel) {
case PORT_CLK_SEL_WRPLL1:
id = DPLL_ID_WRPLL1;
break;
id = DPLL_ID_LCPLL_2700;
break;
default:
- MISSING_CASE(pipe_config->ddi_pll_sel);
+ MISSING_CASE(ddi_pll_sel);
/* fall through */
case PORT_CLK_SEL_NONE:
return;
struct intel_crtc *intel_crtc = to_intel_crtc(crtc);
uint32_t cntl = 0, size = 0;
- if (plane_state && plane_state->visible) {
+ if (plane_state && plane_state->base.visible) {
unsigned int width = plane_state->base.crtc_w;
unsigned int height = plane_state->base.crtc_h;
unsigned int stride = roundup_pow_of_two(width) * 4;
struct drm_device *dev = crtc->dev;
struct drm_i915_private *dev_priv = to_i915(dev);
struct intel_crtc *intel_crtc = to_intel_crtc(crtc);
+ const struct skl_wm_values *wm = &dev_priv->wm.skl_results;
int pipe = intel_crtc->pipe;
uint32_t cntl = 0;
- if (plane_state && plane_state->visible) {
+ if (INTEL_GEN(dev_priv) >= 9 && wm->dirty_pipes & drm_crtc_mask(crtc))
+ skl_write_cursor_wm(intel_crtc, wm);
+
+ if (plane_state && plane_state->base.visible) {
cntl = MCURSOR_GAMMA_ENABLE;
switch (plane_state->base.crtc_w) {
case 64:
if (HAS_DDI(dev))
cntl |= CURSOR_PIPE_CSC_ENABLE;
- if (plane_state->base.rotation == BIT(DRM_ROTATE_180))
+ if (plane_state->base.rotation == DRM_ROTATE_180)
cntl |= CURSOR_ROTATE_180;
}
/* ILK+ do this automagically */
if (HAS_GMCH_DISPLAY(dev) &&
- plane_state->base.rotation == BIT(DRM_ROTATE_180)) {
+ plane_state->base.rotation == DRM_ROTATE_180) {
base += (plane_state->base.crtc_h *
plane_state->base.crtc_w - 1) * 4;
}
fb = intel_framebuffer_create(dev, &mode_cmd, obj);
if (IS_ERR(fb))
- drm_gem_object_unreference_unlocked(&obj->base);
+ i915_gem_object_put_unlocked(obj);
return fb;
}
mutex_lock(&dev->struct_mutex);
intel_unpin_fb_obj(work->old_fb, primary->state->rotation);
- drm_gem_object_unreference(&work->pending_flip_obj->base);
-
- if (work->flip_queued_req)
- i915_gem_request_assign(&work->flip_queued_req, NULL);
+ i915_gem_object_put(work->pending_flip_obj);
mutex_unlock(&dev->struct_mutex);
- intel_frontbuffer_flip_complete(dev, to_intel_plane(primary)->frontbuffer_bit);
+ i915_gem_request_put(work->flip_queued_req);
+
+ intel_frontbuffer_flip_complete(to_i915(dev),
+ to_intel_plane(primary)->frontbuffer_bit);
intel_fbc_post_update(crtc);
drm_framebuffer_unreference(work->old_fb);
{
struct drm_device *dev = crtc->base.dev;
struct drm_i915_private *dev_priv = to_i915(dev);
- unsigned reset_counter;
- reset_counter = i915_reset_counter(&dev_priv->gpu_error);
- if (crtc->reset_counter != reset_counter)
+ if (abort_flip_on_reset(crtc))
return true;
/*
struct drm_i915_gem_request *req,
uint32_t flags)
{
- struct intel_engine_cs *engine = req->engine;
+ struct intel_ring *ring = req->ring;
struct intel_crtc *intel_crtc = to_intel_crtc(crtc);
u32 flip_mask;
int ret;
flip_mask = MI_WAIT_FOR_PLANE_B_FLIP;
else
flip_mask = MI_WAIT_FOR_PLANE_A_FLIP;
- intel_ring_emit(engine, MI_WAIT_FOR_EVENT | flip_mask);
- intel_ring_emit(engine, MI_NOOP);
- intel_ring_emit(engine, MI_DISPLAY_FLIP |
+ intel_ring_emit(ring, MI_WAIT_FOR_EVENT | flip_mask);
+ intel_ring_emit(ring, MI_NOOP);
+ intel_ring_emit(ring, MI_DISPLAY_FLIP |
MI_DISPLAY_FLIP_PLANE(intel_crtc->plane));
- intel_ring_emit(engine, fb->pitches[0]);
- intel_ring_emit(engine, intel_crtc->flip_work->gtt_offset);
- intel_ring_emit(engine, 0); /* aux display base address, unused */
+ intel_ring_emit(ring, fb->pitches[0]);
+ intel_ring_emit(ring, intel_crtc->flip_work->gtt_offset);
+ intel_ring_emit(ring, 0); /* aux display base address, unused */
return 0;
}
struct drm_i915_gem_request *req,
uint32_t flags)
{
- struct intel_engine_cs *engine = req->engine;
+ struct intel_ring *ring = req->ring;
struct intel_crtc *intel_crtc = to_intel_crtc(crtc);
u32 flip_mask;
int ret;
flip_mask = MI_WAIT_FOR_PLANE_B_FLIP;
else
flip_mask = MI_WAIT_FOR_PLANE_A_FLIP;
- intel_ring_emit(engine, MI_WAIT_FOR_EVENT | flip_mask);
- intel_ring_emit(engine, MI_NOOP);
- intel_ring_emit(engine, MI_DISPLAY_FLIP_I915 |
+ intel_ring_emit(ring, MI_WAIT_FOR_EVENT | flip_mask);
+ intel_ring_emit(ring, MI_NOOP);
+ intel_ring_emit(ring, MI_DISPLAY_FLIP_I915 |
MI_DISPLAY_FLIP_PLANE(intel_crtc->plane));
- intel_ring_emit(engine, fb->pitches[0]);
- intel_ring_emit(engine, intel_crtc->flip_work->gtt_offset);
- intel_ring_emit(engine, MI_NOOP);
+ intel_ring_emit(ring, fb->pitches[0]);
+ intel_ring_emit(ring, intel_crtc->flip_work->gtt_offset);
+ intel_ring_emit(ring, MI_NOOP);
return 0;
}
struct drm_i915_gem_request *req,
uint32_t flags)
{
- struct intel_engine_cs *engine = req->engine;
+ struct intel_ring *ring = req->ring;
struct drm_i915_private *dev_priv = to_i915(dev);
struct intel_crtc *intel_crtc = to_intel_crtc(crtc);
uint32_t pf, pipesrc;
* Display Registers (which do not change across a page-flip)
* so we need only reprogram the base address.
*/
- intel_ring_emit(engine, MI_DISPLAY_FLIP |
+ intel_ring_emit(ring, MI_DISPLAY_FLIP |
MI_DISPLAY_FLIP_PLANE(intel_crtc->plane));
- intel_ring_emit(engine, fb->pitches[0]);
- intel_ring_emit(engine, intel_crtc->flip_work->gtt_offset |
- obj->tiling_mode);
+ intel_ring_emit(ring, fb->pitches[0]);
+ intel_ring_emit(ring, intel_crtc->flip_work->gtt_offset |
+ intel_fb_modifier_to_tiling(fb->modifier[0]));
/* XXX Enabling the panel-fitter across page-flip is so far
* untested on non-native modes, so ignore it for now.
*/
pf = 0;
pipesrc = I915_READ(PIPESRC(intel_crtc->pipe)) & 0x0fff0fff;
- intel_ring_emit(engine, pf | pipesrc);
+ intel_ring_emit(ring, pf | pipesrc);
return 0;
}
struct drm_i915_gem_request *req,
uint32_t flags)
{
- struct intel_engine_cs *engine = req->engine;
+ struct intel_ring *ring = req->ring;
struct drm_i915_private *dev_priv = to_i915(dev);
struct intel_crtc *intel_crtc = to_intel_crtc(crtc);
uint32_t pf, pipesrc;
if (ret)
return ret;
- intel_ring_emit(engine, MI_DISPLAY_FLIP |
+ intel_ring_emit(ring, MI_DISPLAY_FLIP |
MI_DISPLAY_FLIP_PLANE(intel_crtc->plane));
- intel_ring_emit(engine, fb->pitches[0] | obj->tiling_mode);
- intel_ring_emit(engine, intel_crtc->flip_work->gtt_offset);
+ intel_ring_emit(ring, fb->pitches[0] |
+ intel_fb_modifier_to_tiling(fb->modifier[0]));
+ intel_ring_emit(ring, intel_crtc->flip_work->gtt_offset);
/* Contrary to the suggestions in the documentation,
* "Enable Panel Fitter" does not seem to be required when page
*/
pf = 0;
pipesrc = I915_READ(PIPESRC(intel_crtc->pipe)) & 0x0fff0fff;
- intel_ring_emit(engine, pf | pipesrc);
+ intel_ring_emit(ring, pf | pipesrc);
return 0;
}
struct drm_i915_gem_request *req,
uint32_t flags)
{
- struct intel_engine_cs *engine = req->engine;
+ struct intel_ring *ring = req->ring;
struct intel_crtc *intel_crtc = to_intel_crtc(crtc);
uint32_t plane_bit = 0;
int len, ret;
}
len = 4;
- if (engine->id == RCS) {
+ if (req->engine->id == RCS) {
len += 6;
/*
* On Gen 8, SRM is now taking an extra dword to accommodate
* for the RCS also doesn't appear to drop events. Setting the DERRMR
* to zero does lead to lockups within MI_DISPLAY_FLIP.
*/
- if (engine->id == RCS) {
- intel_ring_emit(engine, MI_LOAD_REGISTER_IMM(1));
- intel_ring_emit_reg(engine, DERRMR);
- intel_ring_emit(engine, ~(DERRMR_PIPEA_PRI_FLIP_DONE |
+ if (req->engine->id == RCS) {
+ intel_ring_emit(ring, MI_LOAD_REGISTER_IMM(1));
+ intel_ring_emit_reg(ring, DERRMR);
+ intel_ring_emit(ring, ~(DERRMR_PIPEA_PRI_FLIP_DONE |
DERRMR_PIPEB_PRI_FLIP_DONE |
DERRMR_PIPEC_PRI_FLIP_DONE));
if (IS_GEN8(dev))
- intel_ring_emit(engine, MI_STORE_REGISTER_MEM_GEN8 |
+ intel_ring_emit(ring, MI_STORE_REGISTER_MEM_GEN8 |
MI_SRM_LRM_GLOBAL_GTT);
else
- intel_ring_emit(engine, MI_STORE_REGISTER_MEM |
+ intel_ring_emit(ring, MI_STORE_REGISTER_MEM |
MI_SRM_LRM_GLOBAL_GTT);
- intel_ring_emit_reg(engine, DERRMR);
- intel_ring_emit(engine, engine->scratch.gtt_offset + 256);
+ intel_ring_emit_reg(ring, DERRMR);
+ intel_ring_emit(ring,
+ i915_ggtt_offset(req->engine->scratch) + 256);
if (IS_GEN8(dev)) {
- intel_ring_emit(engine, 0);
- intel_ring_emit(engine, MI_NOOP);
+ intel_ring_emit(ring, 0);
+ intel_ring_emit(ring, MI_NOOP);
}
}
- intel_ring_emit(engine, MI_DISPLAY_FLIP_I915 | plane_bit);
- intel_ring_emit(engine, (fb->pitches[0] | obj->tiling_mode));
- intel_ring_emit(engine, intel_crtc->flip_work->gtt_offset);
- intel_ring_emit(engine, (MI_NOOP));
+ intel_ring_emit(ring, MI_DISPLAY_FLIP_I915 | plane_bit);
+ intel_ring_emit(ring, fb->pitches[0] |
+ intel_fb_modifier_to_tiling(fb->modifier[0]));
+ intel_ring_emit(ring, intel_crtc->flip_work->gtt_offset);
+ intel_ring_emit(ring, (MI_NOOP));
return 0;
}
if (resv && !reservation_object_test_signaled_rcu(resv, false))
return true;
- return engine != i915_gem_request_get_engine(obj->last_write_req);
+ return engine != i915_gem_active_get_engine(&obj->last_write,
+ &obj->base.dev->struct_mutex);
}
static void skl_do_mmio_flip(struct intel_crtc *intel_crtc,
struct drm_i915_private *dev_priv = to_i915(dev);
struct drm_framebuffer *fb = intel_crtc->base.primary->fb;
const enum pipe pipe = intel_crtc->pipe;
- u32 ctl, stride, tile_height;
+ u32 ctl, stride = skl_plane_stride(fb, 0, rotation);
ctl = I915_READ(PLANE_CTL(pipe, 0));
ctl &= ~PLANE_CTL_TILED_MASK;
MISSING_CASE(fb->modifier[0]);
}
- /*
- * The stride is either expressed as a multiple of 64 bytes chunks for
- * linear buffers or in number of tiles for tiled buffers.
- */
- if (intel_rotation_90_or_270(rotation)) {
- /* stride = Surface height in tiles */
- tile_height = intel_tile_height(dev_priv, fb->modifier[0], 0);
- stride = DIV_ROUND_UP(fb->height, tile_height);
- } else {
- stride = fb->pitches[0] /
- intel_fb_stride_alignment(dev_priv, fb->modifier[0],
- fb->pixel_format);
- }
-
/*
* Both PLANE_CTL and PLANE_STRIDE are not updated on vblank but on
* PLANE_SURF updates; the update is then guaranteed to be atomic.
{
struct drm_device *dev = intel_crtc->base.dev;
struct drm_i915_private *dev_priv = to_i915(dev);
- struct intel_framebuffer *intel_fb =
- to_intel_framebuffer(intel_crtc->base.primary->fb);
- struct drm_i915_gem_object *obj = intel_fb->obj;
+ struct drm_framebuffer *fb = intel_crtc->base.primary->fb;
i915_reg_t reg = DSPCNTR(intel_crtc->plane);
u32 dspcntr;
dspcntr = I915_READ(reg);
- if (obj->tiling_mode != I915_TILING_NONE)
+ if (fb->modifier[0] == I915_FORMAT_MOD_X_TILED)
dspcntr |= DISPPLANE_TILED;
else
dspcntr &= ~DISPPLANE_TILED;
struct reservation_object *resv;
if (work->flip_queued_req)
- WARN_ON(__i915_wait_request(work->flip_queued_req,
- false, NULL,
- &dev_priv->rps.mmioflips));
+ WARN_ON(i915_wait_request(work->flip_queued_req,
+ 0, NULL, NO_WAITBOOST));
/* For framebuffer backed by dmabuf, wait for fence */
resv = i915_gem_object_get_dmabuf_resv(obj);
struct intel_flip_work *work;
struct intel_engine_cs *engine;
bool mmio_flip;
- struct drm_i915_gem_request *request = NULL;
+ struct drm_i915_gem_request *request;
+ struct i915_vma *vma;
int ret;
/*
/* Reference the objects for the scheduled work. */
drm_framebuffer_reference(work->old_fb);
- drm_gem_object_reference(&obj->base);
crtc->primary->fb = fb;
update_state_fb(crtc->primary);
- intel_fbc_pre_update(intel_crtc, intel_crtc->config,
- to_intel_plane_state(primary->state));
-
- work->pending_flip_obj = obj;
+ work->pending_flip_obj = i915_gem_object_get(obj);
ret = i915_mutex_lock_interruptible(dev);
if (ret)
goto cleanup;
- intel_crtc->reset_counter = i915_reset_counter(&dev_priv->gpu_error);
- if (__i915_reset_in_progress_or_wedged(intel_crtc->reset_counter)) {
+ intel_crtc->reset_count = i915_reset_count(&dev_priv->gpu_error);
+ if (i915_reset_in_progress_or_wedged(&dev_priv->gpu_error)) {
ret = -EIO;
goto cleanup;
}
if (IS_VALLEYVIEW(dev) || IS_CHERRYVIEW(dev)) {
engine = &dev_priv->engine[BCS];
- if (obj->tiling_mode != intel_fb_obj(work->old_fb)->tiling_mode)
+ if (fb->modifier[0] != old_fb->modifier[0])
/* vlv: DISPLAY_FLIP fails to change tiling */
engine = NULL;
} else if (IS_IVYBRIDGE(dev) || IS_HASWELL(dev)) {
engine = &dev_priv->engine[BCS];
} else if (INTEL_INFO(dev)->gen >= 7) {
- engine = i915_gem_request_get_engine(obj->last_write_req);
+ engine = i915_gem_active_get_engine(&obj->last_write,
+ &obj->base.dev->struct_mutex);
if (engine == NULL || engine->id != RCS)
engine = &dev_priv->engine[BCS];
} else {
mmio_flip = use_mmio_flip(engine, obj);
- /* When using CS flips, we want to emit semaphores between rings.
- * However, when using mmio flips we will create a task to do the
- * synchronisation, so all we want here is to pin the framebuffer
- * into the display plane and skip any waits.
- */
- if (!mmio_flip) {
- ret = i915_gem_object_sync(obj, engine, &request);
- if (!ret && !request) {
- request = i915_gem_request_alloc(engine, NULL);
- ret = PTR_ERR_OR_ZERO(request);
- }
-
- if (ret)
- goto cleanup_pending;
- }
-
- ret = intel_pin_and_fence_fb_obj(fb, primary->state->rotation);
- if (ret)
+ vma = intel_pin_and_fence_fb_obj(fb, primary->state->rotation);
+ if (IS_ERR(vma)) {
+ ret = PTR_ERR(vma);
goto cleanup_pending;
+ }
- work->gtt_offset = intel_plane_obj_offset(to_intel_plane(primary),
- obj, 0);
+ work->gtt_offset = intel_fb_gtt_offset(fb, primary->state->rotation);
work->gtt_offset += intel_crtc->dspaddr_offset;
work->rotation = crtc->primary->state->rotation;
+ /*
+ * There's the potential that the next frame will not be compatible with
+ * FBC, so we want to call pre_update() before the actual page flip.
+ * The problem is that pre_update() caches some information about the fb
+ * object, so we want to do this only after the object is pinned. Let's
+ * be on the safe side and do this immediately before scheduling the
+ * flip.
+ */
+ intel_fbc_pre_update(intel_crtc, intel_crtc->config,
+ to_intel_plane_state(primary->state));
+
if (mmio_flip) {
INIT_WORK(&work->mmio_work, intel_mmio_flip_work_func);
- i915_gem_request_assign(&work->flip_queued_req,
- obj->last_write_req);
-
+ work->flip_queued_req = i915_gem_active_get(&obj->last_write,
+ &obj->base.dev->struct_mutex);
schedule_work(&work->mmio_work);
} else {
- i915_gem_request_assign(&work->flip_queued_req, request);
+ request = i915_gem_request_alloc(engine, engine->last_context);
+ if (IS_ERR(request)) {
+ ret = PTR_ERR(request);
+ goto cleanup_unpin;
+ }
+
+ ret = i915_gem_request_await_object(request, obj, false);
+ if (ret)
+ goto cleanup_request;
+
ret = dev_priv->display.queue_flip(dev, crtc, fb, obj, request,
page_flip_flags);
if (ret)
- goto cleanup_unpin;
+ goto cleanup_request;
intel_mark_page_flip_active(intel_crtc, work);
+ work->flip_queued_req = i915_gem_request_get(request);
i915_add_request_no_flush(request);
}
to_intel_plane(primary)->frontbuffer_bit);
mutex_unlock(&dev->struct_mutex);
- intel_frontbuffer_flip_prepare(dev,
+ intel_frontbuffer_flip_prepare(to_i915(dev),
to_intel_plane(primary)->frontbuffer_bit);
trace_i915_flip_request(intel_crtc->plane, obj);
return 0;
+cleanup_request:
+ i915_add_request_no_flush(request);
cleanup_unpin:
intel_unpin_fb_obj(fb, crtc->primary->state->rotation);
cleanup_pending:
- if (!IS_ERR_OR_NULL(request))
- i915_add_request_no_flush(request);
atomic_dec(&intel_crtc->unpin_work_count);
mutex_unlock(&dev->struct_mutex);
cleanup:
crtc->primary->fb = old_fb;
update_state_fb(crtc->primary);
- drm_gem_object_unreference_unlocked(&obj->base);
+ i915_gem_object_put_unlocked(obj);
drm_framebuffer_unreference(work->old_fb);
spin_lock_irq(&dev->event_lock);
struct intel_plane_state *cur = to_intel_plane_state(plane->state);
/* Update watermarks on tiling or size changes. */
- if (new->visible != cur->visible)
+ if (new->base.visible != cur->base.visible)
return true;
if (!cur->base.fb || !new->base.fb)
if (cur->base.fb->modifier[0] != new->base.fb->modifier[0] ||
cur->base.rotation != new->base.rotation ||
- drm_rect_width(&new->src) != drm_rect_width(&cur->src) ||
- drm_rect_height(&new->src) != drm_rect_height(&cur->src) ||
- drm_rect_width(&new->dst) != drm_rect_width(&cur->dst) ||
- drm_rect_height(&new->dst) != drm_rect_height(&cur->dst))
+ drm_rect_width(&new->base.src) != drm_rect_width(&cur->base.src) ||
+ drm_rect_height(&new->base.src) != drm_rect_height(&cur->base.src) ||
+ drm_rect_width(&new->base.dst) != drm_rect_width(&cur->base.dst) ||
+ drm_rect_height(&new->base.dst) != drm_rect_height(&cur->base.dst))
return true;
return false;
static bool needs_scaling(struct intel_plane_state *state)
{
- int src_w = drm_rect_width(&state->src) >> 16;
- int src_h = drm_rect_height(&state->src) >> 16;
- int dst_w = drm_rect_width(&state->dst);
- int dst_h = drm_rect_height(&state->dst);
+ int src_w = drm_rect_width(&state->base.src) >> 16;
+ int src_h = drm_rect_height(&state->base.src) >> 16;
+ int dst_w = drm_rect_width(&state->base.dst);
+ int dst_h = drm_rect_height(&state->base.dst);
return (src_w != dst_w || src_h != dst_h);
}
return ret;
}
- was_visible = old_plane_state->visible;
- visible = to_intel_plane_state(plane_state)->visible;
+ was_visible = old_plane_state->base.visible;
+ visible = to_intel_plane_state(plane_state)->base.visible;
if (!was_crtc_enabled && WARN_ON(was_visible))
was_visible = false;
* only combine the results from all planes in the current place?
*/
if (!is_crtc_enabled)
- to_intel_plane_state(plane_state)->visible = visible = false;
+ to_intel_plane_state(plane_state)->base.visible = visible = false;
if (!was_visible && !visible)
return 0;
DRM_DEBUG_KMS("double wide: %i\n", pipe_config->double_wide);
if (IS_BROXTON(dev)) {
- DRM_DEBUG_KMS("ddi_pll_sel: %u; dpll_hw_state: ebb0: 0x%x, ebb4: 0x%x,"
+ DRM_DEBUG_KMS("dpll_hw_state: ebb0: 0x%x, ebb4: 0x%x,"
"pll0: 0x%x, pll1: 0x%x, pll2: 0x%x, pll3: 0x%x, "
"pll6: 0x%x, pll8: 0x%x, pll9: 0x%x, pll10: 0x%x, pcsdw12: 0x%x\n",
- pipe_config->ddi_pll_sel,
pipe_config->dpll_hw_state.ebb0,
pipe_config->dpll_hw_state.ebb4,
pipe_config->dpll_hw_state.pll0,
pipe_config->dpll_hw_state.pll10,
pipe_config->dpll_hw_state.pcsdw12);
} else if (IS_SKYLAKE(dev) || IS_KABYLAKE(dev)) {
- DRM_DEBUG_KMS("ddi_pll_sel: %u; dpll_hw_state: "
+ DRM_DEBUG_KMS("dpll_hw_state: "
"ctrl1: 0x%x, cfgcr1: 0x%x, cfgcr2: 0x%x\n",
- pipe_config->ddi_pll_sel,
pipe_config->dpll_hw_state.ctrl1,
pipe_config->dpll_hw_state.cfgcr1,
pipe_config->dpll_hw_state.cfgcr2);
} else if (HAS_DDI(dev)) {
- DRM_DEBUG_KMS("ddi_pll_sel: 0x%x; dpll_hw_state: wrpll: 0x%x spll: 0x%x\n",
- pipe_config->ddi_pll_sel,
+ DRM_DEBUG_KMS("dpll_hw_state: wrpll: 0x%x spll: 0x%x\n",
pipe_config->dpll_hw_state.wrpll,
pipe_config->dpll_hw_state.spll);
} else {
DRM_DEBUG_KMS("planes on this crtc\n");
list_for_each_entry(plane, &dev->mode_config.plane_list, head) {
+ char *format_name;
intel_plane = to_intel_plane(plane);
if (intel_plane->pipe != crtc->pipe)
continue;
continue;
}
+ format_name = drm_get_format_name(fb->pixel_format);
+
DRM_DEBUG_KMS("[PLANE:%d:%s] enabled",
plane->base.id, plane->name);
DRM_DEBUG_KMS("\tFB:%d, fb = %ux%u format = %s",
- fb->base.id, fb->width, fb->height,
- drm_get_format_name(fb->pixel_format));
+ fb->base.id, fb->width, fb->height, format_name);
DRM_DEBUG_KMS("\tscaler:%d src %dx%d+%d+%d dst %dx%d+%d+%d\n",
state->scaler_id,
- state->src.x1 >> 16, state->src.y1 >> 16,
- drm_rect_width(&state->src) >> 16,
- drm_rect_height(&state->src) >> 16,
- state->dst.x1, state->dst.y1,
- drm_rect_width(&state->dst),
- drm_rect_height(&state->dst));
+ state->base.src.x1 >> 16,
+ state->base.src.y1 >> 16,
+ drm_rect_width(&state->base.src) >> 16,
+ drm_rect_height(&state->base.src) >> 16,
+ state->base.dst.x1, state->base.dst.y1,
+ drm_rect_width(&state->base.dst),
+ drm_rect_height(&state->base.dst));
+
+ kfree(format_name);
}
}
struct drm_device *dev = state->dev;
struct drm_connector *connector;
unsigned int used_ports = 0;
+ unsigned int used_mst_ports = 0;
/*
* Walk the connector list instead of the encoder
return false;
used_ports |= port_mask;
+ break;
+ case INTEL_OUTPUT_DP_MST:
+ used_mst_ports |=
+ 1 << enc_to_mst(&encoder->base)->primary->port;
+ break;
default:
break;
}
}
+ /* can't mix MST and SST/HDMI on the same port */
+ if (used_ports & used_mst_ports)
+ return false;
+
return true;
}
struct intel_crtc_scaler_state scaler_state;
struct intel_dpll_hw_state dpll_hw_state;
struct intel_shared_dpll *shared_dpll;
- uint32_t ddi_pll_sel;
bool force_thru;
/* FIXME: before the switch to atomic started, a new pipe_config was
scaler_state = crtc_state->scaler_state;
shared_dpll = crtc_state->shared_dpll;
dpll_hw_state = crtc_state->dpll_hw_state;
- ddi_pll_sel = crtc_state->ddi_pll_sel;
force_thru = crtc_state->pch_pfit.force_thru;
memset(crtc_state, 0, sizeof *crtc_state);
crtc_state->scaler_state = scaler_state;
crtc_state->shared_dpll = shared_dpll;
crtc_state->dpll_hw_state = dpll_hw_state;
- crtc_state->ddi_pll_sel = ddi_pll_sel;
crtc_state->pch_pfit.force_thru = force_thru;
}
encoder = to_intel_encoder(connector_state->best_encoder);
- if (!(encoder->compute_config(encoder, pipe_config))) {
+ if (!(encoder->compute_config(encoder, pipe_config, connector_state))) {
DRM_DEBUG_KMS("Encoder config failure\n");
goto fail;
}
return false;
}
-#define for_each_intel_crtc_masked(dev, mask, intel_crtc) \
- list_for_each_entry((intel_crtc), \
- &(dev)->mode_config.crtc_list, \
- base.head) \
- for_each_if (mask & (1 <<(intel_crtc)->pipe))
-
static bool
intel_compare_m_n(unsigned int m, unsigned int n,
unsigned int m2, unsigned int n2,
PIPE_CONF_CHECK_I(double_wide);
- PIPE_CONF_CHECK_X(ddi_pll_sel);
-
PIPE_CONF_CHECK_P(shared_dpll);
PIPE_CONF_CHECK_X(dpll_hw_state.dpll);
PIPE_CONF_CHECK_X(dpll_hw_state.dpll_md);
hw_entry->start, hw_entry->end);
}
- /* cursor */
- hw_entry = &hw_ddb.plane[pipe][PLANE_CURSOR];
- sw_entry = &sw_ddb->plane[pipe][PLANE_CURSOR];
-
- if (!skl_ddb_entry_equal(hw_entry, sw_entry)) {
- DRM_ERROR("mismatch in DDB state pipe %c cursor "
- "(expected (%u,%u), found (%u,%u))\n",
- pipe_name(pipe),
- sw_entry->start, sw_entry->end,
- hw_entry->start, hw_entry->end);
+ /*
+ * cursor
+ * If the cursor plane isn't active, we may not have updated its ddb
+ * allocation. In that case, since the ddb allocation will be updated
+ * once the plane becomes visible, we can skip this check.
+ */
+ if (intel_crtc->cursor_addr) {
+ hw_entry = &hw_ddb.plane[pipe][PLANE_CURSOR];
+ sw_entry = &sw_ddb->plane[pipe][PLANE_CURSOR];
+
+ if (!skl_ddb_entry_equal(hw_entry, sw_entry)) {
+ DRM_ERROR("mismatch in DDB state pipe %c cursor "
+ "(expected (%u,%u), found (%u,%u))\n",
+ pipe_name(pipe),
+ sw_entry->start, sw_entry->end,
+ hw_entry->start, hw_entry->end);
+ }
}
}
if (!intel_plane_state->wait_req)
continue;
- ret = __i915_wait_request(intel_plane_state->wait_req,
- true, NULL, NULL);
+ ret = i915_wait_request(intel_plane_state->wait_req,
+ I915_WAIT_INTERRUPTIBLE,
+ NULL, NULL);
if (ret) {
/* Any hang should be swallowed by the wait */
WARN_ON(ret == -EIO);
return false;
}
+static void intel_update_crtc(struct drm_crtc *crtc,
+ struct drm_atomic_state *state,
+ struct drm_crtc_state *old_crtc_state,
+ unsigned int *crtc_vblank_mask)
+{
+ struct drm_device *dev = crtc->dev;
+ struct drm_i915_private *dev_priv = to_i915(dev);
+ struct intel_crtc *intel_crtc = to_intel_crtc(crtc);
+ struct intel_crtc_state *pipe_config = to_intel_crtc_state(crtc->state);
+ bool modeset = needs_modeset(crtc->state);
+
+ if (modeset) {
+ update_scanline_offset(intel_crtc);
+ dev_priv->display.crtc_enable(pipe_config, state);
+ } else {
+ intel_pre_plane_update(to_intel_crtc_state(old_crtc_state));
+ }
+
+ if (drm_atomic_get_existing_plane_state(state, crtc->primary)) {
+ intel_fbc_enable(
+ intel_crtc, pipe_config,
+ to_intel_plane_state(crtc->primary->state));
+ }
+
+ drm_atomic_helper_commit_planes_on_crtc(old_crtc_state);
+
+ if (needs_vblank_wait(pipe_config))
+ *crtc_vblank_mask |= drm_crtc_mask(crtc);
+}
+
+static void intel_update_crtcs(struct drm_atomic_state *state,
+ unsigned int *crtc_vblank_mask)
+{
+ struct drm_crtc *crtc;
+ struct drm_crtc_state *old_crtc_state;
+ int i;
+
+ for_each_crtc_in_state(state, crtc, old_crtc_state, i) {
+ if (!crtc->state->active)
+ continue;
+
+ intel_update_crtc(crtc, state, old_crtc_state,
+ crtc_vblank_mask);
+ }
+}
+
+static void skl_update_crtcs(struct drm_atomic_state *state,
+ unsigned int *crtc_vblank_mask)
+{
+ struct drm_device *dev = state->dev;
+ struct drm_i915_private *dev_priv = to_i915(dev);
+ struct intel_atomic_state *intel_state = to_intel_atomic_state(state);
+ struct drm_crtc *crtc;
+ struct drm_crtc_state *old_crtc_state;
+ struct skl_ddb_allocation *new_ddb = &intel_state->wm_results.ddb;
+ struct skl_ddb_allocation *cur_ddb = &dev_priv->wm.skl_hw.ddb;
+ unsigned int updated = 0;
+ bool progress;
+ enum pipe pipe;
+
+ /*
+ * Whenever the number of active pipes changes, we need to make sure we
+ * update the pipes in the right order so that their ddb allocations
+ * never overlap with each other in between CRTC updates. Otherwise we'll
+ * cause pipe underruns and other bad stuff.
+ */
+ do {
+ int i;
+ progress = false;
+
+ for_each_crtc_in_state(state, crtc, old_crtc_state, i) {
+ bool vbl_wait = false;
+ unsigned int cmask = drm_crtc_mask(crtc);
+ pipe = to_intel_crtc(crtc)->pipe;
+
+ if (updated & cmask || !crtc->state->active)
+ continue;
+ if (skl_ddb_allocation_overlaps(state, cur_ddb, new_ddb,
+ pipe))
+ continue;
+
+ updated |= cmask;
+
+ /*
+ * If this is an already active pipe, its DDB changed,
+ * and this isn't the last pipe that needs updating,
+ * then we need to wait for a vblank to pass for the
+ * new ddb allocation to take effect.
+ */
+ if (!skl_ddb_allocation_equals(cur_ddb, new_ddb, pipe) &&
+ !crtc->state->active_changed &&
+ intel_state->wm_results.dirty_pipes != updated)
+ vbl_wait = true;
+
+ intel_update_crtc(crtc, state, old_crtc_state,
+ crtc_vblank_mask);
+
+ if (vbl_wait)
+ intel_wait_for_vblank(dev, pipe);
+
+ progress = true;
+ }
+ } while (progress);
+}
+
static void intel_atomic_commit_tail(struct drm_atomic_state *state)
{
struct drm_device *dev = state->dev;
if (!intel_plane_state->wait_req)
continue;
- ret = __i915_wait_request(intel_plane_state->wait_req,
- true, NULL, NULL);
+ ret = i915_wait_request(intel_plane_state->wait_req,
+ 0, NULL, NULL);
/* EIO should be eaten, and we can't get interrupted in the
* worker, and blocking commits have waited already. */
WARN_ON(ret);
if (old_crtc_state->active) {
intel_crtc_disable_planes(crtc, old_crtc_state->plane_mask);
- dev_priv->display.crtc_disable(crtc);
+ dev_priv->display.crtc_disable(to_intel_crtc_state(old_crtc_state), state);
intel_crtc->active = false;
intel_fbc_disable(intel_crtc);
intel_disable_shared_dpll(intel_crtc);
intel_modeset_verify_disabled(dev);
}
- /* Now enable the clocks, plane, pipe, and connectors that we set up. */
+ /* Complete the events for pipes that have now been disabled */
for_each_crtc_in_state(state, crtc, old_crtc_state, i) {
- struct intel_crtc *intel_crtc = to_intel_crtc(crtc);
bool modeset = needs_modeset(crtc->state);
- struct intel_crtc_state *pipe_config =
- to_intel_crtc_state(crtc->state);
-
- if (modeset && crtc->state->active) {
- update_scanline_offset(to_intel_crtc(crtc));
- dev_priv->display.crtc_enable(crtc);
- }
/* Complete events for now-disabled pipes here. */
if (modeset && !crtc->state->active && crtc->state->event) {
crtc->state->event = NULL;
}
-
- if (!modeset)
- intel_pre_plane_update(to_intel_crtc_state(old_crtc_state));
-
- if (crtc->state->active &&
- drm_atomic_get_existing_plane_state(state, crtc->primary))
- intel_fbc_enable(intel_crtc, pipe_config, to_intel_plane_state(crtc->primary->state));
-
- if (crtc->state->active)
- drm_atomic_helper_commit_planes_on_crtc(old_crtc_state);
-
- if (pipe_config->base.active && needs_vblank_wait(pipe_config))
- crtc_vblank_mask |= 1 << i;
}
+ /* Now enable the clocks, plane, pipe, and connectors that we set up. */
+ dev_priv->display.update_crtcs(state, &crtc_vblank_mask);
+
/* FIXME: We should call drm_atomic_helper_commit_hw_done() here
* already, but still need the state for the delayed optimization. To
* fix this:
{
struct drm_plane_state *old_plane_state;
struct drm_plane *plane;
- struct drm_i915_gem_object *obj, *old_obj;
- struct intel_plane *intel_plane;
int i;
- mutex_lock(&state->dev->struct_mutex);
- for_each_plane_in_state(state, plane, old_plane_state, i) {
- obj = intel_fb_obj(plane->state->fb);
- old_obj = intel_fb_obj(old_plane_state->fb);
- intel_plane = to_intel_plane(plane);
-
- i915_gem_track_fb(old_obj, obj, intel_plane->frontbuffer_bit);
- }
- mutex_unlock(&state->dev->struct_mutex);
+ for_each_plane_in_state(state, plane, old_plane_state, i)
+ i915_gem_track_fb(intel_fb_obj(old_plane_state->fb),
+ intel_fb_obj(plane->state->fb),
+ to_intel_plane(plane)->frontbuffer_bit);
}
/**
drm_atomic_state_free(state);
}
-#undef for_each_intel_crtc_masked
-
/*
* FIXME: Remove this once i915 is fully DRIVER_ATOMIC by calling
* drm_atomic_helper_legacy_gamma_set() directly.
*/
int
intel_prepare_plane_fb(struct drm_plane *plane,
- const struct drm_plane_state *new_state)
+ struct drm_plane_state *new_state)
{
struct drm_device *dev = plane->dev;
struct drm_framebuffer *fb = new_state->fb;
if (ret)
DRM_DEBUG_KMS("failed to attach phys object\n");
} else {
- ret = intel_pin_and_fence_fb_obj(fb, new_state->rotation);
+ struct i915_vma *vma;
+
+ vma = intel_pin_and_fence_fb_obj(fb, new_state->rotation);
+ if (IS_ERR(vma))
+ ret = PTR_ERR(vma);
}
if (ret == 0) {
- struct intel_plane_state *plane_state =
- to_intel_plane_state(new_state);
-
- i915_gem_request_assign(&plane_state->wait_req,
- obj->last_write_req);
+ to_intel_plane_state(new_state)->wait_req =
+ i915_gem_active_get(&obj->last_write,
+ &obj->base.dev->struct_mutex);
}
return ret;
*/
void
intel_cleanup_plane_fb(struct drm_plane *plane,
- const struct drm_plane_state *old_state)
+ struct drm_plane_state *old_state)
{
struct drm_device *dev = plane->dev;
struct intel_plane_state *old_intel_state;
+ struct intel_plane_state *intel_state = to_intel_plane_state(plane->state);
struct drm_i915_gem_object *old_obj = intel_fb_obj(old_state->fb);
struct drm_i915_gem_object *obj = intel_fb_obj(plane->state->fb);
!INTEL_INFO(dev)->cursor_needs_physical))
intel_unpin_fb_obj(old_state->fb, old_state->rotation);
+ i915_gem_request_assign(&intel_state->wait_req, NULL);
i915_gem_request_assign(&old_intel_state->wait_req, NULL);
}
struct intel_crtc_state *crtc_state,
struct intel_plane_state *state)
{
+ struct drm_i915_private *dev_priv = to_i915(plane->dev);
struct drm_crtc *crtc = state->base.crtc;
- struct drm_framebuffer *fb = state->base.fb;
int min_scale = DRM_PLANE_HELPER_NO_SCALING;
int max_scale = DRM_PLANE_HELPER_NO_SCALING;
bool can_position = false;
+ int ret;
- if (INTEL_INFO(plane->dev)->gen >= 9) {
+ if (INTEL_GEN(dev_priv) >= 9) {
/* use scaler when colorkey is not required */
if (state->ckey.flags == I915_SET_COLORKEY_NONE) {
min_scale = 1;
can_position = true;
}
- return drm_plane_helper_check_update(plane, crtc, fb, &state->src,
- &state->dst, &state->clip,
- state->base.rotation,
- min_scale, max_scale,
- can_position, true,
- &state->visible);
+ ret = drm_plane_helper_check_state(&state->base,
+ &state->clip,
+ min_scale, max_scale,
+ can_position, true);
+ if (ret)
+ return ret;
+
+ if (!state->base.fb)
+ return 0;
+
+ if (INTEL_GEN(dev_priv) >= 9) {
+ ret = skl_check_plane_surface(state);
+ if (ret)
+ return ret;
+ }
+
+ return 0;
}
static void intel_begin_crtc_commit(struct drm_crtc *crtc,
struct drm_crtc_state *old_crtc_state)
{
struct drm_device *dev = crtc->dev;
+ struct drm_i915_private *dev_priv = to_i915(dev);
struct intel_crtc *intel_crtc = to_intel_crtc(crtc);
struct intel_crtc_state *old_intel_state =
to_intel_crtc_state(old_crtc_state);
bool modeset = needs_modeset(crtc->state);
+ enum pipe pipe = intel_crtc->pipe;
/* Perform vblank evasion around commit operation */
intel_pipe_update_start(intel_crtc);
if (to_intel_crtc_state(crtc->state)->update_pipe)
intel_update_pipe_config(intel_crtc, old_intel_state);
- else if (INTEL_INFO(dev)->gen >= 9)
+ else if (INTEL_GEN(dev_priv) >= 9) {
skl_detach_scalers(intel_crtc);
+
+ I915_WRITE(PIPE_WM_LINETIME(pipe),
+ dev_priv->wm.skl_hw.wm_linetime[pipe]);
+ }
}
static void intel_finish_crtc_commit(struct drm_crtc *crtc,
void intel_create_rotation_property(struct drm_device *dev, struct intel_plane *plane)
{
if (!dev->mode_config.rotation_property) {
- unsigned long flags = BIT(DRM_ROTATE_0) |
- BIT(DRM_ROTATE_180);
+ unsigned long flags = DRM_ROTATE_0 |
+ DRM_ROTATE_180;
if (INTEL_INFO(dev)->gen >= 9)
- flags |= BIT(DRM_ROTATE_90) | BIT(DRM_ROTATE_270);
+ flags |= DRM_ROTATE_90 | DRM_ROTATE_270;
dev->mode_config.rotation_property =
drm_mode_create_rotation_property(dev, flags);
struct intel_crtc_state *crtc_state,
struct intel_plane_state *state)
{
- struct drm_crtc *crtc = crtc_state->base.crtc;
struct drm_framebuffer *fb = state->base.fb;
struct drm_i915_gem_object *obj = intel_fb_obj(fb);
enum pipe pipe = to_intel_plane(plane)->pipe;
unsigned stride;
int ret;
- ret = drm_plane_helper_check_update(plane, crtc, fb, &state->src,
- &state->dst, &state->clip,
- state->base.rotation,
- DRM_PLANE_HELPER_NO_SCALING,
- DRM_PLANE_HELPER_NO_SCALING,
- true, true, &state->visible);
+ ret = drm_plane_helper_check_state(&state->base,
+ &state->clip,
+ DRM_PLANE_HELPER_NO_SCALING,
+ DRM_PLANE_HELPER_NO_SCALING,
+ true, true);
if (ret)
return ret;
* Refuse the put the cursor into that compromised position.
*/
if (IS_CHERRYVIEW(plane->dev) && pipe == PIPE_C &&
- state->visible && state->base.crtc_x < 0) {
+ state->base.visible && state->base.crtc_x < 0) {
DRM_DEBUG_KMS("CHV cursor C not allowed to straddle the left screen edge\n");
return -EINVAL;
}
if (!obj)
addr = 0;
else if (!INTEL_INFO(dev)->cursor_needs_physical)
- addr = i915_gem_obj_ggtt_offset(obj);
+ addr = i915_gem_object_ggtt_offset(obj, NULL);
else
addr = obj->phys_handle->busaddr;
if (!dev->mode_config.rotation_property)
dev->mode_config.rotation_property =
drm_mode_create_rotation_property(dev,
- BIT(DRM_ROTATE_0) |
- BIT(DRM_ROTATE_180));
+ DRM_ROTATE_0 |
+ DRM_ROTATE_180);
if (dev->mode_config.rotation_property)
drm_object_attach_property(&cursor->base.base,
dev->mode_config.rotation_property,
return true;
}
+void intel_pps_unlock_regs_wa(struct drm_i915_private *dev_priv)
+{
+ int pps_num;
+ int pps_idx;
+
+ if (HAS_DDI(dev_priv))
+ return;
+ /*
+ * This w/a is needed at least on CPT/PPT, but to be sure apply it
+ * everywhere where registers can be write protected.
+ */
+ if (IS_VALLEYVIEW(dev_priv) || IS_CHERRYVIEW(dev_priv))
+ pps_num = 2;
+ else
+ pps_num = 1;
+
+ for (pps_idx = 0; pps_idx < pps_num; pps_idx++) {
+ u32 val = I915_READ(PP_CONTROL(pps_idx));
+
+ val = (val & ~PANEL_UNLOCK_MASK) | PANEL_UNLOCK_REGS;
+ I915_WRITE(PP_CONTROL(pps_idx), val);
+ }
+}
+
+static void intel_pps_init(struct drm_i915_private *dev_priv)
+{
+ if (HAS_PCH_SPLIT(dev_priv) || IS_BROXTON(dev_priv))
+ dev_priv->pps_mmio_base = PCH_PPS_BASE;
+ else if (IS_VALLEYVIEW(dev_priv) || IS_CHERRYVIEW(dev_priv))
+ dev_priv->pps_mmio_base = VLV_PPS_BASE;
+ else
+ dev_priv->pps_mmio_base = PPS_BASE;
+
+ intel_pps_unlock_regs_wa(dev_priv);
+}
+
static void intel_setup_outputs(struct drm_device *dev)
{
struct drm_i915_private *dev_priv = to_i915(dev);
struct intel_encoder *encoder;
bool dpd_is_edp = false;
+ intel_pps_init(dev_priv);
+
/*
* intel_edp_init_connector() depends on this completing first, to
* prevent the registration of both eDP and LVDS and the incorrect
drm_framebuffer_cleanup(fb);
mutex_lock(&dev->struct_mutex);
WARN_ON(!intel_fb->obj->framebuffer_references--);
- drm_gem_object_unreference(&intel_fb->obj->base);
+ i915_gem_object_put(intel_fb->obj);
mutex_unlock(&dev->struct_mutex);
kfree(intel_fb);
}
struct drm_i915_gem_object *obj)
{
struct drm_i915_private *dev_priv = to_i915(dev);
- unsigned int aligned_height;
+ unsigned int tiling = i915_gem_object_get_tiling(obj);
int ret;
u32 pitch_limit, stride_alignment;
+ char *format_name;
WARN_ON(!mutex_is_locked(&dev->struct_mutex));
if (mode_cmd->flags & DRM_MODE_FB_MODIFIERS) {
- /* Enforce that fb modifier and tiling mode match, but only for
- * X-tiled. This is needed for FBC. */
- if (!!(obj->tiling_mode == I915_TILING_X) !=
- !!(mode_cmd->modifier[0] == I915_FORMAT_MOD_X_TILED)) {
+ /*
+ * If there's a fence, enforce that
+ * the fb modifier and tiling mode match.
+ */
+ if (tiling != I915_TILING_NONE &&
+ tiling != intel_fb_modifier_to_tiling(mode_cmd->modifier[0])) {
DRM_DEBUG("tiling_mode doesn't match fb modifier\n");
return -EINVAL;
}
} else {
- if (obj->tiling_mode == I915_TILING_X)
+ if (tiling == I915_TILING_X) {
mode_cmd->modifier[0] = I915_FORMAT_MOD_X_TILED;
- else if (obj->tiling_mode == I915_TILING_Y) {
+ } else if (tiling == I915_TILING_Y) {
DRM_DEBUG("No Y tiling for legacy addfb\n");
return -EINVAL;
}
return -EINVAL;
}
+ /*
+ * gen2/3 display engine uses the fence if present,
+ * so the tiling mode must match the fb modifier exactly.
+ */
+ if (INTEL_INFO(dev_priv)->gen < 4 &&
+ tiling != intel_fb_modifier_to_tiling(mode_cmd->modifier[0])) {
+ DRM_DEBUG("tiling_mode must match fb modifier exactly on gen2/3\n");
+ return -EINVAL;
+ }
+
stride_alignment = intel_fb_stride_alignment(dev_priv,
mode_cmd->modifier[0],
mode_cmd->pixel_format);
return -EINVAL;
}
- if (mode_cmd->modifier[0] == I915_FORMAT_MOD_X_TILED &&
- mode_cmd->pitches[0] != obj->stride) {
+ /*
+ * If there's a fence, enforce that
+ * the fb pitch and fence stride match.
+ */
+ if (tiling != I915_TILING_NONE &&
+ mode_cmd->pitches[0] != i915_gem_object_get_stride(obj)) {
DRM_DEBUG("pitch (%d) must match tiling stride (%d)\n",
- mode_cmd->pitches[0], obj->stride);
+ mode_cmd->pitches[0],
+ i915_gem_object_get_stride(obj));
return -EINVAL;
}
break;
case DRM_FORMAT_XRGB1555:
if (INTEL_INFO(dev)->gen > 3) {
- DRM_DEBUG("unsupported pixel format: %s\n",
- drm_get_format_name(mode_cmd->pixel_format));
+ format_name = drm_get_format_name(mode_cmd->pixel_format);
+ DRM_DEBUG("unsupported pixel format: %s\n", format_name);
+ kfree(format_name);
return -EINVAL;
}
break;
case DRM_FORMAT_ABGR8888:
if (!IS_VALLEYVIEW(dev) && !IS_CHERRYVIEW(dev) &&
INTEL_INFO(dev)->gen < 9) {
- DRM_DEBUG("unsupported pixel format: %s\n",
- drm_get_format_name(mode_cmd->pixel_format));
+ format_name = drm_get_format_name(mode_cmd->pixel_format);
+ DRM_DEBUG("unsupported pixel format: %s\n", format_name);
+ kfree(format_name);
return -EINVAL;
}
break;
case DRM_FORMAT_XRGB2101010:
case DRM_FORMAT_XBGR2101010:
if (INTEL_INFO(dev)->gen < 4) {
- DRM_DEBUG("unsupported pixel format: %s\n",
- drm_get_format_name(mode_cmd->pixel_format));
+ format_name = drm_get_format_name(mode_cmd->pixel_format);
+ DRM_DEBUG("unsupported pixel format: %s\n", format_name);
+ kfree(format_name);
return -EINVAL;
}
break;
case DRM_FORMAT_ABGR2101010:
if (!IS_VALLEYVIEW(dev) && !IS_CHERRYVIEW(dev)) {
- DRM_DEBUG("unsupported pixel format: %s\n",
- drm_get_format_name(mode_cmd->pixel_format));
+ format_name = drm_get_format_name(mode_cmd->pixel_format);
+ DRM_DEBUG("unsupported pixel format: %s\n", format_name);
+ kfree(format_name);
return -EINVAL;
}
break;
case DRM_FORMAT_YVYU:
case DRM_FORMAT_VYUY:
if (INTEL_INFO(dev)->gen < 5) {
- DRM_DEBUG("unsupported pixel format: %s\n",
- drm_get_format_name(mode_cmd->pixel_format));
+ format_name = drm_get_format_name(mode_cmd->pixel_format);
+ DRM_DEBUG("unsupported pixel format: %s\n", format_name);
+ kfree(format_name);
return -EINVAL;
}
break;
default:
- DRM_DEBUG("unsupported pixel format: %s\n",
- drm_get_format_name(mode_cmd->pixel_format));
+ format_name = drm_get_format_name(mode_cmd->pixel_format);
+ DRM_DEBUG("unsupported pixel format: %s\n", format_name);
+ kfree(format_name);
return -EINVAL;
}
if (mode_cmd->offsets[0] != 0)
return -EINVAL;
- aligned_height = intel_fb_align_height(dev, mode_cmd->height,
- mode_cmd->pixel_format,
- mode_cmd->modifier[0]);
- /* FIXME drm helper for size checks (especially planar formats)? */
- if (obj->base.size < aligned_height * mode_cmd->pitches[0])
- return -EINVAL;
-
drm_helper_mode_fill_fb_struct(&intel_fb->base, mode_cmd);
intel_fb->obj = obj;
- intel_fill_fb_info(dev_priv, &intel_fb->base);
+ ret = intel_fill_fb_info(dev_priv, &intel_fb->base);
+ if (ret)
+ return ret;
ret = drm_framebuffer_init(dev, &intel_fb->base, &intel_fb_funcs);
if (ret) {
struct drm_i915_gem_object *obj;
struct drm_mode_fb_cmd2 mode_cmd = *user_mode_cmd;
- obj = to_intel_bo(drm_gem_object_lookup(filp, mode_cmd.handles[0]));
- if (&obj->base == NULL)
+ obj = i915_gem_object_lookup(filp, mode_cmd.handles[0]);
+ if (!obj)
return ERR_PTR(-ENOENT);
fb = intel_framebuffer_create(dev, &mode_cmd, obj);
if (IS_ERR(fb))
- drm_gem_object_unreference_unlocked(&obj->base);
+ i915_gem_object_put_unlocked(obj);
return fb;
}
skl_modeset_calc_cdclk;
}
+ if (dev_priv->info.gen >= 9)
+ dev_priv->display.update_crtcs = skl_update_crtcs;
+ else
+ dev_priv->display.update_crtcs = intel_update_crtcs;
+
switch (INTEL_INFO(dev_priv)->gen) {
case 2:
dev_priv->display.queue_flip = intel_gen2_queue_flip;
static void i915_disable_vga(struct drm_device *dev)
{
struct drm_i915_private *dev_priv = to_i915(dev);
+ struct pci_dev *pdev = dev_priv->drm.pdev;
u8 sr1;
i915_reg_t vga_reg = i915_vgacntrl_reg(dev);
/* WaEnableVGAAccessThroughIOPort:ctg,elk,ilk,snb,ivb,vlv,hsw */
- vga_get_uninterruptible(dev->pdev, VGA_RSRC_LEGACY_IO);
+ vga_get_uninterruptible(pdev, VGA_RSRC_LEGACY_IO);
outb(SR01, VGA_SR_INDEX);
sr1 = inb(VGA_SR_DATA);
outb(sr1 | 1<<5, VGA_SR_DATA);
- vga_put(dev->pdev, VGA_RSRC_LEGACY_IO);
+ vga_put(pdev, VGA_RSRC_LEGACY_IO);
udelay(300);
I915_WRITE(vga_reg, VGA_DISP_DISABLE);
dev_priv->atomic_cdclk_freq = dev_priv->cdclk_freq;
intel_init_clock_gating(dev);
- intel_enable_gt_powersave(dev_priv);
}
/*
return false;
}
-static bool intel_encoder_has_connectors(struct intel_encoder *encoder)
+static struct intel_connector *intel_encoder_find_connector(struct intel_encoder *encoder)
{
struct drm_device *dev = encoder->base.dev;
struct intel_connector *connector;
for_each_connector_on_encoder(dev, &encoder->base, connector)
- return true;
+ return connector;
- return false;
+ return NULL;
+}
+
+static bool has_pch_trancoder(struct drm_i915_private *dev_priv,
+ enum transcoder pch_transcoder)
+{
+ return HAS_PCH_IBX(dev_priv) || HAS_PCH_CPT(dev_priv) ||
+ (HAS_PCH_LPT_H(dev_priv) && pch_transcoder == TRANSCODER_A);
}
static void intel_sanitize_crtc(struct intel_crtc *crtc)
* Temporarily change the plane mapping and disable everything
* ... */
plane = crtc->plane;
- to_intel_plane_state(crtc->base.primary->state)->visible = true;
+ to_intel_plane_state(crtc->base.primary->state)->base.visible = true;
crtc->plane = !plane;
intel_crtc_disable_noatomic(&crtc->base);
crtc->plane = plane;
* worst a fifo underrun happens which also sets this to false.
*/
crtc->cpu_fifo_underrun_disabled = true;
- crtc->pch_fifo_underrun_disabled = true;
+ /*
+ * We track the PCH transcoder underrun reporting state
+ * within the crtc. With crtc for pipe A housing the underrun
+ * reporting state for PCH transcoder A, crtc for pipe B housing
+ * it for PCH transcoder B, etc. LPT-H has only PCH transcoder A,
+ * and marking underrun reporting as disabled for the non-existent
+ * PCH transcoders B and C would prevent enabling the south
+ * error interrupt (see cpt_can_enable_serr_int()).
+ */
+ if (has_pch_trancoder(dev_priv, (enum transcoder)crtc->pipe))
+ crtc->pch_fifo_underrun_disabled = true;
}
}
static void intel_sanitize_encoder(struct intel_encoder *encoder)
{
struct intel_connector *connector;
- struct drm_device *dev = encoder->base.dev;
/* We need to check both for a crtc link (meaning that the
* encoder is active and trying to read from a pipe) and the
bool has_active_crtc = encoder->base.crtc &&
to_intel_crtc(encoder->base.crtc)->active;
- if (intel_encoder_has_connectors(encoder) && !has_active_crtc) {
+ connector = intel_encoder_find_connector(encoder);
+ if (connector && !has_active_crtc) {
DRM_DEBUG_KMS("[ENCODER:%d:%s] has active connectors but no active pipe!\n",
encoder->base.base.id,
encoder->base.name);
* fallout from our resume register restoring. Disable
* the encoder manually again. */
if (encoder->base.crtc) {
+ struct drm_crtc_state *crtc_state = encoder->base.crtc->state;
+
DRM_DEBUG_KMS("[ENCODER:%d:%s] manually disabled\n",
encoder->base.base.id,
encoder->base.name);
- encoder->disable(encoder);
+ encoder->disable(encoder, to_intel_crtc_state(crtc_state), connector->base.state);
if (encoder->post_disable)
- encoder->post_disable(encoder);
+ encoder->post_disable(encoder, to_intel_crtc_state(crtc_state), connector->base.state);
}
encoder->base.crtc = NULL;
* a bug in one of the get_hw_state functions. Or someplace else
* in our code, like the register restore mess on resume. Clamp
* things to off as a safer default. */
- for_each_intel_connector(dev, connector) {
- if (connector->encoder != encoder)
- continue;
- connector->base.dpms = DRM_MODE_DPMS_OFF;
- connector->base.encoder = NULL;
- }
+
+ connector->base.dpms = DRM_MODE_DPMS_OFF;
+ connector->base.encoder = NULL;
}
/* Enabled encoders without active connectors will be fixed in
* the crtc fixup. */
struct intel_plane_state *plane_state =
to_intel_plane_state(primary->state);
- plane_state->visible = crtc->active &&
+ plane_state->base.visible = crtc->active &&
primary_get_hw_state(to_intel_plane(primary));
- if (plane_state->visible)
+ if (plane_state->base.visible)
crtc->base.state->plane_mask |= 1 << drm_plane_index(primary);
}
struct drm_i915_private *dev_priv = to_i915(dev);
struct drm_crtc *c;
struct drm_i915_gem_object *obj;
- int ret;
intel_init_gt_powersave(dev_priv);
* for this.
*/
for_each_crtc(dev, c) {
+ struct i915_vma *vma;
+
obj = intel_fb_obj(c->primary->fb);
if (obj == NULL)
continue;
mutex_lock(&dev->struct_mutex);
- ret = intel_pin_and_fence_fb_obj(c->primary->fb,
+ vma = intel_pin_and_fence_fb_obj(c->primary->fb,
c->primary->state->rotation);
mutex_unlock(&dev->struct_mutex);
- if (ret) {
+ if (IS_ERR(vma)) {
DRM_ERROR("failed to pin boot fb on pipe %d\n",
to_intel_crtc(c)->pipe);
drm_framebuffer_unreference(c->primary->fb);
#include <drm/drm_atomic.h>
#include <drm/drm_atomic_helper.h>
#include <drm/drm_crtc_helper.h>
-#include <linux/fb.h>
#include <linux/clk.h>
#include <linux/errno.h>
#include <drm/drm_gem_cma_helper.h>
ipu_di_enable(ipu_crtc->di);
}
-static void ipu_crtc_disable(struct drm_crtc *crtc)
+static void ipu_crtc_atomic_disable(struct drm_crtc *crtc,
+ struct drm_crtc_state *old_crtc_state)
{
struct ipu_crtc *ipu_crtc = to_ipu_crtc(crtc);
struct ipu_soc *ipu = dev_get_drvdata(ipu_crtc->dev->parent);
}
spin_unlock_irq(&crtc->dev->event_lock);
+ /* always disable planes on the CRTC */
+ drm_atomic_helper_disable_planes_on_crtc(old_crtc_state, true);
+
+ drm_crtc_vblank_off(crtc);
}
static void imx_drm_crtc_reset(struct drm_crtc *crtc)
kfree(to_imx_crtc_state(state));
}
+static void imx_drm_crtc_destroy(struct drm_crtc *crtc)
+{
+ imx_drm_remove_crtc(to_ipu_crtc(crtc)->imx_crtc);
+}
+
static const struct drm_crtc_funcs ipu_crtc_funcs = {
.set_config = drm_atomic_helper_set_config,
- .destroy = drm_crtc_cleanup,
+ .destroy = imx_drm_crtc_destroy,
.page_flip = drm_atomic_helper_page_flip,
.reset = imx_drm_crtc_reset,
.atomic_duplicate_state = imx_drm_crtc_duplicate_state,
{
struct ipu_crtc *ipu_crtc = dev_id;
- imx_drm_handle_vblank(ipu_crtc->imx_crtc);
+ drm_crtc_handle_vblank(&ipu_crtc->base);
return IRQ_HANDLED;
}
static void ipu_crtc_atomic_begin(struct drm_crtc *crtc,
struct drm_crtc_state *old_crtc_state)
{
+ drm_crtc_vblank_on(crtc);
+
spin_lock_irq(&crtc->dev->event_lock);
if (crtc->state->event) {
WARN_ON(drm_crtc_vblank_get(crtc));
.mode_set_nofb = ipu_crtc_mode_set_nofb,
.atomic_check = ipu_crtc_atomic_check,
.atomic_begin = ipu_crtc_atomic_begin,
- .disable = ipu_crtc_disable,
+ .atomic_disable = ipu_crtc_atomic_disable,
.enable = ipu_crtc_enable,
};
{
struct ipu_crtc *ipu_crtc = dev_get_drvdata(dev);
- imx_drm_remove_crtc(ipu_crtc->imx_crtc);
-
ipu_put_resources(ipu_crtc);
if (ipu_crtc->plane[1])
ipu_plane_put_resources(ipu_crtc->plane[1]);
{
struct drm_gem_object *obj = vma->vm_private_data;
struct drm_device *dev = obj->dev;
+ struct msm_drm_private *priv = dev->dev_private;
struct page **pages;
unsigned long pfn;
pgoff_t pgoff;
int ret;
+ /* This should only happen if userspace tries to pass a mmap'd
+ * but unfaulted gem bo vaddr into the submit ioctl, triggering
+ * a page fault while struct_mutex is already held. This is
+ * not a valid use-case, so just bail.
+ */
+ if (priv->struct_mutex_task == current)
+ return VM_FAULT_SIGBUS;
+
/* Make sure we don't parallel update on a fault, nor move or remove
* something from beneath our feet
*/
{
struct msm_gem_object *msm_obj = to_msm_bo(obj);
bool write = !!(op & MSM_PREP_WRITE);
-
- if (op & MSM_PREP_NOSYNC) {
- if (!reservation_object_test_signaled_rcu(msm_obj->resv, write))
- return -EBUSY;
- } else {
- int ret;
-
- ret = reservation_object_wait_timeout_rcu(msm_obj->resv, write,
- true, timeout_to_jiffies(timeout));
- if (ret <= 0)
- return ret == 0 ? -ETIMEDOUT : ret;
- }
+ unsigned long remain =
+ op & MSM_PREP_NOSYNC ? 0 : timeout_to_jiffies(timeout);
+ long ret;
+
+ ret = reservation_object_wait_timeout_rcu(msm_obj->resv, write,
+ true, remain);
+ if (ret == 0)
+ return remain == 0 ? -EBUSY : -ETIMEDOUT;
+ else if (ret < 0)
+ return ret;
/* TODO cache maintenance */
* this program. If not, see <http://www.gnu.org/licenses/>.
*/
+#include <linux/sync_file.h>
+
#include "msm_drv.h"
#include "msm_gpu.h"
#include "msm_gem.h"
kfree(submit);
}
+ static inline unsigned long __must_check
+ copy_from_user_inatomic(void *to, const void __user *from, unsigned long n)
+ {
+ if (access_ok(VERIFY_READ, from, n))
+ return __copy_from_user_inatomic(to, from, n);
+ return -EFAULT;
+ }
+
static int submit_lookup_objects(struct msm_gem_submit *submit,
struct drm_msm_gem_submit *args, struct drm_file *file)
{
int ret = 0;
spin_lock(&file->table_lock);
+ pagefault_disable();
for (i = 0; i < args->nr_bos; i++) {
struct drm_msm_gem_submit_bo submit_bo;
*/
submit->bos[i].flags = 0;
- ret = copy_from_user(&submit_bo, userptr, sizeof(submit_bo));
- if (ret) {
- ret = -EFAULT;
- goto out_unlock;
+ ret = copy_from_user_inatomic(&submit_bo, userptr, sizeof(submit_bo));
+ if (unlikely(ret)) {
+ pagefault_enable();
+ spin_unlock(&file->table_lock);
+ ret = copy_from_user(&submit_bo, userptr, sizeof(submit_bo));
+ if (ret)
+ goto out;
+ spin_lock(&file->table_lock);
+ pagefault_disable();
}
if (submit_bo.flags & ~MSM_SUBMIT_BO_FLAGS) {
}
out_unlock:
- submit->nr_bos = i;
+ pagefault_enable();
spin_unlock(&file->table_lock);
+ out:
+ submit->nr_bos = i;
+
return ret;
}
struct msm_file_private *ctx = file->driver_priv;
struct msm_gem_submit *submit;
struct msm_gpu *gpu = priv->gpu;
+ struct fence *in_fence = NULL;
+ struct sync_file *sync_file = NULL;
+ int out_fence_fd = -1;
unsigned i;
int ret;
/* for now, we just have 3d pipe.. eventually this would need to
* be more clever to dispatch to appropriate gpu module:
*/
- if (args->pipe != MSM_PIPE_3D0)
+ if (MSM_PIPE_ID(args->flags) != MSM_PIPE_3D0)
+ return -EINVAL;
+
+ if (MSM_PIPE_FLAGS(args->flags) & ~MSM_SUBMIT_FLAGS)
return -EINVAL;
ret = mutex_lock_interruptible(&dev->struct_mutex);
if (ret)
return ret;
+ if (args->flags & MSM_SUBMIT_FENCE_FD_OUT) {
+ out_fence_fd = get_unused_fd_flags(O_CLOEXEC);
+ if (out_fence_fd < 0) {
+ ret = out_fence_fd;
+ goto out_unlock;
+ }
+ }
+ priv->struct_mutex_task = current;
submit = submit_create(dev, gpu, args->nr_bos, args->nr_cmds);
if (!submit) {
if (ret)
goto out;
- ret = submit_fence_sync(submit);
- if (ret)
- goto out;
+ if (args->flags & MSM_SUBMIT_FENCE_FD_IN) {
+ in_fence = sync_file_get_fence(args->fence_fd);
+
+ if (!in_fence) {
+ ret = -EINVAL;
+ goto out;
+ }
+
+ /* TODO if we get an array-fence due to userspace merging multiple
+ * fences, we need a way to determine if all the backing fences
+ * are from our own context.
+ */
+
+ if (in_fence->context != gpu->fctx->context) {
+ ret = fence_wait(in_fence, true);
+ if (ret)
+ goto out;
+ }
+
+ }
+
+ if (!(args->flags & MSM_SUBMIT_NO_IMPLICIT)) {
+ ret = submit_fence_sync(submit);
+ if (ret)
+ goto out;
+ }
ret = submit_pin_objects(submit);
if (ret)
submit->nr_cmds = i;
- ret = msm_gpu_submit(gpu, submit, ctx);
+ submit->fence = msm_fence_alloc(gpu->fctx);
+ if (IS_ERR(submit->fence)) {
+ ret = PTR_ERR(submit->fence);
+ submit->fence = NULL;
+ goto out;
+ }
+
+ if (args->flags & MSM_SUBMIT_FENCE_FD_OUT) {
+ sync_file = sync_file_create(submit->fence);
+ if (!sync_file) {
+ ret = -ENOMEM;
+ goto out;
+ }
+ }
+
+ msm_gpu_submit(gpu, submit, ctx);
args->fence = submit->fence->seqno;
+ if (args->flags & MSM_SUBMIT_FENCE_FD_OUT) {
+ fd_install(out_fence_fd, sync_file->file);
+ args->fence_fd = out_fence_fd;
+ }
+
out:
+ if (in_fence)
+ fence_put(in_fence);
submit_cleanup(submit);
if (ret)
msm_gem_submit_free(submit);
out_unlock:
+ if (ret && (out_fence_fd >= 0))
+ put_unused_fd(out_fence_fd);
+ priv->struct_mutex_task = NULL;
mutex_unlock(&dev->struct_mutex);
return ret;
}
if (radeon_crtc->ss.refdiv) {
radeon_crtc->pll_flags |= RADEON_PLL_USE_REF_DIV;
radeon_crtc->pll_reference_div = radeon_crtc->ss.refdiv;
- if (rdev->family >= CHIP_RV770)
+ if (ASIC_IS_AVIVO(rdev) &&
+ rdev->family != CHIP_RS780 &&
+ rdev->family != CHIP_RS880)
radeon_crtc->pll_flags |= RADEON_PLL_USE_FRAC_FB_DIV;
}
}
u32 tmp, viewport_w, viewport_h;
int r;
bool bypass_lut = false;
+ char *format_name;
/* no fb bound */
if (!atomic && !crtc->primary->fb) {
bypass_lut = true;
break;
default:
- DRM_ERROR("Unsupported screen format %s\n",
- drm_get_format_name(target_fb->pixel_format));
+ format_name = drm_get_format_name(target_fb->pixel_format);
+ DRM_ERROR("Unsupported screen format %s\n", format_name);
+ kfree(format_name);
return -EINVAL;
}
WREG32(EVERGREEN_VIEWPORT_SIZE + radeon_crtc->crtc_offset,
(viewport_w << 16) | viewport_h);
- /* set pageflip to happen only at start of vblank interval (front porch) */
- WREG32(EVERGREEN_MASTER_UPDATE_MODE + radeon_crtc->crtc_offset, 3);
+ /* set pageflip to happen anywhere in vblank interval */
+ WREG32(EVERGREEN_MASTER_UPDATE_MODE + radeon_crtc->crtc_offset, 0);
if (!atomic && fb && fb != crtc->primary->fb) {
radeon_fb = to_radeon_framebuffer(fb);
u32 viewport_w, viewport_h;
int r;
bool bypass_lut = false;
+ char *format_name;
/* no fb bound */
if (!atomic && !crtc->primary->fb) {
bypass_lut = true;
break;
default:
- DRM_ERROR("Unsupported screen format %s\n",
- drm_get_format_name(target_fb->pixel_format));
+ format_name = drm_get_format_name(target_fb->pixel_format);
+ DRM_ERROR("Unsupported screen format %s\n", format_name);
+ kfree(format_name);
return -EINVAL;
}
WREG32(AVIVO_D1MODE_VIEWPORT_SIZE + radeon_crtc->crtc_offset,
(viewport_w << 16) | viewport_h);
- /* set pageflip to happen only at start of vblank interval (front porch) */
- WREG32(AVIVO_D1MODE_MASTER_UPDATE_MODE + radeon_crtc->crtc_offset, 3);
+ /* set pageflip to happen anywhere in vblank interval */
+ WREG32(AVIVO_D1MODE_MASTER_UPDATE_MODE + radeon_crtc->crtc_offset, 0);
if (!atomic && fb && fb != crtc->primary->fb) {
radeon_fb = to_radeon_framebuffer(fb);
acpi_handle handle;
struct radeon_atpx_functions functions;
bool is_hybrid;
+ bool dgpu_req_power_for_displays;
};
static struct radeon_atpx_priv {
return radeon_atpx_priv.atpx.is_hybrid;
}
+bool radeon_atpx_dgpu_req_power_for_displays(void) {
+ return radeon_atpx_priv.atpx.dgpu_req_power_for_displays;
+}
+
/**
* radeon_atpx_call - call an ATPX method
*
atpx->is_hybrid = false;
if (valid_bits & ATPX_MS_HYBRID_GFX_SUPPORTED) {
printk("ATPX Hybrid Graphics\n");
- #if 1
- /* This is a temporary hack until the D3 cold support
- * makes it upstream. The ATPX power_control method seems
- * to still work on even if the system should be using
- * the new standardized hybrid D3 cold ACPI interface.
- */
- atpx->functions.power_cntl = true;
- #else
atpx->functions.power_cntl = false;
- #endif
atpx->is_hybrid = true;
}
if (radeon_ttm_tt_has_userptr(bo->ttm))
return -EPERM;
- return drm_vma_node_verify_access(&rbo->gem_base.vma_node, filp);
+ return drm_vma_node_verify_access(&rbo->gem_base.vma_node,
+ filp->private_data);
}
static void radeon_move_null(struct ttm_buffer_object *bo,
rdev = radeon_get_rdev(bo->bdev);
ridx = radeon_copy_ring_index(rdev);
- old_start = old_mem->start << PAGE_SHIFT;
- new_start = new_mem->start << PAGE_SHIFT;
+ old_start = (u64)old_mem->start << PAGE_SHIFT;
+ new_start = (u64)new_mem->start << PAGE_SHIFT;
switch (old_mem->mem_type) {
case TTM_PL_VRAM:
if (unlikely(r)) {
goto out_cleanup;
}
- r = ttm_bo_move_ttm(bo, true, interruptible, no_wait_gpu, new_mem);
+ r = ttm_bo_move_ttm(bo, interruptible, no_wait_gpu, new_mem);
out_cleanup:
ttm_bo_mem_put(bo, &tmp_mem);
return r;
if (unlikely(r)) {
return r;
}
- r = ttm_bo_move_ttm(bo, true, interruptible, no_wait_gpu, &tmp_mem);
+ r = ttm_bo_move_ttm(bo, interruptible, no_wait_gpu, &tmp_mem);
if (unlikely(r)) {
goto out_cleanup;
}
if (r) {
memcpy:
- r = ttm_bo_move_memcpy(bo, evict, interruptible,
- no_wait_gpu, new_mem);
+ r = ttm_bo_move_memcpy(bo, interruptible, no_wait_gpu, new_mem);
if (r) {
return r;
}
#include <linux/platform_device.h>
#include <linux/pm_runtime.h>
#include "drm_fb_cma_helper.h"
+#include <drm/drm_fb_helper.h>
#include "uapi/drm/vc4_drm.h"
#include "vc4_drv.h"
switch (args->param) {
case DRM_VC4_PARAM_V3D_IDENT0:
ret = pm_runtime_get_sync(&vc4->v3d->pdev->dev);
- if (ret)
+ if (ret < 0)
return ret;
args->value = V3D_READ(V3D_IDENT0);
pm_runtime_put(&vc4->v3d->pdev->dev);
break;
case DRM_VC4_PARAM_V3D_IDENT1:
ret = pm_runtime_get_sync(&vc4->v3d->pdev->dev);
- if (ret)
+ if (ret < 0)
return ret;
args->value = V3D_READ(V3D_IDENT1);
pm_runtime_put(&vc4->v3d->pdev->dev);
break;
case DRM_VC4_PARAM_V3D_IDENT2:
ret = pm_runtime_get_sync(&vc4->v3d->pdev->dev);
- if (ret)
+ if (ret < 0)
return ret;
args->value = V3D_READ(V3D_IDENT2);
pm_runtime_put(&vc4->v3d->pdev->dev);
ap->ranges[0].base = 0;
ap->ranges[0].size = ~0;
- remove_conflicting_framebuffers(ap, "vc4drmfb", false);
+ drm_fb_helper_remove_conflicting_framebuffers(ap, "vc4drmfb", false);
kfree(ap);
}
return -ENOMEM;
drm = drm_dev_alloc(&vc4_drm_driver, dev);
- if (!drm)
- return -ENOMEM;
+ if (IS_ERR(drm))
+ return PTR_ERR(drm);
platform_set_drvdata(pdev, drm);
vc4->dev = drm;
drm->dev_private = vc4;
vc4_flush_caches(dev);
- /* Disable the binner's pre-loaded overflow memory address */
- V3D_WRITE(V3D_BPOA, 0);
- V3D_WRITE(V3D_BPOS, 0);
-
/* Either put the job in the binner if it uses the binner, or
* immediately move it to the to-be-rendered queue.
*/
return -EINVAL;
}
- exec->bo = kcalloc(exec->bo_count, sizeof(struct drm_gem_cma_object *),
- GFP_KERNEL);
+ exec->bo = drm_calloc_large(exec->bo_count,
+ sizeof(struct drm_gem_cma_object *));
if (!exec->bo) {
DRM_ERROR("Failed to allocate validated BO pointers\n");
return -ENOMEM;
spin_unlock(&file_priv->table_lock);
fail:
- kfree(handles);
- return 0;
+ drm_free_large(handles);
+ return ret;
}
static int
* read the contents back for validation, and I think the
* bo->vaddr is uncached access.
*/
- temp = kmalloc(temp_size, GFP_KERNEL);
+ temp = drm_malloc_ab(temp_size, 1);
if (!temp) {
DRM_ERROR("Failed to allocate storage for copying "
"in bin/render CLs.\n");
ret = vc4_validate_shader_recs(dev, exec);
fail:
- kfree(temp);
+ drm_free_large(temp);
return ret;
}
if (exec->bo) {
for (i = 0; i < exec->bo_count; i++)
drm_gem_object_unreference_unlocked(&exec->bo[i]->base);
- kfree(exec->bo);
+ drm_free_large(exec->bo);
}
while (!list_empty(&exec->unref_list)) {
vc4->overflow_mem = NULL;
}
- vc4_bo_cache_destroy(dev);
-
if (vc4->hang_state)
vc4_free_hang_state(dev, vc4->hang_state);
+
+ vc4_bo_cache_destroy(dev);
}