From: Dave Airlie
Date: Mon, 15 Aug 2016 06:53:57 +0000 (+1000)
Subject: Merge tag 'drm-intel-next-2016-08-08' of git://anongit.freedesktop.org/drm-intel...
X-Git-Tag: v4.9-rc1~41^2~40
X-Git-Url: https://repo.jachan.dev/linux.git/commitdiff_plain/fc93ff608b15ae32cde3006b7af860b59cac20ec?hp=-c

Merge tag 'drm-intel-next-2016-08-08' of git://anongit.freedesktop.org/drm-intel into drm-next

- refactor ddi buffer programming a bit (Ville)
- large-scale renaming to untangle naming in the gem code (Chris)
- rework vma/active tracking for accurately reaping idle mappings of
  shared objects (Chris)
- misc dp sst/mst probing corner case fixes (Ville)
- tons of cleanup&tunings all around in gem
- lockless (rcu-protected) request lookup, plus use it everywhere for
  non(b)locking waits (Chris)
- pipe crc debugfs fixes (Rodrigo)
- random fixes all over

* tag 'drm-intel-next-2016-08-08' of git://anongit.freedesktop.org/drm-intel: (222 commits)
  drm/i915: Update DRIVER_DATE to 20160808
  drm/i915: fix aliasing_ppgtt leak
  drm/i915: Update comment before i915_spin_request
  drm/i915: Use drm official vblank_no_hw_counter callback.
  drm/i915: Fix copy_to_user usage for pipe_crc
  Revert "drm/i915: Track active streams also for DP SST"
  drm/i915: fix WaInsertDummyPushConstPs
  drm/i915: Assert that the request hasn't been retired
  drm/i915: Repack fence tiling mode and stride into a single integer
  drm/i915: Document and reject invalid tiling modes
  drm/i915: Remove locking for get_tiling
  drm/i915: Remove pinned check from madvise ioctl
  drm/i915: Reduce locking inside swfinish ioctl
  drm/i915: Remove (struct_mutex) locking for busy-ioctl
  drm/i915: Remove (struct_mutex) locking for wait-ioctl
  drm/i915: Do a nonblocking wait first in pread/pwrite
  drm/i915: Remove unused no-shrinker-steal
  drm/i915: Tidy generation of the GTT mmap offset
  drm/i915/shrinker: Wait before acquiring struct_mutex under oom
  drm/i915: Simplify do_idling() (Ironlake vt-d w/a)
  ...
---

fc93ff608b15ae32cde3006b7af860b59cac20ec
diff --combined drivers/gpu/drm/i915/i915_debugfs.c
index 9b03cb2813bd,9bd41581b592..f62285c1ed7f
--- a/drivers/gpu/drm/i915/i915_debugfs.c
+++ b/drivers/gpu/drm/i915/i915_debugfs.c
@@@ -91,7 -91,7 +91,7 @@@ static int i915_capabilities(struct seq
 static char get_active_flag(struct drm_i915_gem_object *obj)
 {
- 	return obj->active ? '*' : ' ';
+ 	return i915_gem_object_is_active(obj) ?
'*' : ' '; } static char get_pin_flag(struct drm_i915_gem_object *obj) @@@ -101,7 -101,7 +101,7 @@@ static char get_tiling_flag(struct drm_i915_gem_object *obj) { - switch (obj->tiling_mode) { + switch (i915_gem_object_get_tiling(obj)) { default: case I915_TILING_NONE: return ' '; case I915_TILING_X: return 'X'; @@@ -125,7 -125,7 +125,7 @@@ static u64 i915_gem_obj_total_ggtt_size struct i915_vma *vma; list_for_each_entry(vma, &obj->vma_list, obj_link) { - if (vma->is_ggtt && drm_mm_node_allocated(&vma->node)) + if (i915_vma_is_ggtt(vma) && drm_mm_node_allocated(&vma->node)) size += vma->node.size; } @@@ -138,6 -138,7 +138,7 @@@ describe_obj(struct seq_file *m, struc struct drm_i915_private *dev_priv = to_i915(obj->base.dev); struct intel_engine_cs *engine; struct i915_vma *vma; + unsigned int frontbuffer_bits; int pin_count = 0; enum intel_engine_id id; @@@ -155,17 -156,20 +156,20 @@@ obj->base.write_domain); for_each_engine_id(engine, dev_priv, id) seq_printf(m, "%x ", - i915_gem_request_get_seqno(obj->last_read_req[id])); + i915_gem_active_get_seqno(&obj->last_read[id], + &obj->base.dev->struct_mutex)); seq_printf(m, "] %x %x%s%s%s", - i915_gem_request_get_seqno(obj->last_write_req), - i915_gem_request_get_seqno(obj->last_fenced_req), + i915_gem_active_get_seqno(&obj->last_write, + &obj->base.dev->struct_mutex), + i915_gem_active_get_seqno(&obj->last_fence, + &obj->base.dev->struct_mutex), i915_cache_level_str(to_i915(obj->base.dev), obj->cache_level), obj->dirty ? " dirty" : "", obj->madv == I915_MADV_DONTNEED ? " purgeable" : ""); if (obj->base.name) seq_printf(m, " (name: %d)", obj->base.name); list_for_each_entry(vma, &obj->vma_list, obj_link) { - if (vma->pin_count > 0) + if (i915_vma_is_pinned(vma)) pin_count++; } seq_printf(m, " (pinned x %d)", pin_count); @@@ -174,10 -178,13 +178,13 @@@ if (obj->fence_reg != I915_FENCE_REG_NONE) seq_printf(m, " (fence: %d)", obj->fence_reg); list_for_each_entry(vma, &obj->vma_list, obj_link) { + if (!drm_mm_node_allocated(&vma->node)) + continue; + seq_printf(m, " (%sgtt offset: %08llx, size: %08llx", - vma->is_ggtt ? "g" : "pp", + i915_vma_is_ggtt(vma) ? 
"g" : "pp", vma->node.start, vma->node.size); - if (vma->is_ggtt) + if (i915_vma_is_ggtt(vma)) seq_printf(m, ", type: %u", vma->ggtt_view.type); seq_puts(m, ")"); } @@@ -192,11 -199,15 +199,15 @@@ *t = '\0'; seq_printf(m, " (%s mappable)", s); } - if (obj->last_write_req != NULL) - seq_printf(m, " (%s)", - i915_gem_request_get_engine(obj->last_write_req)->name); - if (obj->frontbuffer_bits) - seq_printf(m, " (frontbuffer: 0x%03x)", obj->frontbuffer_bits); + + engine = i915_gem_active_get_engine(&obj->last_write, + &obj->base.dev->struct_mutex); + if (engine) + seq_printf(m, " (%s)", engine->name); + + frontbuffer_bits = atomic_read(&obj->frontbuffer_bits); + if (frontbuffer_bits) + seq_printf(m, " (frontbuffer: 0x%03x)", frontbuffer_bits); } static int i915_gem_object_list_info(struct seq_file *m, void *data) @@@ -338,46 -349,29 +349,29 @@@ static int per_file_stats(int id, void stats->count++; stats->total += obj->base.size; - + if (!obj->bind_count) + stats->unbound += obj->base.size; if (obj->base.name || obj->base.dma_buf) stats->shared += obj->base.size; - if (USES_FULL_PPGTT(obj->base.dev)) { - list_for_each_entry(vma, &obj->vma_list, obj_link) { - struct i915_hw_ppgtt *ppgtt; - - if (!drm_mm_node_allocated(&vma->node)) - continue; + list_for_each_entry(vma, &obj->vma_list, obj_link) { + if (!drm_mm_node_allocated(&vma->node)) + continue; - if (vma->is_ggtt) { - stats->global += obj->base.size; - continue; - } + if (i915_vma_is_ggtt(vma)) { + stats->global += vma->node.size; + } else { + struct i915_hw_ppgtt *ppgtt = i915_vm_to_ppgtt(vma->vm); - ppgtt = container_of(vma->vm, struct i915_hw_ppgtt, base); - if (ppgtt->file_priv != stats->file_priv) + if (ppgtt->base.file != stats->file_priv) continue; - - if (obj->active) /* XXX per-vma statistic */ - stats->active += obj->base.size; - else - stats->inactive += obj->base.size; - - return 0; - } - } else { - if (i915_gem_obj_ggtt_bound(obj)) { - stats->global += obj->base.size; - if (obj->active) - stats->active += obj->base.size; - else - stats->inactive += obj->base.size; - return 0; } - } - if (!list_empty(&obj->global_list)) - stats->unbound += obj->base.size; + if (i915_vma_is_active(vma)) + stats->active += vma->node.size; + else + stats->inactive += vma->node.size; + } return 0; } @@@ -425,8 -419,8 +419,8 @@@ static int per_file_ctx_stats(int id, v for (n = 0; n < ARRAY_SIZE(ctx->engine); n++) { if (ctx->engine[n].state) per_file_stats(0, ctx->engine[n].state, data); - if (ctx->engine[n].ringbuf) - per_file_stats(0, ctx->engine[n].ringbuf->obj, data); + if (ctx->engine[n].ring) + per_file_stats(0, ctx->engine[n].ring->obj, data); } return 0; @@@ -754,13 -748,13 +748,13 @@@ static int i915_gem_request_info(struc int count; count = 0; - list_for_each_entry(req, &engine->request_list, list) + list_for_each_entry(req, &engine->request_list, link) count++; if (count == 0) continue; seq_printf(m, "%s requests: %d\n", engine->name, count); - list_for_each_entry(req, &engine->request_list, list) { + list_for_each_entry(req, &engine->request_list, link) { struct task_struct *task; rcu_read_lock(); @@@ -768,7 -762,7 +762,7 @@@ if (req->pid) task = pid_task(req->pid, PIDTYPE_PID); seq_printf(m, " %x @ %d: %s [%d]\n", - req->seqno, + req->fence.seqno, (int) (jiffies - req->emitted_jiffies), task ? task->comm : "", task ? 
task->pid : -1); @@@ -1205,8 -1199,6 +1199,6 @@@ static int i915_frequency_info(struct s intel_runtime_pm_get(dev_priv); - flush_delayed_work(&dev_priv->rps.delayed_resume_work); - if (IS_GEN5(dev)) { u16 rgvswctl = I915_READ16(MEMSWCTL); u16 rgvstat = I915_READ16(MEMSTAT_ILK); @@@ -1381,6 -1373,8 +1373,8 @@@ intel_gpu_freq(dev_priv, dev_priv->rps.idle_freq)); seq_printf(m, "Min freq: %d MHz\n", intel_gpu_freq(dev_priv, dev_priv->rps.min_freq)); + seq_printf(m, "Boost freq: %d MHz\n", + intel_gpu_freq(dev_priv, dev_priv->rps.boost_freq)); seq_printf(m, "Max freq: %d MHz\n", intel_gpu_freq(dev_priv, dev_priv->rps.max_freq)); seq_printf(m, @@@ -1419,7 -1413,7 +1413,7 @@@ static int i915_hangcheck_info(struct s intel_runtime_pm_get(dev_priv); for_each_engine_id(engine, dev_priv, id) { - acthd[id] = intel_ring_get_active_head(engine); + acthd[id] = intel_engine_get_active_head(engine); seqno[id] = intel_engine_get_seqno(engine); } @@@ -1602,6 -1596,7 +1596,7 @@@ static int gen6_drpc_info(struct seq_fi struct drm_device *dev = node->minor->dev; struct drm_i915_private *dev_priv = to_i915(dev); u32 rpmodectl1, gt_core_status, rcctl1, rc6vids = 0; + u32 gen9_powergate_enable = 0, gen9_powergate_status = 0; unsigned forcewake_count; int count = 0, ret; @@@ -1629,6 -1624,10 +1624,10 @@@ rpmodectl1 = I915_READ(GEN6_RP_CONTROL); rcctl1 = I915_READ(GEN6_RC_CONTROL); + if (INTEL_INFO(dev)->gen >= 9) { + gen9_powergate_enable = I915_READ(GEN9_PG_ENABLE); + gen9_powergate_status = I915_READ(GEN9_PWRGT_DOMAIN_STATUS); + } mutex_unlock(&dev->struct_mutex); mutex_lock(&dev_priv->rps.hw_lock); sandybridge_pcode_read(dev_priv, GEN6_PCODE_READ_RC6VIDS, &rc6vids); @@@ -1647,6 -1646,12 +1646,12 @@@ yesno(rcctl1 & GEN6_RC_CTL_RC1e_ENABLE)); seq_printf(m, "RC6 Enabled: %s\n", yesno(rcctl1 & GEN6_RC_CTL_RC6_ENABLE)); + if (INTEL_INFO(dev)->gen >= 9) { + seq_printf(m, "Render Well Gating Enabled: %s\n", + yesno(gen9_powergate_enable & GEN9_RENDER_PG_ENABLE)); + seq_printf(m, "Media Well Gating Enabled: %s\n", + yesno(gen9_powergate_enable & GEN9_MEDIA_PG_ENABLE)); + } seq_printf(m, "Deep RC6 Enabled: %s\n", yesno(rcctl1 & GEN6_RC_CTL_RC6p_ENABLE)); seq_printf(m, "Deepest RC6 Enabled: %s\n", @@@ -1675,6 -1680,14 +1680,14 @@@ seq_printf(m, "Core Power Down: %s\n", yesno(gt_core_status & GEN6_CORE_CPD_STATE_MASK)); + if (INTEL_INFO(dev)->gen >= 9) { + seq_printf(m, "Render Power Well: %s\n", + (gen9_powergate_status & + GEN9_PWRGT_RENDER_STATUS_MASK) ? "Up" : "Down"); + seq_printf(m, "Media Power Well: %s\n", + (gen9_powergate_status & + GEN9_PWRGT_MEDIA_STATUS_MASK) ? 
"Up" : "Down"); + } /* Not exactly sure what this is */ seq_printf(m, "RC6 \"Locked to RPn\" residency since boot: %u\n", @@@ -1692,7 -1705,7 +1705,7 @@@ GEN6_DECODE_RC6_VID(((rc6vids >> 8) & 0xff))); seq_printf(m, "RC6++ voltage: %dmV\n", GEN6_DECODE_RC6_VID(((rc6vids >> 16) & 0xff))); - return 0; + return i915_forcewake_domains(m, NULL); } static int i915_drpc_info(struct seq_file *m, void *unused) @@@ -1896,8 -1909,6 +1909,6 @@@ static int i915_ring_freq_table(struct intel_runtime_pm_get(dev_priv); - flush_delayed_work(&dev_priv->rps.delayed_resume_work); - ret = mutex_lock_interruptible(&dev_priv->rps.hw_lock); if (ret) goto out; @@@ -2019,12 -2030,11 +2030,11 @@@ static int i915_gem_framebuffer_info(st return 0; } - static void describe_ctx_ringbuf(struct seq_file *m, - struct intel_ringbuffer *ringbuf) + static void describe_ctx_ring(struct seq_file *m, struct intel_ring *ring) { seq_printf(m, " (ringbuffer, space: %d, head: %u, tail: %u, last head: %d)", - ringbuf->space, ringbuf->head, ringbuf->tail, - ringbuf->last_retired_head); + ring->space, ring->head, ring->tail, + ring->last_retired_head); } static int i915_context_status(struct seq_file *m, void *unused) @@@ -2068,8 -2078,8 +2078,8 @@@ seq_putc(m, ce->initialised ? 'I' : 'i'); if (ce->state) describe_obj(m, ce->state); - if (ce->ringbuf) - describe_ctx_ringbuf(m, ce->ringbuf); + if (ce->ring) + describe_ctx_ring(m, ce->ring); seq_putc(m, '\n'); } @@@ -2467,13 -2477,7 +2477,7 @@@ static int i915_rps_boost_info(struct s list_empty(&file_priv->rps.link) ? "" : ", active"); rcu_read_unlock(); } - seq_printf(m, "Semaphore boosts: %d%s\n", - dev_priv->rps.semaphores.boosts, - list_empty(&dev_priv->rps.semaphores.link) ? "" : ", active"); - seq_printf(m, "MMIO flip boosts: %d%s\n", - dev_priv->rps.mmioflips.boosts, - list_empty(&dev_priv->rps.mmioflips.link) ? "" : ", active"); - seq_printf(m, "Kernel boosts: %d\n", dev_priv->rps.boosts); + seq_printf(m, "Kernel (anonymous) boosts: %d\n", dev_priv->rps.boosts); spin_unlock(&dev_priv->rps.client_lock); mutex_unlock(&dev->filelist_mutex); @@@ -3089,12 -3093,12 +3093,12 @@@ static const char *plane_rotation(unsig */ snprintf(buf, sizeof(buf), "%s%s%s%s%s%s(0x%08x)", - (rotation & BIT(DRM_ROTATE_0)) ? "0 " : "", - (rotation & BIT(DRM_ROTATE_90)) ? "90 " : "", - (rotation & BIT(DRM_ROTATE_180)) ? "180 " : "", - (rotation & BIT(DRM_ROTATE_270)) ? "270 " : "", - (rotation & BIT(DRM_REFLECT_X)) ? "FLIPX " : "", - (rotation & BIT(DRM_REFLECT_Y)) ? "FLIPY " : "", + (rotation & DRM_ROTATE_0) ? "0 " : "", + (rotation & DRM_ROTATE_90) ? "90 " : "", + (rotation & DRM_ROTATE_180) ? "180 " : "", + (rotation & DRM_ROTATE_270) ? "270 " : "", + (rotation & DRM_REFLECT_X) ? "FLIPX " : "", + (rotation & DRM_REFLECT_Y) ? 
"FLIPY " : "", rotation); return buf; @@@ -3228,7 -3232,7 +3232,7 @@@ static int i915_semaphore_status(struc enum intel_engine_id id; int j, ret; - if (!i915_semaphore_is_enabled(dev_priv)) { + if (!i915.semaphores) { seq_puts(m, "Semaphores are disabled\n"); return 0; } @@@ -3621,7 -3625,6 +3625,6 @@@ i915_pipe_crc_read(struct file *filep, while (n_entries > 0) { struct intel_pipe_crc_entry *entry = &pipe_crc->entries[pipe_crc->tail]; - int ret; if (CIRC_CNT(pipe_crc->head, pipe_crc->tail, INTEL_PIPE_CRC_ENTRIES_NR) < 1) @@@ -3638,8 -3641,7 +3641,7 @@@ spin_unlock_irq(&pipe_crc->lock); - ret = copy_to_user(user_buf, buf, PIPE_CRC_LINE_LEN); - if (ret == PIPE_CRC_LINE_LEN) + if (copy_to_user(user_buf, buf, PIPE_CRC_LINE_LEN)) return -EFAULT; user_buf += PIPE_CRC_LINE_LEN; @@@ -4921,7 -4923,7 +4923,7 @@@ i915_drop_caches_set(void *data, u64 va return ret; if (val & DROP_ACTIVE) { - ret = i915_gem_wait_for_idle(dev_priv); + ret = i915_gem_wait_for_idle(dev_priv, true); if (ret) goto unlock; } @@@ -4950,20 -4952,11 +4952,11 @@@ i915_max_freq_get(void *data, u64 *val { struct drm_device *dev = data; struct drm_i915_private *dev_priv = to_i915(dev); - int ret; if (INTEL_INFO(dev)->gen < 6) return -ENODEV; - flush_delayed_work(&dev_priv->rps.delayed_resume_work); - - ret = mutex_lock_interruptible(&dev_priv->rps.hw_lock); - if (ret) - return ret; - *val = intel_gpu_freq(dev_priv, dev_priv->rps.max_freq_softlimit); - mutex_unlock(&dev_priv->rps.hw_lock); - return 0; } @@@ -4978,8 -4971,6 +4971,6 @@@ i915_max_freq_set(void *data, u64 val if (INTEL_INFO(dev)->gen < 6) return -ENODEV; - flush_delayed_work(&dev_priv->rps.delayed_resume_work); - DRM_DEBUG_DRIVER("Manually setting max freq to %llu\n", val); ret = mutex_lock_interruptible(&dev_priv->rps.hw_lock); @@@ -5017,20 -5008,11 +5008,11 @@@ i915_min_freq_get(void *data, u64 *val { struct drm_device *dev = data; struct drm_i915_private *dev_priv = to_i915(dev); - int ret; - if (INTEL_INFO(dev)->gen < 6) + if (INTEL_GEN(dev_priv) < 6) return -ENODEV; - flush_delayed_work(&dev_priv->rps.delayed_resume_work); - - ret = mutex_lock_interruptible(&dev_priv->rps.hw_lock); - if (ret) - return ret; - *val = intel_gpu_freq(dev_priv, dev_priv->rps.min_freq_softlimit); - mutex_unlock(&dev_priv->rps.hw_lock); - return 0; } @@@ -5042,11 -5024,9 +5024,9 @@@ i915_min_freq_set(void *data, u64 val u32 hw_max, hw_min; int ret; - if (INTEL_INFO(dev)->gen < 6) + if (INTEL_GEN(dev_priv) < 6) return -ENODEV; - flush_delayed_work(&dev_priv->rps.delayed_resume_work); - DRM_DEBUG_DRIVER("Manually setting min freq to %llu\n", val); ret = mutex_lock_interruptible(&dev_priv->rps.hw_lock); @@@ -5268,7 -5248,8 +5248,8 @@@ static void broadwell_sseu_device_statu static int i915_sseu_status(struct seq_file *m, void *unused) { struct drm_info_node *node = (struct drm_info_node *) m->private; - struct drm_device *dev = node->minor->dev; + struct drm_i915_private *dev_priv = to_i915(node->minor->dev); + struct drm_device *dev = &dev_priv->drm; struct sseu_dev_status stat; if (INTEL_INFO(dev)->gen < 8) @@@ -5298,6 -5279,9 +5279,9 @@@ seq_puts(m, "SSEU Device Status\n"); memset(&stat, 0, sizeof(stat)); + + intel_runtime_pm_get(dev_priv); + if (IS_CHERRYVIEW(dev)) { cherryview_sseu_device_status(dev, &stat); } else if (IS_BROADWELL(dev)) { @@@ -5305,6 -5289,9 +5289,9 @@@ } else if (INTEL_INFO(dev)->gen >= 9) { gen9_sseu_device_status(dev, &stat); } + + intel_runtime_pm_put(dev_priv); + seq_printf(m, " Enabled Slice Total: %u\n", stat.slice_total); seq_printf(m, " Enabled Subslice 
Total: %u\n", diff --combined drivers/gpu/drm/i915/i915_drv.c index 40cd16cf9772,8cfc264ec9f6..57eb380a2c21 --- a/drivers/gpu/drm/i915/i915_drv.c +++ b/drivers/gpu/drm/i915/i915_drv.c @@@ -228,27 -228,6 +228,6 @@@ static void intel_detect_pch(struct drm pci_dev_put(pch); } - bool i915_semaphore_is_enabled(struct drm_i915_private *dev_priv) - { - if (INTEL_GEN(dev_priv) < 6) - return false; - - if (i915.semaphores >= 0) - return i915.semaphores; - - /* TODO: make semaphores and Execlists play nicely together */ - if (i915.enable_execlists) - return false; - - #ifdef CONFIG_INTEL_IOMMU - /* Enable semaphores on SNB when IO remapping is off */ - if (IS_GEN6(dev_priv) && intel_iommu_gfx_mapped) - return false; - #endif - - return true; - } - static int i915_getparam(struct drm_device *dev, void *data, struct drm_file *file_priv) { @@@ -324,7 -303,7 +303,7 @@@ value = 1; break; case I915_PARAM_HAS_SEMAPHORES: - value = i915_semaphore_is_enabled(dev_priv); + value = i915.semaphores; break; case I915_PARAM_HAS_PRIME_VMAP_FLUSH: value = 1; @@@ -706,7 -685,7 +685,7 @@@ static int i915_kick_out_firmware_fb(st primary = pdev->resource[PCI_ROM_RESOURCE].flags & IORESOURCE_ROM_SHADOW; - ret = remove_conflicting_framebuffers(ap, "inteldrmfb", primary); + ret = drm_fb_helper_remove_conflicting_framebuffers(ap, "inteldrmfb", primary); kfree(ap); @@@ -999,6 -978,9 +978,9 @@@ static void intel_sanitize_options(stru i915.enable_ppgtt = intel_sanitize_enable_ppgtt(dev_priv, i915.enable_ppgtt); DRM_DEBUG_DRIVER("ppgtt mode: %i\n", i915.enable_ppgtt); + + i915.semaphores = intel_sanitize_semaphores(dev_priv, i915.semaphores); + DRM_DEBUG_DRIVER("use GPU sempahores? %s\n", yesno(i915.semaphores)); } /** @@@ -1011,8 -993,6 +993,6 @@@ static int i915_driver_init_hw(struct drm_i915_private *dev_priv) { struct drm_device *dev = &dev_priv->drm; - struct i915_ggtt *ggtt = &dev_priv->ggtt; - uint32_t aperture_size; int ret; if (i915_inject_load_failure()) @@@ -1022,16 -1002,10 +1002,10 @@@ intel_sanitize_options(dev_priv); - ret = i915_ggtt_init_hw(dev); + ret = i915_ggtt_probe_hw(dev_priv); if (ret) return ret; - ret = i915_ggtt_enable_hw(dev); - if (ret) { - DRM_ERROR("failed to enable GGTT\n"); - goto out_ggtt; - } - /* WARNING: Apparently we must kick fbdev drivers before vgacon, * otherwise the vga fbdev driver falls over. */ ret = i915_kick_out_firmware_fb(dev_priv); @@@ -1046,6 -1020,16 +1020,16 @@@ goto out_ggtt; } + ret = i915_ggtt_init_hw(dev_priv); + if (ret) + return ret; + + ret = i915_ggtt_enable_hw(dev_priv); + if (ret) { + DRM_ERROR("failed to enable GGTT\n"); + goto out_ggtt; + } + pci_set_master(dev->pdev); /* overlay on gen2 is broken and can't address above 1G */ @@@ -1058,7 -1042,6 +1042,6 @@@ } } - /* 965GM sometimes incorrectly writes to hardware status page (HWS) * using 32bit addressing, overwriting memory if HWS is located * above 4GB. 
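
For context between the two i915_drv.c hunks here: the hunks above drop i915_semaphore_is_enabled() and instead sanitize i915.semaphores once at load time, via the intel_sanitize_semaphores() call added to intel_sanitize_options(). The helper's body is not part of this diff's context; a plausible sketch, assuming it simply relocates the removed checks and treats a non-negative module parameter as an explicit override:

bool intel_sanitize_semaphores(struct drm_i915_private *dev_priv, int value)
{
	/* Sketch only: reconstructed from the removed
	 * i915_semaphore_is_enabled() above, not taken from this diff.
	 */
	if (INTEL_GEN(dev_priv) < 6)
		return false;

	/* A non-negative i915.semaphores value is an explicit user choice. */
	if (value >= 0)
		return value;

	/* TODO: make semaphores and Execlists play nicely together */
	if (i915.enable_execlists)
		return false;

#ifdef CONFIG_INTEL_IOMMU
	/* Enable semaphores on SNB when IO remapping is off */
	if (IS_GEN6(dev_priv) && intel_iommu_gfx_mapped)
		return false;
#endif

	return true;
}
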
@@@ -1077,19 -1060,6 +1060,6 @@@ } } - aperture_size = ggtt->mappable_end; - - ggtt->mappable = - io_mapping_create_wc(ggtt->mappable_base, - aperture_size); - if (!ggtt->mappable) { - ret = -EIO; - goto out_ggtt; - } - - ggtt->mtrr = arch_phys_wc_add(ggtt->mappable_base, - aperture_size); - pm_qos_add_request(&dev_priv->pm_qos, PM_QOS_CPU_DMA_LATENCY, PM_QOS_DEFAULT_VALUE); @@@ -1118,7 -1088,7 +1088,7 @@@ return 0; out_ggtt: - i915_ggtt_cleanup_hw(dev); + i915_ggtt_cleanup_hw(dev_priv); return ret; } @@@ -1130,15 -1100,12 +1100,12 @@@ static void i915_driver_cleanup_hw(struct drm_i915_private *dev_priv) { struct drm_device *dev = &dev_priv->drm; - struct i915_ggtt *ggtt = &dev_priv->ggtt; if (dev->pdev->msi_enabled) pci_disable_msi(dev->pdev); pm_qos_remove_request(&dev_priv->pm_qos); - arch_phys_wc_del(ggtt->mtrr); - io_mapping_free(ggtt->mappable); - i915_ggtt_cleanup_hw(dev); + i915_ggtt_cleanup_hw(dev_priv); } /** @@@ -1343,7 -1310,7 +1310,7 @@@ void i915_driver_unload(struct drm_devi i915_destroy_error_state(dev); /* Flush any outstanding unpin_work. */ - flush_workqueue(dev_priv->wq); + drain_workqueue(dev_priv->wq); intel_guc_fini(dev); i915_gem_fini(dev); @@@ -1458,8 -1425,6 +1425,6 @@@ static int i915_drm_suspend(struct drm_ intel_guc_suspend(dev); - intel_suspend_gt_powersave(dev_priv); - intel_display_suspend(dev); intel_dp_mst_suspend(dev); @@@ -1586,15 -1551,13 +1551,13 @@@ static int i915_drm_resume(struct drm_d disable_rpm_wakeref_asserts(dev_priv); - ret = i915_ggtt_enable_hw(dev); + ret = i915_ggtt_enable_hw(dev_priv); if (ret) DRM_ERROR("failed to re-enable GGTT\n"); intel_csr_ucode_resume(dev_priv); - mutex_lock(&dev->struct_mutex); - i915_gem_restore_gtt_mappings(dev); - mutex_unlock(&dev->struct_mutex); + i915_gem_resume(dev); i915_restore_state(dev); intel_opregion_setup(dev_priv); @@@ -1652,6 -1615,7 +1615,7 @@@ intel_opregion_notify_adapter(dev_priv, PCI_D0); + intel_autoenable_gt_powersave(dev_priv); drm_kms_helper_poll_enable(dev); enable_rpm_wakeref_asserts(dev_priv); @@@ -1778,8 -1742,6 +1742,6 @@@ int i915_reset(struct drm_i915_private unsigned reset_counter; int ret; - intel_reset_gt_powersave(dev_priv); - mutex_lock(&dev->struct_mutex); /* Clear any previous failed attempts at recovery. Time to try again. */ @@@ -1835,8 -1797,7 +1797,7 @@@ * previous concerns that it doesn't respond well to some forms * of re-init after reset. */ - if (INTEL_INFO(dev)->gen > 5) - intel_enable_gt_powersave(dev_priv); + intel_autoenable_gt_powersave(dev_priv); return 0; @@@ -2462,7 -2423,6 +2423,6 @@@ static int intel_runtime_resume(struct * we can do is to hope that things will still work (and disable RPM). 
 */
 	i915_gem_init_swizzling(dev);
-	gen6_update_ring_freq(dev_priv);
 
 	intel_runtime_pm_enable_interrupts(dev_priv);
 
@@@ -2618,6 -2578,7 +2578,7 @@@ static struct drm_driver driver = 
 	.postclose = i915_driver_postclose,
 	.set_busid = drm_pci_set_busid,
 
+ 	.gem_close_object = i915_gem_close_object,
 	.gem_free_object = i915_gem_free_object,
 	.gem_vm_ops = &i915_gem_vm_ops,
 
diff --combined drivers/gpu/drm/i915/i915_gem.c
index 11681501d7b1,7a00678ae729..f4f8eaa90f2a
--- a/drivers/gpu/drm/i915/i915_gem.c
+++ b/drivers/gpu/drm/i915/i915_gem.c
@@@ -29,10 -29,13 +29,13 @@@
 #include <drm/drmP.h>
 #include <drm/drm_vma_manager.h>
 #include <drm/i915_drm.h>
 #include "i915_drv.h"
+ #include "i915_gem_dmabuf.h"
 #include "i915_vgpu.h"
 #include "i915_trace.h"
 #include "intel_drv.h"
+ #include "intel_frontbuffer.h"
 #include "intel_mocs.h"
+ #include <linux/reservation.h>
 #include <linux/shmem_fs.h>
 #include <linux/slab.h>
 #include <linux/swap.h>
@@@ -41,10 -44,6 +44,6 @@@
 static void
 i915_gem_object_flush_gtt_write_domain(struct drm_i915_gem_object *obj);
 static void
 i915_gem_object_flush_cpu_write_domain(struct drm_i915_gem_object *obj);
- static void
- i915_gem_object_retire__write(struct drm_i915_gem_object *obj);
- static void
- i915_gem_object_retire__read(struct drm_i915_gem_object *obj, int ring);
 
 static bool cpu_cache_is_coherent(struct drm_device *dev,
 				  enum i915_cache_level level)
@@@ -139,7 -138,6 +138,6 @@@ int i915_mutex_lock_interruptible(struc
 	if (ret)
 		return ret;
 
-	WARN_ON(i915_verify_lists(dev));
 	return 0;
 }
 
@@@ -156,10 -154,10 +154,10 @@@ i915_gem_get_aperture_ioctl(struct drm_
 	pinned = 0;
 	mutex_lock(&dev->struct_mutex);
 	list_for_each_entry(vma, &ggtt->base.active_list, vm_link)
-		if (vma->pin_count)
+		if (i915_vma_is_pinned(vma))
 			pinned += vma->node.size;
 	list_for_each_entry(vma, &ggtt->base.inactive_list, vm_link)
-		if (vma->pin_count)
+		if (i915_vma_is_pinned(vma))
 			pinned += vma->node.size;
 	mutex_unlock(&dev->struct_mutex);
 
@@@ -172,7 -170,7 +170,7 @@@ static int
 i915_gem_object_get_pages_phys(struct drm_i915_gem_object *obj)
 {
-	struct address_space *mapping = file_inode(obj->base.filp)->i_mapping;
+	struct address_space *mapping = obj->base.filp->f_mapping;
 	char *vaddr = obj->phys_handle->vaddr;
 	struct sg_table *st;
 	struct scatterlist *sg;
@@@ -239,7 -237,7 +237,7 @@@ i915_gem_object_put_pages_phys(struct d
 		obj->dirty = 0;
 
 	if (obj->dirty) {
-		struct address_space *mapping = file_inode(obj->base.filp)->i_mapping;
+		struct address_space *mapping = obj->base.filp->f_mapping;
 		char *vaddr = obj->phys_handle->vaddr;
 		int i;
 
@@@ -281,23 -279,119 +279,119 @@@ static const struct drm_i915_gem_object
 	.release = i915_gem_object_release_phys,
 };
 
- static int
- drop_pages(struct drm_i915_gem_object *obj)
+ int
+ i915_gem_object_unbind(struct drm_i915_gem_object *obj)
 {
-	struct i915_vma *vma, *next;
+	struct i915_vma *vma;
+	LIST_HEAD(still_in_list);
 	int ret;
 
-	drm_gem_object_reference(&obj->base);
-	list_for_each_entry_safe(vma, next, &obj->vma_list, obj_link)
-		if (i915_vma_unbind(vma))
+	/* The vma will only be freed if it is marked as closed, and if we wait
+	 * upon rendering to the vma, we may unbind anything in the list.
+	 */
+	while ((vma = list_first_entry_or_null(&obj->vma_list,
+					       struct i915_vma,
+					       obj_link))) {
+		list_move_tail(&vma->obj_link, &still_in_list);
+		ret = i915_vma_unbind(vma);
+		if (ret)
 			break;
-
-	ret = i915_gem_object_put_pages(obj);
-	drm_gem_object_unreference(&obj->base);
+	}
+	list_splice(&still_in_list, &obj->vma_list);
 
 	return ret;
 }
 
+ /**
+  * Ensures that all rendering to the object has completed and the object is
+  * safe to unbind from the GTT or access from the CPU.
+ * @obj: i915 gem object + * @readonly: waiting for just read access or read-write access + */ + int + i915_gem_object_wait_rendering(struct drm_i915_gem_object *obj, + bool readonly) + { + struct reservation_object *resv; + struct i915_gem_active *active; + unsigned long active_mask; + int idx; + + lockdep_assert_held(&obj->base.dev->struct_mutex); + + if (!readonly) { + active = obj->last_read; + active_mask = i915_gem_object_get_active(obj); + } else { + active_mask = 1; + active = &obj->last_write; + } + + for_each_active(active_mask, idx) { + int ret; + + ret = i915_gem_active_wait(&active[idx], + &obj->base.dev->struct_mutex); + if (ret) + return ret; + } + + resv = i915_gem_object_get_dmabuf_resv(obj); + if (resv) { + long err; + + err = reservation_object_wait_timeout_rcu(resv, !readonly, true, + MAX_SCHEDULE_TIMEOUT); + if (err < 0) + return err; + } + + return 0; + } + + /* A nonblocking variant of the above wait. Must be called prior to + * acquiring the mutex for the object, as the object state may change + * during this call. A reference must be held by the caller for the object. + */ + static __must_check int + __unsafe_wait_rendering(struct drm_i915_gem_object *obj, + struct intel_rps_client *rps, + bool readonly) + { + struct i915_gem_active *active; + unsigned long active_mask; + int idx; + + active_mask = __I915_BO_ACTIVE(obj); + if (!active_mask) + return 0; + + if (!readonly) { + active = obj->last_read; + } else { + active_mask = 1; + active = &obj->last_write; + } + + for_each_active(active_mask, idx) { + int ret; + + ret = i915_gem_active_wait_unlocked(&active[idx], + true, NULL, rps); + if (ret) + return ret; + } + + return 0; + } + + static struct intel_rps_client *to_rps_client(struct drm_file *file) + { + struct drm_i915_file_private *fpriv = file->driver_priv; + + return &fpriv->rps; + } + int i915_gem_object_attach_phys(struct drm_i915_gem_object *obj, int align) @@@ -318,7 -412,11 +412,11 @@@ if (obj->base.filp == NULL) return -EINVAL; - ret = drop_pages(obj); + ret = i915_gem_object_unbind(obj); + if (ret) + return ret; + + ret = i915_gem_object_put_pages(obj); if (ret) return ret; @@@ -408,7 -506,7 +506,7 @@@ i915_gem_create(struct drm_file *file ret = drm_gem_handle_create(file, &obj->base, &handle); /* drop reference from allocate - handle holds it now */ - drm_gem_object_unreference_unlocked(&obj->base); + i915_gem_object_put_unlocked(obj); if (ret) return ret; @@@ -511,6 -609,10 +609,10 @@@ int i915_gem_obj_prepare_shmem_read(str if (WARN_ON(!i915_gem_object_has_struct_page(obj))) return -EINVAL; + ret = i915_gem_object_wait_rendering(obj, true); + if (ret) + return ret; + if (!(obj->base.read_domains & I915_GEM_DOMAIN_CPU)) { /* If we're not in the cpu read domain, set ourself into the gtt * read domain and manually flush cachelines (if required). This @@@ -518,9 -620,6 +620,6 @@@ * anyway again before the next pread happens. 
*/ *needs_clflush = !cpu_cache_is_coherent(obj->base.dev, obj->cache_level); - ret = i915_gem_object_wait_rendering(obj, true); - if (ret) - return ret; } ret = i915_gem_object_get_pages(obj); @@@ -644,7 -743,7 +743,7 @@@ i915_gem_gtt_pread(struct drm_device *d uint64_t offset; int ret; - ret = i915_gem_obj_ggtt_pin(obj, 0, PIN_MAPPABLE); + ret = i915_gem_object_ggtt_pin(obj, NULL, 0, 0, PIN_MAPPABLE); if (ret) { ret = insert_mappable_node(dev_priv, &node, PAGE_SIZE); if (ret) @@@ -857,36 -956,44 +956,44 @@@ i915_gem_pread_ioctl(struct drm_device args->size)) return -EFAULT; - ret = i915_mutex_lock_interruptible(dev); - if (ret) - return ret; - - obj = to_intel_bo(drm_gem_object_lookup(file, args->handle)); - if (&obj->base == NULL) { - ret = -ENOENT; - goto unlock; - } + obj = i915_gem_object_lookup(file, args->handle); + if (!obj) + return -ENOENT; /* Bounds check source. */ if (args->offset > obj->base.size || args->size > obj->base.size - args->offset) { ret = -EINVAL; - goto out; + goto err; } trace_i915_gem_object_pread(obj, args->offset, args->size); + ret = __unsafe_wait_rendering(obj, to_rps_client(file), true); + if (ret) + goto err; + + ret = i915_mutex_lock_interruptible(dev); + if (ret) + goto err; + ret = i915_gem_shmem_pread(dev, obj, args, file); /* pread for non shmem backed objects */ - if (ret == -EFAULT || ret == -ENODEV) + if (ret == -EFAULT || ret == -ENODEV) { + intel_runtime_pm_get(to_i915(dev)); ret = i915_gem_gtt_pread(dev, obj, args->size, args->offset, args->data_ptr); + intel_runtime_pm_put(to_i915(dev)); + } - out: - drm_gem_object_unreference(&obj->base); - unlock: + i915_gem_object_put(obj); mutex_unlock(&dev->struct_mutex); + + return ret; + + err: + i915_gem_object_put_unlocked(obj); return ret; } @@@ -916,7 -1023,7 +1023,7 @@@ fast_user_write(struct io_mapping *mapp /** * This is the fast pwrite path, where we copy the data directly from the * user into the GTT, uncached. - * @dev: drm device pointer + * @i915: i915 device private data * @obj: i915 gem object * @args: pwrite arguments structure * @file: drm file pointer @@@ -935,10 -1042,11 +1042,11 @@@ i915_gem_gtt_pwrite_fast(struct drm_i91 int ret; bool hit_slow_path = false; - if (obj->tiling_mode != I915_TILING_NONE) + if (i915_gem_object_is_tiled(obj)) return -EFAULT; - ret = i915_gem_obj_ggtt_pin(obj, 0, PIN_MAPPABLE | PIN_NONBLOCK); + ret = i915_gem_object_ggtt_pin(obj, NULL, 0, 0, + PIN_MAPPABLE | PIN_NONBLOCK); if (ret) { ret = insert_mappable_node(i915, &node, PAGE_SIZE); if (ret) @@@ -1132,15 -1240,16 +1240,16 @@@ i915_gem_shmem_pwrite(struct drm_devic obj_do_bit17_swizzling = i915_gem_object_needs_bit17_swizzle(obj); + ret = i915_gem_object_wait_rendering(obj, false); + if (ret) + return ret; + if (obj->base.write_domain != I915_GEM_DOMAIN_CPU) { /* If we're not in the cpu write domain, set ourself into the gtt * write domain and manually flush cachelines (if required). This * optimizes for the case when the gpu will use the data * right away and we therefore have to clflush anyway. */ needs_clflush_after = cpu_write_needs_clflush(obj); - ret = i915_gem_object_wait_rendering(obj, false); - if (ret) - return ret; } /* Same trick applies to invalidate partially written cachelines read * before writing. 
*/ @@@ -1270,27 -1379,29 +1379,29 @@@ i915_gem_pwrite_ioctl(struct drm_devic return -EFAULT; } - intel_runtime_pm_get(dev_priv); - - ret = i915_mutex_lock_interruptible(dev); - if (ret) - goto put_rpm; - - obj = to_intel_bo(drm_gem_object_lookup(file, args->handle)); - if (&obj->base == NULL) { - ret = -ENOENT; - goto unlock; - } + obj = i915_gem_object_lookup(file, args->handle); + if (!obj) + return -ENOENT; /* Bounds check destination. */ if (args->offset > obj->base.size || args->size > obj->base.size - args->offset) { ret = -EINVAL; - goto out; + goto err; } trace_i915_gem_object_pwrite(obj, args->offset, args->size); + ret = __unsafe_wait_rendering(obj, to_rps_client(file), false); + if (ret) + goto err; + + intel_runtime_pm_get(dev_priv); + + ret = i915_mutex_lock_interruptible(dev); + if (ret) + goto err_rpm; + ret = -EFAULT; /* We can only do the GTT pwrite on untiled buffers, as otherwise * it would end up going through the fenced access, and we'll get @@@ -1306,7 -1417,7 +1417,7 @@@ * textures). Fallback to the shmem path in that case. */ } - if (ret == -EFAULT) { + if (ret == -EFAULT || ret == -ENOSPC) { if (obj->phys_handle) ret = i915_gem_phys_pwrite(obj, args, file); else if (i915_gem_object_has_struct_page(obj)) @@@ -1315,647 -1426,164 +1426,164 @@@ ret = -ENODEV; } - out: - drm_gem_object_unreference(&obj->base); - unlock: + i915_gem_object_put(obj); mutex_unlock(&dev->struct_mutex); - put_rpm: intel_runtime_pm_put(dev_priv); return ret; - } - - static int - i915_gem_check_wedge(unsigned reset_counter, bool interruptible) - { - if (__i915_terminally_wedged(reset_counter)) - return -EIO; - - if (__i915_reset_in_progress(reset_counter)) { - /* Non-interruptible callers can't handle -EAGAIN, hence return - * -EIO unconditionally for these. */ - if (!interruptible) - return -EIO; - return -EAGAIN; - } - - return 0; + err_rpm: + intel_runtime_pm_put(dev_priv); + err: + i915_gem_object_put_unlocked(obj); + return ret; } - static unsigned long local_clock_us(unsigned *cpu) + static enum fb_op_origin + write_origin(struct drm_i915_gem_object *obj, unsigned domain) { - unsigned long t; - - /* Cheaply and approximately convert from nanoseconds to microseconds. - * The result and subsequent calculations are also defined in the same - * approximate microseconds units. The principal source of timing - * error here is from the simple truncation. - * - * Note that local_clock() is only defined wrt to the current CPU; - * the comparisons are no longer valid if we switch CPUs. Instead of - * blocking preemption for the entire busywait, we can detect the CPU - * switch and use that as indicator of system load and a reason to - * stop busywaiting, see busywait_stop(). - */ - *cpu = get_cpu(); - t = local_clock() >> 10; - put_cpu(); - - return t; + return domain == I915_GEM_DOMAIN_GTT && !obj->has_wc_mmap ? + ORIGIN_GTT : ORIGIN_CPU; } - static bool busywait_stop(unsigned long timeout, unsigned cpu) + /** + * Called when user space prepares to use an object with the CPU, either + * through the mmap ioctl's mapping or a GTT mapping. 
+ * @dev: drm device + * @data: ioctl data blob + * @file: drm file + */ + int + i915_gem_set_domain_ioctl(struct drm_device *dev, void *data, + struct drm_file *file) { - unsigned this_cpu; + struct drm_i915_gem_set_domain *args = data; + struct drm_i915_gem_object *obj; + uint32_t read_domains = args->read_domains; + uint32_t write_domain = args->write_domain; + int ret; - if (time_after(local_clock_us(&this_cpu), timeout)) - return true; + /* Only handle setting domains to types used by the CPU. */ + if ((write_domain | read_domains) & I915_GEM_GPU_DOMAINS) + return -EINVAL; - return this_cpu != cpu; - } + /* Having something in the write domain implies it's in the read + * domain, and only that read domain. Enforce that in the request. + */ + if (write_domain != 0 && read_domains != write_domain) + return -EINVAL; - bool __i915_spin_request(const struct drm_i915_gem_request *req, - int state, unsigned long timeout_us) - { - unsigned cpu; + obj = i915_gem_object_lookup(file, args->handle); + if (!obj) + return -ENOENT; - /* When waiting for high frequency requests, e.g. during synchronous - * rendering split between the CPU and GPU, the finite amount of time - * required to set up the irq and wait upon it limits the response - * rate. By busywaiting on the request completion for a short while we - * can service the high frequency waits as quick as possible. However, - * if it is a slow request, we want to sleep as quickly as possible. - * The tradeoff between waiting and sleeping is roughly the time it - * takes to sleep on a request, on the order of a microsecond. + /* Try to flush the object off the GPU without holding the lock. + * We will repeat the flush holding the lock in the normal manner + * to catch cases where we are gazumped. */ + ret = __unsafe_wait_rendering(obj, to_rps_client(file), !write_domain); + if (ret) + goto err; - timeout_us += local_clock_us(&cpu); - do { - if (i915_gem_request_completed(req)) - return true; + ret = i915_mutex_lock_interruptible(dev); + if (ret) + goto err; - if (signal_pending_state(state, current)) - break; + if (read_domains & I915_GEM_DOMAIN_GTT) + ret = i915_gem_object_set_to_gtt_domain(obj, write_domain != 0); + else + ret = i915_gem_object_set_to_cpu_domain(obj, write_domain != 0); - if (busywait_stop(timeout_us, cpu)) - break; + if (write_domain != 0) + intel_fb_obj_invalidate(obj, write_origin(obj, write_domain)); - cpu_relax_lowlatency(); - } while (!need_resched()); + i915_gem_object_put(obj); + mutex_unlock(&dev->struct_mutex); + return ret; - return false; + err: + i915_gem_object_put_unlocked(obj); + return ret; } /** - * __i915_wait_request - wait until execution of request has finished - * @req: duh! - * @interruptible: do an interruptible wait (normally yes) - * @timeout: in - how long to wait (NULL forever); out - how much time remaining - * @rps: RPS client - * - * Note: It is of utmost importance that the passed in seqno and reset_counter - * values have been read by the caller in an smp safe manner. Where read-side - * locks are involved, it is sufficient to read the reset_counter before - * unlocking the lock that protects the seqno. For lockless tricks, the - * reset_counter _must_ be read before, and an appropriate smp_rmb must be - * inserted. - * - * Returns 0 if the request was found within the alloted time. Else returns the - * errno with remaining time filled in timeout argument. 
+ * Called when user space has done writes to this buffer + * @dev: drm device + * @data: ioctl data blob + * @file: drm file */ - int __i915_wait_request(struct drm_i915_gem_request *req, - bool interruptible, - s64 *timeout, - struct intel_rps_client *rps) - { - int state = interruptible ? TASK_INTERRUPTIBLE : TASK_UNINTERRUPTIBLE; - DEFINE_WAIT(reset); - struct intel_wait wait; - unsigned long timeout_remain; - s64 before = 0; /* Only to silence a compiler warning. */ - int ret = 0; + int + i915_gem_sw_finish_ioctl(struct drm_device *dev, void *data, + struct drm_file *file) + { + struct drm_i915_gem_sw_finish *args = data; + struct drm_i915_gem_object *obj; + int err = 0; - might_sleep(); + obj = i915_gem_object_lookup(file, args->handle); + if (!obj) + return -ENOENT; - if (list_empty(&req->list)) - return 0; + /* Pinned buffers may be scanout, so flush the cache */ + if (READ_ONCE(obj->pin_display)) { + err = i915_mutex_lock_interruptible(dev); + if (!err) { + i915_gem_object_flush_cpu_write_domain(obj); + mutex_unlock(&dev->struct_mutex); + } + } - if (i915_gem_request_completed(req)) - return 0; + i915_gem_object_put_unlocked(obj); + return err; + } - timeout_remain = MAX_SCHEDULE_TIMEOUT; - if (timeout) { - if (WARN_ON(*timeout < 0)) - return -EINVAL; + /** + * i915_gem_mmap_ioctl - Maps the contents of an object, returning the address + * it is mapped to. + * @dev: drm device + * @data: ioctl data blob + * @file: drm file + * + * While the mapping holds a reference on the contents of the object, it doesn't + * imply a ref on the object itself. + * + * IMPORTANT: + * + * DRM driver writers who look a this function as an example for how to do GEM + * mmap support, please don't implement mmap support like here. The modern way + * to implement DRM mmap support is with an mmap offset ioctl (like + * i915_gem_mmap_gtt) and then using the mmap syscall on the DRM fd directly. + * That way debug tooling like valgrind will understand what's going on, hiding + * the mmap call in a driver private ioctl will break that. The i915 driver only + * does cpu mmaps this way because we didn't know better. + */ + int + i915_gem_mmap_ioctl(struct drm_device *dev, void *data, + struct drm_file *file) + { + struct drm_i915_gem_mmap *args = data; + struct drm_i915_gem_object *obj; + unsigned long addr; - if (*timeout == 0) - return -ETIME; + if (args->flags & ~(I915_MMAP_WC)) + return -EINVAL; - timeout_remain = nsecs_to_jiffies_timeout(*timeout); + if (args->flags & I915_MMAP_WC && !boot_cpu_has(X86_FEATURE_PAT)) + return -ENODEV; - /* - * Record current time in case interrupted by signal, or wedged. - */ - before = ktime_get_raw_ns(); - } + obj = i915_gem_object_lookup(file, args->handle); + if (!obj) + return -ENOENT; - trace_i915_gem_request_wait_begin(req); - - /* This client is about to stall waiting for the GPU. In many cases - * this is undesirable and limits the throughput of the system, as - * many clients cannot continue processing user input/output whilst - * blocked. RPS autotuning may take tens of milliseconds to respond - * to the GPU load and thus incurs additional latency for the client. - * We can circumvent that by promoting the GPU frequency to maximum - * before we wait. This makes the GPU throttle up much more quickly - * (good for benchmarks and user experience, e.g. window animations), - * but at a cost of spending more power processing the workload - * (bad for battery). 
Not all clients even want their results - * immediately and for them we should just let the GPU select its own - * frequency to maximise efficiency. To prevent a single client from - * forcing the clocks too high for the whole system, we only allow - * each client to waitboost once in a busy period. + /* prime objects have no backing filp to GEM mmap + * pages from. */ - if (INTEL_INFO(req->i915)->gen >= 6) - gen6_rps_boost(req->i915, rps, req->emitted_jiffies); - - /* Optimistic spin for the next ~jiffie before touching IRQs */ - if (i915_spin_request(req, state, 5)) - goto complete; + if (!obj->base.filp) { + i915_gem_object_put_unlocked(obj); + return -EINVAL; + } - set_current_state(state); - add_wait_queue(&req->i915->gpu_error.wait_queue, &reset); - - intel_wait_init(&wait, req->seqno); - if (intel_engine_add_wait(req->engine, &wait)) - /* In order to check that we haven't missed the interrupt - * as we enabled it, we need to kick ourselves to do a - * coherent check on the seqno before we sleep. - */ - goto wakeup; - - for (;;) { - if (signal_pending_state(state, current)) { - ret = -ERESTARTSYS; - break; - } - - timeout_remain = io_schedule_timeout(timeout_remain); - if (timeout_remain == 0) { - ret = -ETIME; - break; - } - - if (intel_wait_complete(&wait)) - break; - - set_current_state(state); - - wakeup: - /* Carefully check if the request is complete, giving time - * for the seqno to be visible following the interrupt. - * We also have to check in case we are kicked by the GPU - * reset in order to drop the struct_mutex. - */ - if (__i915_request_irq_complete(req)) - break; - - /* Only spin if we know the GPU is processing this request */ - if (i915_spin_request(req, state, 2)) - break; - } - remove_wait_queue(&req->i915->gpu_error.wait_queue, &reset); - - intel_engine_remove_wait(req->engine, &wait); - __set_current_state(TASK_RUNNING); - complete: - trace_i915_gem_request_wait_end(req); - - if (timeout) { - s64 tres = *timeout - (ktime_get_raw_ns() - before); - - *timeout = tres < 0 ? 0 : tres; - - /* - * Apparently ktime isn't accurate enough and occasionally has a - * bit of mismatch in the jiffies<->nsecs<->ktime loop. So patch - * things up to make the test happy. We allow up to 1 jiffy. - * - * This is a regrssion from the timespec->ktime conversion. - */ - if (ret == -ETIME && *timeout < jiffies_to_usecs(1)*1000) - *timeout = 0; - } - - if (rps && req->seqno == req->engine->last_submitted_seqno) { - /* The GPU is now idle and this client has stalled. - * Since no other client has submitted a request in the - * meantime, assume that this client is the only one - * supplying work to the GPU but is unable to keep that - * work supplied because it is waiting. Since the GPU is - * then never kept fully busy, RPS autoclocking will - * keep the clocks relatively low, causing further delays. - * Compensate by giving the synchronous client credit for - * a waitboost next time. 
- */ - spin_lock(&req->i915->rps.client_lock); - list_del_init(&rps->link); - spin_unlock(&req->i915->rps.client_lock); - } - - return ret; - } - - int i915_gem_request_add_to_client(struct drm_i915_gem_request *req, - struct drm_file *file) - { - struct drm_i915_file_private *file_priv; - - WARN_ON(!req || !file || req->file_priv); - - if (!req || !file) - return -EINVAL; - - if (req->file_priv) - return -EINVAL; - - file_priv = file->driver_priv; - - spin_lock(&file_priv->mm.lock); - req->file_priv = file_priv; - list_add_tail(&req->client_list, &file_priv->mm.request_list); - spin_unlock(&file_priv->mm.lock); - - req->pid = get_pid(task_pid(current)); - - return 0; - } - - static inline void - i915_gem_request_remove_from_client(struct drm_i915_gem_request *request) - { - struct drm_i915_file_private *file_priv = request->file_priv; - - if (!file_priv) - return; - - spin_lock(&file_priv->mm.lock); - list_del(&request->client_list); - request->file_priv = NULL; - spin_unlock(&file_priv->mm.lock); - - put_pid(request->pid); - request->pid = NULL; - } - - static void i915_gem_request_retire(struct drm_i915_gem_request *request) - { - trace_i915_gem_request_retire(request); - - /* We know the GPU must have read the request to have - * sent us the seqno + interrupt, so use the position - * of tail of the request to update the last known position - * of the GPU head. - * - * Note this requires that we are always called in request - * completion order. - */ - request->ringbuf->last_retired_head = request->postfix; - - list_del_init(&request->list); - i915_gem_request_remove_from_client(request); - - if (request->previous_context) { - if (i915.enable_execlists) - intel_lr_context_unpin(request->previous_context, - request->engine); - } - - i915_gem_context_unreference(request->ctx); - i915_gem_request_unreference(request); - } - - static void - __i915_gem_request_retire__upto(struct drm_i915_gem_request *req) - { - struct intel_engine_cs *engine = req->engine; - struct drm_i915_gem_request *tmp; - - lockdep_assert_held(&engine->i915->drm.struct_mutex); - - if (list_empty(&req->list)) - return; - - do { - tmp = list_first_entry(&engine->request_list, - typeof(*tmp), list); - - i915_gem_request_retire(tmp); - } while (tmp != req); - - WARN_ON(i915_verify_lists(engine->dev)); - } - - /** - * Waits for a request to be signaled, and cleans up the - * request and object lists appropriately for that event. - * @req: request to wait on - */ - int - i915_wait_request(struct drm_i915_gem_request *req) - { - struct drm_i915_private *dev_priv = req->i915; - bool interruptible; - int ret; - - interruptible = dev_priv->mm.interruptible; - - BUG_ON(!mutex_is_locked(&dev_priv->drm.struct_mutex)); - - ret = __i915_wait_request(req, interruptible, NULL, NULL); - if (ret) - return ret; - - /* If the GPU hung, we want to keep the requests to find the guilty. */ - if (!i915_reset_in_progress(&dev_priv->gpu_error)) - __i915_gem_request_retire__upto(req); - - return 0; - } - - /** - * Ensures that all rendering to the object has completed and the object is - * safe to unbind from the GTT or access from the CPU. 
- * @obj: i915 gem object - * @readonly: waiting for read access or write - */ - int - i915_gem_object_wait_rendering(struct drm_i915_gem_object *obj, - bool readonly) - { - int ret, i; - - if (!obj->active) - return 0; - - if (readonly) { - if (obj->last_write_req != NULL) { - ret = i915_wait_request(obj->last_write_req); - if (ret) - return ret; - - i = obj->last_write_req->engine->id; - if (obj->last_read_req[i] == obj->last_write_req) - i915_gem_object_retire__read(obj, i); - else - i915_gem_object_retire__write(obj); - } - } else { - for (i = 0; i < I915_NUM_ENGINES; i++) { - if (obj->last_read_req[i] == NULL) - continue; - - ret = i915_wait_request(obj->last_read_req[i]); - if (ret) - return ret; - - i915_gem_object_retire__read(obj, i); - } - GEM_BUG_ON(obj->active); - } - - return 0; - } - - static void - i915_gem_object_retire_request(struct drm_i915_gem_object *obj, - struct drm_i915_gem_request *req) - { - int ring = req->engine->id; - - if (obj->last_read_req[ring] == req) - i915_gem_object_retire__read(obj, ring); - else if (obj->last_write_req == req) - i915_gem_object_retire__write(obj); - - if (!i915_reset_in_progress(&req->i915->gpu_error)) - __i915_gem_request_retire__upto(req); - } - - /* A nonblocking variant of the above wait. This is a highly dangerous routine - * as the object state may change during this call. - */ - static __must_check int - i915_gem_object_wait_rendering__nonblocking(struct drm_i915_gem_object *obj, - struct intel_rps_client *rps, - bool readonly) - { - struct drm_device *dev = obj->base.dev; - struct drm_i915_private *dev_priv = to_i915(dev); - struct drm_i915_gem_request *requests[I915_NUM_ENGINES]; - int ret, i, n = 0; - - BUG_ON(!mutex_is_locked(&dev->struct_mutex)); - BUG_ON(!dev_priv->mm.interruptible); - - if (!obj->active) - return 0; - - if (readonly) { - struct drm_i915_gem_request *req; - - req = obj->last_write_req; - if (req == NULL) - return 0; - - requests[n++] = i915_gem_request_reference(req); - } else { - for (i = 0; i < I915_NUM_ENGINES; i++) { - struct drm_i915_gem_request *req; - - req = obj->last_read_req[i]; - if (req == NULL) - continue; - - requests[n++] = i915_gem_request_reference(req); - } - } - - mutex_unlock(&dev->struct_mutex); - ret = 0; - for (i = 0; ret == 0 && i < n; i++) - ret = __i915_wait_request(requests[i], true, NULL, rps); - mutex_lock(&dev->struct_mutex); - - for (i = 0; i < n; i++) { - if (ret == 0) - i915_gem_object_retire_request(obj, requests[i]); - i915_gem_request_unreference(requests[i]); - } - - return ret; - } - - static struct intel_rps_client *to_rps_client(struct drm_file *file) - { - struct drm_i915_file_private *fpriv = file->driver_priv; - return &fpriv->rps; - } - - static enum fb_op_origin - write_origin(struct drm_i915_gem_object *obj, unsigned domain) - { - return domain == I915_GEM_DOMAIN_GTT && !obj->has_wc_mmap ? - ORIGIN_GTT : ORIGIN_CPU; - } - - /** - * Called when user space prepares to use an object with the CPU, either - * through the mmap ioctl's mapping or a GTT mapping. - * @dev: drm device - * @data: ioctl data blob - * @file: drm file - */ - int - i915_gem_set_domain_ioctl(struct drm_device *dev, void *data, - struct drm_file *file) - { - struct drm_i915_gem_set_domain *args = data; - struct drm_i915_gem_object *obj; - uint32_t read_domains = args->read_domains; - uint32_t write_domain = args->write_domain; - int ret; - - /* Only handle setting domains to types used by the CPU. 
*/ - if (write_domain & I915_GEM_GPU_DOMAINS) - return -EINVAL; - - if (read_domains & I915_GEM_GPU_DOMAINS) - return -EINVAL; - - /* Having something in the write domain implies it's in the read - * domain, and only that read domain. Enforce that in the request. - */ - if (write_domain != 0 && read_domains != write_domain) - return -EINVAL; - - ret = i915_mutex_lock_interruptible(dev); - if (ret) - return ret; - - obj = to_intel_bo(drm_gem_object_lookup(file, args->handle)); - if (&obj->base == NULL) { - ret = -ENOENT; - goto unlock; - } - - /* Try to flush the object off the GPU without holding the lock. - * We will repeat the flush holding the lock in the normal manner - * to catch cases where we are gazumped. - */ - ret = i915_gem_object_wait_rendering__nonblocking(obj, - to_rps_client(file), - !write_domain); - if (ret) - goto unref; - - if (read_domains & I915_GEM_DOMAIN_GTT) - ret = i915_gem_object_set_to_gtt_domain(obj, write_domain != 0); - else - ret = i915_gem_object_set_to_cpu_domain(obj, write_domain != 0); - - if (write_domain != 0) - intel_fb_obj_invalidate(obj, write_origin(obj, write_domain)); - - unref: - drm_gem_object_unreference(&obj->base); - unlock: - mutex_unlock(&dev->struct_mutex); - return ret; - } - - /** - * Called when user space has done writes to this buffer - * @dev: drm device - * @data: ioctl data blob - * @file: drm file - */ - int - i915_gem_sw_finish_ioctl(struct drm_device *dev, void *data, - struct drm_file *file) - { - struct drm_i915_gem_sw_finish *args = data; - struct drm_i915_gem_object *obj; - int ret = 0; - - ret = i915_mutex_lock_interruptible(dev); - if (ret) - return ret; - - obj = to_intel_bo(drm_gem_object_lookup(file, args->handle)); - if (&obj->base == NULL) { - ret = -ENOENT; - goto unlock; - } - - /* Pinned buffers may be scanout, so flush the cache */ - if (obj->pin_display) - i915_gem_object_flush_cpu_write_domain(obj); - - drm_gem_object_unreference(&obj->base); - unlock: - mutex_unlock(&dev->struct_mutex); - return ret; - } - - /** - * i915_gem_mmap_ioctl - Maps the contents of an object, returning the address - * it is mapped to. - * @dev: drm device - * @data: ioctl data blob - * @file: drm file - * - * While the mapping holds a reference on the contents of the object, it doesn't - * imply a ref on the object itself. - * - * IMPORTANT: - * - * DRM driver writers who look a this function as an example for how to do GEM - * mmap support, please don't implement mmap support like here. The modern way - * to implement DRM mmap support is with an mmap offset ioctl (like - * i915_gem_mmap_gtt) and then using the mmap syscall on the DRM fd directly. - * That way debug tooling like valgrind will understand what's going on, hiding - * the mmap call in a driver private ioctl will break that. The i915 driver only - * does cpu mmaps this way because we didn't know better. - */ - int - i915_gem_mmap_ioctl(struct drm_device *dev, void *data, - struct drm_file *file) - { - struct drm_i915_gem_mmap *args = data; - struct drm_gem_object *obj; - unsigned long addr; - - if (args->flags & ~(I915_MMAP_WC)) - return -EINVAL; - - if (args->flags & I915_MMAP_WC && !boot_cpu_has(X86_FEATURE_PAT)) - return -ENODEV; - - obj = drm_gem_object_lookup(file, args->handle); - if (obj == NULL) - return -ENOENT; - - /* prime objects have no backing filp to GEM mmap - * pages from. 
- */ - if (!obj->filp) { - drm_gem_object_unreference_unlocked(obj); - return -EINVAL; - } - - addr = vm_mmap(obj->filp, 0, args->size, + addr = vm_mmap(obj->base.filp, 0, args->size, PROT_READ | PROT_WRITE, MAP_SHARED, args->offset); if (args->flags & I915_MMAP_WC) { @@@ -1963,7 -1591,7 +1591,7 @@@ struct vm_area_struct *vma; if (down_write_killable(&mm->mmap_sem)) { - drm_gem_object_unreference_unlocked(obj); + i915_gem_object_put_unlocked(obj); return -EINTR; } vma = find_vma(mm, addr); @@@ -1975,9 -1603,9 +1603,9 @@@ up_write(&mm->mmap_sem); /* This may race, but that's ok, it only gets set */ - WRITE_ONCE(to_intel_bo(obj)->has_wc_mmap, true); + WRITE_ONCE(obj->has_wc_mmap, true); } - drm_gem_object_unreference_unlocked(obj); + i915_gem_object_put_unlocked(obj); if (IS_ERR((void *)addr)) return addr; @@@ -2009,41 -1637,41 +1637,41 @@@ int i915_gem_fault(struct vm_area_struc struct drm_i915_private *dev_priv = to_i915(dev); struct i915_ggtt *ggtt = &dev_priv->ggtt; struct i915_ggtt_view view = i915_ggtt_view_normal; + bool write = !!(vmf->flags & FAULT_FLAG_WRITE); pgoff_t page_offset; unsigned long pfn; - int ret = 0; - bool write = !!(vmf->flags & FAULT_FLAG_WRITE); - - intel_runtime_pm_get(dev_priv); + int ret; /* We don't use vmf->pgoff since that has the fake offset */ page_offset = ((unsigned long)vmf->virtual_address - vma->vm_start) >> PAGE_SHIFT; - ret = i915_mutex_lock_interruptible(dev); - if (ret) - goto out; - trace_i915_gem_object_fault(obj, page_offset, true, write); /* Try to flush the object off the GPU first without holding the lock. - * Upon reacquiring the lock, we will perform our sanity checks and then + * Upon acquiring the lock, we will perform our sanity checks and then * repeat the flush holding the lock in the normal manner to catch cases * where we are gazumped. */ - ret = i915_gem_object_wait_rendering__nonblocking(obj, NULL, !write); + ret = __unsafe_wait_rendering(obj, NULL, !write); if (ret) - goto unlock; + goto err; + + intel_runtime_pm_get(dev_priv); + + ret = i915_mutex_lock_interruptible(dev); + if (ret) + goto err_rpm; /* Access to snoopable pages through the GTT is incoherent. */ if (obj->cache_level != I915_CACHE_NONE && !HAS_LLC(dev)) { ret = -EFAULT; - goto unlock; + goto err_unlock; } /* Use a partial view if the object is bigger than the aperture. 
*/ if (obj->base.size >= ggtt->mappable_end && - obj->tiling_mode == I915_TILING_NONE) { + !i915_gem_object_is_tiled(obj)) { static const unsigned int chunk_size = 256; // 1 MiB memset(&view, 0, sizeof(view)); @@@ -2057,17 -1685,17 +1685,17 @@@ } /* Now pin it into the GTT if needed */ - ret = i915_gem_object_ggtt_pin(obj, &view, 0, PIN_MAPPABLE); + ret = i915_gem_object_ggtt_pin(obj, &view, 0, 0, PIN_MAPPABLE); if (ret) - goto unlock; + goto err_unlock; ret = i915_gem_object_set_to_gtt_domain(obj, write); if (ret) - goto unpin; + goto err_unpin; ret = i915_gem_object_get_fence(obj); if (ret) - goto unpin; + goto err_unpin; /* Finally, remap it using the new GTT offset */ pfn = ggtt->mappable_base + @@@ -2112,11 -1740,13 +1740,13 @@@ (unsigned long)vmf->virtual_address, pfn + page_offset); } - unpin: + err_unpin: i915_gem_object_ggtt_unpin_view(obj, &view); - unlock: + err_unlock: mutex_unlock(&dev->struct_mutex); - out: + err_rpm: + intel_runtime_pm_put(dev_priv); + err: switch (ret) { case -EIO: /* @@@ -2157,8 -1787,6 +1787,6 @@@ ret = VM_FAULT_SIGBUS; break; } - - intel_runtime_pm_put(dev_priv); return ret; } @@@ -2212,46 -1840,58 +1840,58 @@@ i915_gem_release_all_mmaps(struct drm_i i915_gem_release_mmap(obj); } - uint32_t - i915_gem_get_gtt_size(struct drm_device *dev, uint32_t size, int tiling_mode) + /** + * i915_gem_get_ggtt_size - return required global GTT size for an object + * @dev_priv: i915 device + * @size: object size + * @tiling_mode: tiling mode + * + * Return the required global GTT size for an object, taking into account + * potential fence register mapping. + */ + u64 i915_gem_get_ggtt_size(struct drm_i915_private *dev_priv, + u64 size, int tiling_mode) { - uint32_t gtt_size; + u64 ggtt_size; + + GEM_BUG_ON(size == 0); - if (INTEL_INFO(dev)->gen >= 4 || + if (INTEL_GEN(dev_priv) >= 4 || tiling_mode == I915_TILING_NONE) return size; /* Previous chips need a power-of-two fence region when tiling */ - if (IS_GEN3(dev)) - gtt_size = 1024*1024; + if (IS_GEN3(dev_priv)) + ggtt_size = 1024*1024; else - gtt_size = 512*1024; + ggtt_size = 512*1024; - while (gtt_size < size) - gtt_size <<= 1; + while (ggtt_size < size) + ggtt_size <<= 1; - return gtt_size; + return ggtt_size; } /** - * i915_gem_get_gtt_alignment - return required GTT alignment for an object - * @dev: drm device + * i915_gem_get_ggtt_alignment - return required global GTT alignment + * @dev_priv: i915 device * @size: object size * @tiling_mode: tiling mode - * @fenced: is fenced alignemned required or not + * @fenced: is fenced alignment required or not * - * Return the required GTT alignment for an object, taking into account + * Return the required global GTT alignment for an object, taking into account * potential fence register mapping. */ - uint32_t - i915_gem_get_gtt_alignment(struct drm_device *dev, uint32_t size, - int tiling_mode, bool fenced) + u64 i915_gem_get_ggtt_alignment(struct drm_i915_private *dev_priv, u64 size, + int tiling_mode, bool fenced) { + GEM_BUG_ON(size == 0); + /* * Minimum alignment is 4k (GTT page size), but might be greater * if a fence register is needed for the object. */ - if (INTEL_INFO(dev)->gen >= 4 || (!fenced && IS_G33(dev)) || + if (INTEL_GEN(dev_priv) >= 4 || (!fenced && IS_G33(dev_priv)) || tiling_mode == I915_TILING_NONE) return 4096; @@@ -2259,42 -1899,34 +1899,34 @@@ * Previous chips need to be aligned to the size of the smallest * fence register that can contain the object. 
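Put differently, the doubling loop in i915_gem_get_ggtt_size() is an open-coded power-of-two round-up with a per-generation floor, and only gen2/gen3 with tiling take this path. An equivalent closed form, as a sketch (with floor standing in for the chip-specific minimum):

	/* floor is 1 MiB on gen3 and 512 KiB on gen2; e.g. a 1.5 MiB
	 * tiled object on gen3 consumes a 2 MiB fence region. */
	return max_t(u64, floor, roundup_pow_of_two(size));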
*/ - return i915_gem_get_gtt_size(dev, size, tiling_mode); + return i915_gem_get_ggtt_size(dev_priv, size, tiling_mode); } static int i915_gem_object_create_mmap_offset(struct drm_i915_gem_object *obj) { struct drm_i915_private *dev_priv = to_i915(obj->base.dev); - int ret; - - dev_priv->mm.shrinker_no_lock_stealing = true; + int err; - ret = drm_gem_create_mmap_offset(&obj->base); - if (ret != -ENOSPC) - goto out; + err = drm_gem_create_mmap_offset(&obj->base); + if (!err) + return 0; - /* Badly fragmented mmap space? The only way we can recover - * space is by destroying unwanted objects. We can't randomly release - * mmap_offsets as userspace expects them to be persistent for the - * lifetime of the objects. The closest we can is to release the - * offsets on purgeable objects by truncating it and marking it purged, - * which prevents userspace from ever using that object again. + /* We can idle the GPU locklessly to flush stale objects, but in order + * to claim that space for ourselves, we need to take the big + * struct_mutex to free the requests+objects and allocate our slot. */ - i915_gem_shrink(dev_priv, - obj->base.size >> PAGE_SHIFT, - I915_SHRINK_BOUND | - I915_SHRINK_UNBOUND | - I915_SHRINK_PURGEABLE); - ret = drm_gem_create_mmap_offset(&obj->base); - if (ret != -ENOSPC) - goto out; + err = i915_gem_wait_for_idle(dev_priv, true); + if (err) + return err; - i915_gem_shrink_all(dev_priv); - ret = drm_gem_create_mmap_offset(&obj->base); - out: - dev_priv->mm.shrinker_no_lock_stealing = false; + err = i915_mutex_lock_interruptible(&dev_priv->drm); + if (!err) { + i915_gem_retire_requests(dev_priv); + err = drm_gem_create_mmap_offset(&obj->base); + mutex_unlock(&dev_priv->drm.struct_mutex); + } - return ret; + return err; } static void i915_gem_object_free_mmap_offset(struct drm_i915_gem_object *obj) @@@ -2311,32 -1943,15 +1943,15 @@@ i915_gem_mmap_gtt(struct drm_file *file struct drm_i915_gem_object *obj; int ret; - ret = i915_mutex_lock_interruptible(dev); - if (ret) - return ret; - - obj = to_intel_bo(drm_gem_object_lookup(file, handle)); - if (&obj->base == NULL) { - ret = -ENOENT; - goto unlock; - } - - if (obj->madv != I915_MADV_WILLNEED) { - DRM_DEBUG("Attempting to mmap a purgeable buffer\n"); - ret = -EFAULT; - goto out; - } + obj = i915_gem_object_lookup(file, handle); + if (!obj) + return -ENOENT; ret = i915_gem_object_create_mmap_offset(obj); - if (ret) - goto out; + if (ret == 0) + *offset = drm_vma_node_offset_addr(&obj->base.vma_node); - *offset = drm_vma_node_offset_addr(&obj->base.vma_node); - - out: - drm_gem_object_unreference(&obj->base); - unlock: - mutex_unlock(&dev->struct_mutex); + i915_gem_object_put_unlocked(obj); return ret; } @@@ -2398,7 -2013,7 +2013,7 @@@ i915_gem_object_invalidate(struct drm_i if (obj->base.filp == NULL) return; - mapping = file_inode(obj->base.filp)->i_mapping, + mapping = obj->base.filp->f_mapping, invalidate_mapping_pages(mapping, 0, (loff_t)-1); } @@@ -2454,7 -2069,7 +2069,7 @@@ i915_gem_object_put_pages(struct drm_i9 if (obj->pages_pin_count) return -EBUSY; - BUG_ON(i915_gem_obj_bound_any(obj)); + GEM_BUG_ON(obj->bind_count); /* ->put_pages might need to allocate memory for the bit17 swizzle * array, hence protect them from being reaped by removing them from gtt @@@ -2513,7 -2128,7 +2128,7 @@@ i915_gem_object_get_pages_gtt(struct dr * * Fail silently without starting the shrinker */ - mapping = file_inode(obj->base.filp)->i_mapping; + mapping = obj->base.filp->f_mapping; gfp = mapping_gfp_constraint(mapping, ~(__GFP_IO | 
__GFP_RECLAIM)); gfp |= __GFP_NORETRY | __GFP_NOWARN; sg = st->sgl; @@@ -2574,7 -2189,7 +2189,7 @@@ if (i915_gem_object_needs_bit17_swizzle(obj)) i915_gem_object_do_bit_17_swizzle(obj); - if (obj->tiling_mode != I915_TILING_NONE && + if (i915_gem_object_is_tiled(obj) && dev_priv->quirks & QUIRK_PIN_SWIZZLED_PAGES) i915_gem_object_pin_pages(obj); @@@ -2650,301 -2265,87 +2265,87 @@@ static void *i915_gem_object_map(const void *addr; /* A single page can always be kmapped */ - if (n_pages == 1) - return kmap(sg_page(sgt->sgl)); - - if (n_pages > ARRAY_SIZE(stack_pages)) { - /* Too big for stack -- allocate temporary array instead */ - pages = drm_malloc_gfp(n_pages, sizeof(*pages), GFP_TEMPORARY); - if (!pages) - return NULL; - } - - for_each_sgt_page(page, sgt_iter, sgt) - pages[i++] = page; - - /* Check that we have the expected number of pages */ - GEM_BUG_ON(i != n_pages); - - addr = vmap(pages, n_pages, 0, PAGE_KERNEL); - - if (pages != stack_pages) - drm_free_large(pages); - - return addr; - } - - /* get, pin, and map the pages of the object into kernel space */ - void *i915_gem_object_pin_map(struct drm_i915_gem_object *obj) - { - int ret; - - lockdep_assert_held(&obj->base.dev->struct_mutex); - - ret = i915_gem_object_get_pages(obj); - if (ret) - return ERR_PTR(ret); - - i915_gem_object_pin_pages(obj); - - if (!obj->mapping) { - obj->mapping = i915_gem_object_map(obj); - if (!obj->mapping) { - i915_gem_object_unpin_pages(obj); - return ERR_PTR(-ENOMEM); - } - } - - return obj->mapping; - } - - void i915_vma_move_to_active(struct i915_vma *vma, - struct drm_i915_gem_request *req) - { - struct drm_i915_gem_object *obj = vma->obj; - struct intel_engine_cs *engine; - - engine = i915_gem_request_get_engine(req); - - /* Add a reference if we're newly entering the active list. */ - if (obj->active == 0) - drm_gem_object_reference(&obj->base); - obj->active |= intel_engine_flag(engine); - - list_move_tail(&obj->engine_list[engine->id], &engine->active_list); - i915_gem_request_assign(&obj->last_read_req[engine->id], req); - - list_move_tail(&vma->vm_link, &vma->vm->active_list); - } - - static void - i915_gem_object_retire__write(struct drm_i915_gem_object *obj) - { - GEM_BUG_ON(obj->last_write_req == NULL); - GEM_BUG_ON(!(obj->active & intel_engine_flag(obj->last_write_req->engine))); - - i915_gem_request_assign(&obj->last_write_req, NULL); - intel_fb_obj_flush(obj, true, ORIGIN_CS); - } - - static void - i915_gem_object_retire__read(struct drm_i915_gem_object *obj, int ring) - { - struct i915_vma *vma; - - GEM_BUG_ON(obj->last_read_req[ring] == NULL); - GEM_BUG_ON(!(obj->active & (1 << ring))); - - list_del_init(&obj->engine_list[ring]); - i915_gem_request_assign(&obj->last_read_req[ring], NULL); - - if (obj->last_write_req && obj->last_write_req->engine->id == ring) - i915_gem_object_retire__write(obj); - - obj->active &= ~(1 << ring); - if (obj->active) - return; - - /* Bump our place on the bound list to keep it roughly in LRU order - * so that we don't steal from recently used but inactive objects - * (unless we are forced to ofc!) 
- */ - list_move_tail(&obj->global_list, - &to_i915(obj->base.dev)->mm.bound_list); - - list_for_each_entry(vma, &obj->vma_list, obj_link) { - if (!list_empty(&vma->vm_link)) - list_move_tail(&vma->vm_link, &vma->vm->inactive_list); - } - - i915_gem_request_assign(&obj->last_fenced_req, NULL); - drm_gem_object_unreference(&obj->base); - } - - static int - i915_gem_init_seqno(struct drm_i915_private *dev_priv, u32 seqno) - { - struct intel_engine_cs *engine; - int ret; - - /* Carefully retire all requests without writing to the rings */ - for_each_engine(engine, dev_priv) { - ret = intel_engine_idle(engine); - if (ret) - return ret; - } - i915_gem_retire_requests(dev_priv); - - /* If the seqno wraps around, we need to clear the breadcrumb rbtree */ - if (!i915_seqno_passed(seqno, dev_priv->next_seqno)) { - while (intel_kick_waiters(dev_priv) || - intel_kick_signalers(dev_priv)) - yield(); - } - - /* Finally reset hw state */ - for_each_engine(engine, dev_priv) - intel_ring_init_seqno(engine, seqno); - - return 0; - } - - int i915_gem_set_seqno(struct drm_device *dev, u32 seqno) - { - struct drm_i915_private *dev_priv = to_i915(dev); - int ret; - - if (seqno == 0) - return -EINVAL; - - /* HWS page needs to be set less than what we - * will inject to ring - */ - ret = i915_gem_init_seqno(dev_priv, seqno - 1); - if (ret) - return ret; - - /* Carefully set the last_seqno value so that wrap - * detection still works - */ - dev_priv->next_seqno = seqno; - dev_priv->last_seqno = seqno - 1; - if (dev_priv->last_seqno == 0) - dev_priv->last_seqno--; - - return 0; - } - - int - i915_gem_get_seqno(struct drm_i915_private *dev_priv, u32 *seqno) - { - /* reserve 0 for non-seqno */ - if (dev_priv->next_seqno == 0) { - int ret = i915_gem_init_seqno(dev_priv, 0); - if (ret) - return ret; + if (n_pages == 1) + return kmap(sg_page(sgt->sgl)); - dev_priv->next_seqno = 1; + if (n_pages > ARRAY_SIZE(stack_pages)) { + /* Too big for stack -- allocate temporary array instead */ + pages = drm_malloc_gfp(n_pages, sizeof(*pages), GFP_TEMPORARY); + if (!pages) + return NULL; } - *seqno = dev_priv->last_seqno = dev_priv->next_seqno++; - return 0; - } - - static void i915_gem_mark_busy(const struct intel_engine_cs *engine) - { - struct drm_i915_private *dev_priv = engine->i915; + for_each_sgt_page(page, sgt_iter, sgt) + pages[i++] = page; - dev_priv->gt.active_engines |= intel_engine_flag(engine); - if (dev_priv->gt.awake) - return; + /* Check that we have the expected number of pages */ + GEM_BUG_ON(i != n_pages); - intel_runtime_pm_get_noresume(dev_priv); - dev_priv->gt.awake = true; + addr = vmap(pages, n_pages, 0, PAGE_KERNEL); - i915_update_gfx_val(dev_priv); - if (INTEL_GEN(dev_priv) >= 6) - gen6_rps_busy(dev_priv); + if (pages != stack_pages) + drm_free_large(pages); - queue_delayed_work(dev_priv->wq, - &dev_priv->gt.retire_work, - round_jiffies_up_relative(HZ)); + return addr; } - /* - * NB: This function is not allowed to fail. Doing so would mean the the - * request is not being tracked for completion but the work itself is - * going to happen on the hardware. This would be a Bad Thing(tm). 
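That no-fail contract holds because ring space for these closing commands is reserved when the request is allocated and only released here; the shape of the pattern, with wait_for_ring_space() as a hypothetical stand-in:

	/* At request allocation: fail early, while failure is still safe. */
	rq->reserved_space = MIN_SPACE_FOR_ADD_REQUEST;
	err = wait_for_ring_space(rq->ring, rq->reserved_space);
	if (err)
		return err;	/* nothing has been emitted yet */

	/* In __i915_add_request(): spend the reserve; the final
	 * emission can no longer run out of room, so it cannot fail. */
	rq->reserved_space = 0;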
- */ - void __i915_add_request(struct drm_i915_gem_request *request, - struct drm_i915_gem_object *obj, - bool flush_caches) + /* get, pin, and map the pages of the object into kernel space */ + void *i915_gem_object_pin_map(struct drm_i915_gem_object *obj) { - struct intel_engine_cs *engine; - struct intel_ringbuffer *ringbuf; - u32 request_start; - u32 reserved_tail; int ret; - if (WARN_ON(request == NULL)) - return; + lockdep_assert_held(&obj->base.dev->struct_mutex); - engine = request->engine; - ringbuf = request->ringbuf; + ret = i915_gem_object_get_pages(obj); + if (ret) + return ERR_PTR(ret); - /* - * To ensure that this call will not fail, space for its emissions - * should already have been reserved in the ring buffer. Let the ring - * know that it is time to use that space up. - */ - request_start = intel_ring_get_tail(ringbuf); - reserved_tail = request->reserved_space; - request->reserved_space = 0; + i915_gem_object_pin_pages(obj); - /* - * Emit any outstanding flushes - execbuf can fail to emit the flush - * after having emitted the batchbuffer command. Hence we need to fix - * things up similar to emitting the lazy request. The difference here - * is that the flush _must_ happen before the next request, no matter - * what. - */ - if (flush_caches) { - if (i915.enable_execlists) - ret = logical_ring_flush_all_caches(request); - else - ret = intel_ring_flush_all_caches(request); - /* Not allowed to fail! */ - WARN(ret, "*_ring_flush_all_caches failed: %d!\n", ret); + if (!obj->mapping) { + obj->mapping = i915_gem_object_map(obj); + if (!obj->mapping) { + i915_gem_object_unpin_pages(obj); + return ERR_PTR(-ENOMEM); + } } - trace_i915_gem_request_add(request); + return obj->mapping; + } - request->head = request_start; + static void + i915_gem_object_retire__write(struct i915_gem_active *active, + struct drm_i915_gem_request *request) + { + struct drm_i915_gem_object *obj = + container_of(active, struct drm_i915_gem_object, last_write); - /* Whilst this request exists, batch_obj will be on the - * active_list, and so will hold the active reference. Only when this - * request is retired will the the batch_obj be moved onto the - * inactive_list and lose its active reference. Hence we do not need - * to explicitly hold another reference here. - */ - request->batch_obj = obj; + intel_fb_obj_flush(obj, true, ORIGIN_CS); + } - /* Seal the request and mark it as pending execution. Note that - * we may inspect this state, without holding any locks, during - * hangcheck. Hence we apply the barrier to ensure that we do not - * see a more recent value in the hws than we are tracking. - */ - request->emitted_jiffies = jiffies; - request->previous_seqno = engine->last_submitted_seqno; - smp_store_mb(engine->last_submitted_seqno, request->seqno); - list_add_tail(&request->list, &engine->request_list); - - /* Record the position of the start of the request so that - * should we detect the updated seqno part-way through the - * GPU processing the request, we never over-estimate the - * position of the head. 
- */ - request->postfix = intel_ring_get_tail(ringbuf); + static void + i915_gem_object_retire__read(struct i915_gem_active *active, + struct drm_i915_gem_request *request) + { + int idx = request->engine->id; + struct drm_i915_gem_object *obj = + container_of(active, struct drm_i915_gem_object, last_read[idx]); - if (i915.enable_execlists) - ret = engine->emit_request(request); - else { - ret = engine->add_request(request); + GEM_BUG_ON(!i915_gem_object_has_active_engine(obj, idx)); - request->tail = intel_ring_get_tail(ringbuf); - } - /* Not allowed to fail! */ - WARN(ret, "emit|add_request failed: %d!\n", ret); - /* Sanity check that the reserved size was large enough. */ - ret = intel_ring_get_tail(ringbuf) - request_start; - if (ret < 0) - ret += ringbuf->size; - WARN_ONCE(ret > reserved_tail, - "Not enough space reserved (%d bytes) " - "for adding the request (%d bytes)\n", - reserved_tail, ret); + i915_gem_object_clear_active(obj, idx); + if (i915_gem_object_is_active(obj)) + return; + + /* Bump our place on the bound list to keep it roughly in LRU order + * so that we don't steal from recently used but inactive objects + * (unless we are forced to ofc!) + */ + if (obj->bind_count) + list_move_tail(&obj->global_list, + &request->i915->mm.bound_list); - i915_gem_mark_busy(engine); + i915_gem_object_put(obj); } static bool i915_context_is_banned(const struct i915_gem_context *ctx) @@@ -2978,101 -2379,6 +2379,6 @@@ static void i915_set_reset_status(struc } } - void i915_gem_request_free(struct kref *req_ref) - { - struct drm_i915_gem_request *req = container_of(req_ref, - typeof(*req), ref); - kmem_cache_free(req->i915->requests, req); - } - - static inline int - __i915_gem_request_alloc(struct intel_engine_cs *engine, - struct i915_gem_context *ctx, - struct drm_i915_gem_request **req_out) - { - struct drm_i915_private *dev_priv = engine->i915; - unsigned reset_counter = i915_reset_counter(&dev_priv->gpu_error); - struct drm_i915_gem_request *req; - int ret; - - if (!req_out) - return -EINVAL; - - *req_out = NULL; - - /* ABI: Before userspace accesses the GPU (e.g. execbuffer), report - * EIO if the GPU is already wedged, or EAGAIN to drop the struct_mutex - * and restart. - */ - ret = i915_gem_check_wedge(reset_counter, dev_priv->mm.interruptible); - if (ret) - return ret; - - req = kmem_cache_zalloc(dev_priv->requests, GFP_KERNEL); - if (req == NULL) - return -ENOMEM; - - ret = i915_gem_get_seqno(engine->i915, &req->seqno); - if (ret) - goto err; - - kref_init(&req->ref); - req->i915 = dev_priv; - req->engine = engine; - req->ctx = ctx; - i915_gem_context_reference(req->ctx); - - /* - * Reserve space in the ring buffer for all the commands required to - * eventually emit this request. This is to guarantee that the - * i915_add_request() call can't fail. Note that the reserve may need - * to be redone if the request is not actually submitted straight - * away, e.g. because a GPU scheduler has deferred it. - */ - req->reserved_space = MIN_SPACE_FOR_ADD_REQUEST; - - if (i915.enable_execlists) - ret = intel_logical_ring_alloc_request_extras(req); - else - ret = intel_ring_alloc_request_extras(req); - if (ret) - goto err_ctx; - - *req_out = req; - return 0; - - err_ctx: - i915_gem_context_unreference(ctx); - err: - kmem_cache_free(dev_priv->requests, req); - return ret; - } - - /** - * i915_gem_request_alloc - allocate a request structure - * - * @engine: engine that we wish to issue the request on. - * @ctx: context that the request will be associated with. 
- * This can be NULL if the request is not directly related to - * any specific user context, in which case this function will - * choose an appropriate context to use. - * - * Returns a pointer to the allocated request if successful, - * or an error code if not. - */ - struct drm_i915_gem_request * - i915_gem_request_alloc(struct intel_engine_cs *engine, - struct i915_gem_context *ctx) - { - struct drm_i915_gem_request *req; - int err; - - if (ctx == NULL) - ctx = engine->i915->kernel_context; - err = __i915_gem_request_alloc(engine, ctx, &req); - return err ? ERR_PTR(err) : req; - } - struct drm_i915_gem_request * i915_gem_find_active_request(struct intel_engine_cs *engine) { @@@ -3086,7 -2392,7 +2392,7 @@@ * extra delay for a recent interrupt is pointless. Hence, we do * not need an engine->irq_seqno_barrier() before the seqno reads. */ - list_for_each_entry(request, &engine->request_list, list) { + list_for_each_entry(request, &engine->request_list, link) { if (i915_gem_request_completed(request)) continue; @@@ -3108,23 -2414,24 +2414,24 @@@ static void i915_gem_reset_engine_statu ring_hung = engine->hangcheck.score >= HANGCHECK_SCORE_RING_HUNG; i915_set_reset_status(request->ctx, ring_hung); - list_for_each_entry_continue(request, &engine->request_list, list) + list_for_each_entry_continue(request, &engine->request_list, link) i915_set_reset_status(request->ctx, false); } static void i915_gem_reset_engine_cleanup(struct intel_engine_cs *engine) { - struct intel_ringbuffer *buffer; - - while (!list_empty(&engine->active_list)) { - struct drm_i915_gem_object *obj; + struct drm_i915_gem_request *request; + struct intel_ring *ring; - obj = list_first_entry(&engine->active_list, - struct drm_i915_gem_object, - engine_list[engine->id]); + request = i915_gem_active_peek(&engine->last_request, + &engine->i915->drm.struct_mutex); - i915_gem_object_retire__read(obj, engine->id); - } + /* Mark all pending requests as complete so that any concurrent + * (lockless) lookup doesn't try and wait upon the request as we + * reset it. + */ + if (request) + intel_engine_init_seqno(engine, request->fence.seqno); /* * Clear the execlists queue up before freeing the requests, as those @@@ -3146,15 -2453,9 +2453,9 @@@ * implicit references on things like e.g. ppgtt address spaces through * the request. */ - while (!list_empty(&engine->request_list)) { - struct drm_i915_gem_request *request; - - request = list_first_entry(&engine->request_list, - struct drm_i915_gem_request, - list); - - i915_gem_request_retire(request); - } + if (request) + i915_gem_request_retire_upto(request); + GEM_BUG_ON(intel_engine_is_active(engine)); /* Having flushed all requests from all queues, we know that all * ringbuffers must now be empty. However, since we do not reclaim @@@ -3163,12 -2464,12 +2464,12 @@@ * upon reset is less than when we start. Do one more pass over * all the ringbuffers to reset last_retired_head. 
*/ - list_for_each_entry(buffer, &engine->buffers, link) { - buffer->last_retired_head = buffer->tail; - intel_ring_update_space(buffer); + list_for_each_entry(ring, &engine->buffers, link) { + ring->last_retired_head = ring->tail; + intel_ring_update_space(ring); } - intel_ring_init_seqno(engine, engine->last_submitted_seqno); + engine->i915->gt.active_engines &= ~intel_engine_flag(engine); } void i915_gem_reset(struct drm_device *dev) @@@ -3186,82 -2487,11 +2487,11 @@@ for_each_engine(engine, dev_priv) i915_gem_reset_engine_cleanup(engine); + mod_delayed_work(dev_priv->wq, &dev_priv->gt.idle_work, 0); i915_gem_context_reset(dev); i915_gem_restore_fences(dev); - - WARN_ON(i915_verify_lists(dev)); - } - - /** - * This function clears the request list as sequence numbers are passed. - * @engine: engine to retire requests on - */ - void - i915_gem_retire_requests_ring(struct intel_engine_cs *engine) - { - WARN_ON(i915_verify_lists(engine->dev)); - - /* Retire requests first as we use it above for the early return. - * If we retire requests last, we may use a later seqno and so clear - * the requests lists without clearing the active list, leading to - * confusion. - */ - while (!list_empty(&engine->request_list)) { - struct drm_i915_gem_request *request; - - request = list_first_entry(&engine->request_list, - struct drm_i915_gem_request, - list); - - if (!i915_gem_request_completed(request)) - break; - - i915_gem_request_retire(request); - } - - /* Move any buffers on the active list that are no longer referenced - * by the ringbuffer to the flushing/inactive lists as appropriate, - * before we free the context associated with the requests. - */ - while (!list_empty(&engine->active_list)) { - struct drm_i915_gem_object *obj; - - obj = list_first_entry(&engine->active_list, - struct drm_i915_gem_object, - engine_list[engine->id]); - - if (!list_empty(&obj->last_read_req[engine->id]->list)) - break; - - i915_gem_object_retire__read(obj, engine->id); - } - - WARN_ON(i915_verify_lists(engine->dev)); - } - - void i915_gem_retire_requests(struct drm_i915_private *dev_priv) - { - struct intel_engine_cs *engine; - - lockdep_assert_held(&dev_priv->drm.struct_mutex); - - if (dev_priv->gt.active_engines == 0) - return; - - GEM_BUG_ON(!dev_priv->gt.awake); - - for_each_engine(engine, dev_priv) { - i915_gem_retire_requests_ring(engine); - if (list_empty(&engine->request_list)) - dev_priv->gt.active_engines &= ~intel_engine_flag(engine); - } - - if (dev_priv->gt.active_engines == 0) - queue_delayed_work(dev_priv->wq, - &dev_priv->gt.idle_work, - msecs_to_jiffies(100)); } static void @@@ -3281,10 -2511,12 +2511,12 @@@ i915_gem_retire_work_handler(struct wor * We do not need to do this test under locking as in the worst-case * we queue the retire worker once too often. */ - if (READ_ONCE(dev_priv->gt.awake)) + if (READ_ONCE(dev_priv->gt.awake)) { + i915_queue_hangcheck(dev_priv); queue_delayed_work(dev_priv->wq, &dev_priv->gt.retire_work, round_jiffies_up_relative(HZ)); + } } static void @@@ -3324,11 -2556,14 +2556,14 @@@ i915_gem_idle_work_handler(struct work_ dev_priv->gt.awake = false; rearm_hangcheck = false; + /* As we have disabled hangcheck, we need to unstick any waiters still + * hanging around. However, as we may be racing against the interrupt + * handler or the waiters themselves, we skip enabling the fake-irq. 
+ */ stuck_engines = intel_kick_waiters(dev_priv); - if (unlikely(stuck_engines)) { - DRM_DEBUG_DRIVER("kicked stuck waiters...missed irq\n"); - dev_priv->gpu_error.missed_irq_rings |= stuck_engines; - } + if (unlikely(stuck_engines)) + DRM_DEBUG_DRIVER("kicked stuck waiters (%x)...missed irq?\n", + stuck_engines); if (INTEL_GEN(dev_priv) >= 6) gen6_rps_idle(dev_priv); @@@ -3343,32 -2578,17 +2578,17 @@@ out_rearm } } - /** - * Ensures that an object will eventually get non-busy by flushing any required - * write domains, emitting any outstanding lazy request and retiring and - * completed requests. - * @obj: object to flush - */ - static int - i915_gem_object_flush_active(struct drm_i915_gem_object *obj) + void i915_gem_close_object(struct drm_gem_object *gem, struct drm_file *file) { - int i; - - if (!obj->active) - return 0; - - for (i = 0; i < I915_NUM_ENGINES; i++) { - struct drm_i915_gem_request *req; - - req = obj->last_read_req[i]; - if (req == NULL) - continue; - - if (i915_gem_request_completed(req)) - i915_gem_object_retire__read(obj, i); - } + struct drm_i915_gem_object *obj = to_intel_bo(gem); + struct drm_i915_file_private *fpriv = file->driver_priv; + struct i915_vma *vma, *vn; - return 0; + mutex_lock(&obj->base.dev->struct_mutex); + list_for_each_entry_safe(vma, vn, &obj->vma_list, obj_link) + if (vma->vm->file == fpriv) + i915_vma_close(vma); + mutex_unlock(&obj->base.dev->struct_mutex); } /** @@@ -3399,122 -2619,58 +2619,58 @@@ in i915_gem_wait_ioctl(struct drm_device *dev, void *data, struct drm_file *file) { struct drm_i915_gem_wait *args = data; + struct intel_rps_client *rps = to_rps_client(file); struct drm_i915_gem_object *obj; - struct drm_i915_gem_request *req[I915_NUM_ENGINES]; - int i, n = 0; - int ret; + unsigned long active; + int idx, ret = 0; if (args->flags != 0) return -EINVAL; - ret = i915_mutex_lock_interruptible(dev); - if (ret) - return ret; - - obj = to_intel_bo(drm_gem_object_lookup(file, args->bo_handle)); - if (&obj->base == NULL) { - mutex_unlock(&dev->struct_mutex); + obj = i915_gem_object_lookup(file, args->bo_handle); + if (!obj) return -ENOENT; - } - - /* Need to make sure the object gets inactive eventually. */ - ret = i915_gem_object_flush_active(obj); - if (ret) - goto out; - - if (!obj->active) - goto out; - - /* Do this after OLR check to make sure we make forward progress polling - * on this IOCTL with a timeout == 0 (like busy ioctl) - */ - if (args->timeout_ns == 0) { - ret = -ETIME; - goto out; - } - - drm_gem_object_unreference(&obj->base); - for (i = 0; i < I915_NUM_ENGINES; i++) { - if (obj->last_read_req[i] == NULL) - continue; - - req[n++] = i915_gem_request_reference(obj->last_read_req[i]); - } - - mutex_unlock(&dev->struct_mutex); - - for (i = 0; i < n; i++) { - if (ret == 0) - ret = __i915_wait_request(req[i], true, - args->timeout_ns > 0 ? &args->timeout_ns : NULL, - to_rps_client(file)); - i915_gem_request_unreference(req[i]); + active = __I915_BO_ACTIVE(obj); + for_each_active(active, idx) { + s64 *timeout = args->timeout_ns >= 0 ? 
&args->timeout_ns : NULL; + ret = i915_gem_active_wait_unlocked(&obj->last_read[idx], true, + timeout, rps); + if (ret) + break; } - return ret; - out: - drm_gem_object_unreference(&obj->base); - mutex_unlock(&dev->struct_mutex); + i915_gem_object_put_unlocked(obj); return ret; } static int - __i915_gem_object_sync(struct drm_i915_gem_object *obj, - struct intel_engine_cs *to, - struct drm_i915_gem_request *from_req, - struct drm_i915_gem_request **to_req) + __i915_gem_object_sync(struct drm_i915_gem_request *to, + struct drm_i915_gem_request *from) { - struct intel_engine_cs *from; int ret; - from = i915_gem_request_get_engine(from_req); - if (to == from) + if (to->engine == from->engine) return 0; - if (i915_gem_request_completed(from_req)) - return 0; - - if (!i915_semaphore_is_enabled(to_i915(obj->base.dev))) { - struct drm_i915_private *i915 = to_i915(obj->base.dev); - ret = __i915_wait_request(from_req, - i915->mm.interruptible, - NULL, - &i915->rps.semaphores); + if (!i915.semaphores) { + ret = i915_wait_request(from, + from->i915->mm.interruptible, + NULL, + NO_WAITBOOST); if (ret) return ret; - - i915_gem_object_retire_request(obj, from_req); } else { - int idx = intel_ring_sync_index(from, to); - u32 seqno = i915_gem_request_get_seqno(from_req); - - WARN_ON(!to_req); - - if (seqno <= from->semaphore.sync_seqno[idx]) + int idx = intel_engine_sync_index(from->engine, to->engine); + if (from->fence.seqno <= from->engine->semaphore.sync_seqno[idx]) return 0; - if (*to_req == NULL) { - struct drm_i915_gem_request *req; - - req = i915_gem_request_alloc(to, NULL); - if (IS_ERR(req)) - return PTR_ERR(req); - - *to_req = req; - } - - trace_i915_gem_ring_sync_to(*to_req, from, from_req); - ret = to->semaphore.sync_to(*to_req, from, seqno); + trace_i915_gem_ring_sync_to(to, from); + ret = to->engine->semaphore.sync_to(to, from); if (ret) return ret; - /* We use last_read_req because sync_to() - * might have just caused seqno wrap under - * the radar. - */ - from->semaphore.sync_seqno[idx] = - i915_gem_request_get_seqno(obj->last_read_req[from->id]); + from->engine->semaphore.sync_seqno[idx] = from->fence.seqno; } return 0; @@@ -3524,17 -2680,12 +2680,12 @@@ * i915_gem_object_sync - sync an object to a ring. * * @obj: object which may be in use on another ring. - * @to: ring we wish to use the object on. May be NULL. - * @to_req: request we wish to use the object for. See below. - * This will be allocated and returned if a request is - * required but not passed in. + * @to: request we are wishing to use * * This code is meant to abstract object synchronization with the GPU. - * Calling with NULL implies synchronizing the object with the CPU - * rather than a particular GPU ring. Conceptually we serialise writes - * between engines inside the GPU. We only allow one engine to write - * into a buffer at any time, but multiple readers. To ensure each has - * a coherent view of memory, we must: + * Conceptually we serialise writes between engines inside the GPU. + * We only allow one engine to write into a buffer at any time, but + * multiple readers. To ensure each has a coherent view of memory, we must: * * - If there is an outstanding write request to the object, the new * request must wait for it to complete (either CPU or in hw, requests @@@ -3543,44 -2694,39 +2694,39 @@@ * - If we are a write request (pending_write_domain is set), the new * request must wait for outstanding read requests to complete. * - * For CPU synchronisation (NULL to) no request is required. 
For syncing with - * rings to_req must be non-NULL. However, a request does not have to be - * pre-allocated. If *to_req is NULL and sync commands will be emitted then a - * request will be allocated automatically and returned through *to_req. Note - * that it is not guaranteed that commands will be emitted (because the system - * might already be idle). Hence there is no need to create a request that - * might never have any work submitted. Note further that if a request is - * returned in *to_req, it is the responsibility of the caller to submit - * that request (after potentially adding more work to it). - * * Returns 0 if successful, else propagates up the lower layer error. */ int i915_gem_object_sync(struct drm_i915_gem_object *obj, - struct intel_engine_cs *to, - struct drm_i915_gem_request **to_req) + struct drm_i915_gem_request *to) { - const bool readonly = obj->base.pending_write_domain == 0; - struct drm_i915_gem_request *req[I915_NUM_ENGINES]; - int ret, i, n; + struct i915_gem_active *active; + unsigned long active_mask; + int idx; - if (!obj->active) - return 0; + lockdep_assert_held(&obj->base.dev->struct_mutex); - if (to == NULL) - return i915_gem_object_wait_rendering(obj, readonly); + active_mask = i915_gem_object_get_active(obj); + if (!active_mask) + return 0; - n = 0; - if (readonly) { - if (obj->last_write_req) - req[n++] = obj->last_write_req; + if (obj->base.pending_write_domain) { + active = obj->last_read; } else { - for (i = 0; i < I915_NUM_ENGINES; i++) - if (obj->last_read_req[i]) - req[n++] = obj->last_read_req[i]; + active_mask = 1; + active = &obj->last_write; } - for (i = 0; i < n; i++) { - ret = __i915_gem_object_sync(obj, to, req[i], to_req); + + for_each_active(active_mask, idx) { + struct drm_i915_gem_request *request; + int ret; + + request = i915_gem_active_peek(&active[idx], + &obj->base.dev->struct_mutex); + if (!request) + continue; + + ret = __i915_gem_object_sync(to, request); if (ret) return ret; } @@@ -3611,7 -2757,7 +2757,7 @@@ static void i915_gem_object_finish_gtt( static void __i915_vma_iounmap(struct i915_vma *vma) { - GEM_BUG_ON(vma->pin_count); + GEM_BUG_ON(i915_vma_is_pinned(vma)); if (vma->iomap == NULL) return; @@@ -3620,32 -2766,51 +2766,51 @@@ vma->iomap = NULL; } - static int __i915_vma_unbind(struct i915_vma *vma, bool wait) + int i915_vma_unbind(struct i915_vma *vma) { struct drm_i915_gem_object *obj = vma->obj; - struct drm_i915_private *dev_priv = to_i915(obj->base.dev); + unsigned long active; int ret; - if (list_empty(&vma->obj_link)) - return 0; + /* First wait upon any activity as retiring the request may + * have side-effects such as unpinning or even unbinding this vma. + */ + active = i915_vma_get_active(vma); + if (active) { + int idx; + + /* When a closed VMA is retired, it is unbound - eek. + * In order to prevent it from being recursively closed, + * take a pin on the vma so that the second unbind is + * aborted. 
+ */ + __i915_vma_pin(vma); - if (!drm_mm_node_allocated(&vma->node)) { - i915_gem_vma_destroy(vma); - return 0; + for_each_active(active, idx) { + ret = i915_gem_active_retire(&vma->last_read[idx], + &vma->vm->dev->struct_mutex); + if (ret) + break; + } + + __i915_vma_unpin(vma); + if (ret) + return ret; + + GEM_BUG_ON(i915_vma_is_active(vma)); } - if (vma->pin_count) + if (i915_vma_is_pinned(vma)) return -EBUSY; - BUG_ON(obj->pages == NULL); + if (!drm_mm_node_allocated(&vma->node)) + goto destroy; - if (wait) { - ret = i915_gem_object_wait_rendering(obj, false); - if (ret) - return ret; - } + GEM_BUG_ON(obj->bind_count == 0); + GEM_BUG_ON(!obj->pages); - if (vma->is_ggtt && vma->ggtt_view.type == I915_GGTT_VIEW_NORMAL) { + if (i915_vma_is_ggtt(vma) && + vma->ggtt_view.type == I915_GGTT_VIEW_NORMAL) { i915_gem_object_finish_gtt(obj); /* release the fence reg _after_ flushing */ @@@ -3656,13 -2821,16 +2821,16 @@@ __i915_vma_iounmap(vma); } - trace_i915_vma_unbind(vma); + if (likely(!vma->vm->closed)) { + trace_i915_vma_unbind(vma); + vma->vm->unbind_vma(vma); + } + vma->flags &= ~(I915_VMA_GLOBAL_BIND | I915_VMA_LOCAL_BIND); - vma->vm->unbind_vma(vma); - vma->bound = 0; + drm_mm_remove_node(&vma->node); + list_move_tail(&vma->vm_link, &vma->vm->unbound_list); - list_del_init(&vma->vm_link); - if (vma->is_ggtt) { + if (i915_vma_is_ggtt(vma)) { if (vma->ggtt_view.type == I915_GGTT_VIEW_NORMAL) { obj->map_and_fenceable = false; } else if (vma->ggtt_view.pages) { @@@ -3672,13 -2840,11 +2840,11 @@@ vma->ggtt_view.pages = NULL; } - drm_mm_remove_node(&vma->node); - i915_gem_vma_destroy(vma); - /* Since the unbound list is global, only move to that list if * no more VMAs exist. */ - if (list_empty(&obj->vma_list)) - list_move_tail(&obj->global_list, &dev_priv->mm.unbound_list); + if (--obj->bind_count == 0) + list_move_tail(&obj->global_list, + &to_i915(obj->base.dev)->mm.unbound_list); /* And finally now the object is completely decoupled from this vma, * we can drop its hold on the backing storage and allow it to be @@@ -3686,36 -2852,28 +2852,28 @@@ */ i915_gem_object_unpin_pages(obj); - return 0; - } - - int i915_vma_unbind(struct i915_vma *vma) - { - return __i915_vma_unbind(vma, true); - } + destroy: + if (unlikely(i915_vma_is_closed(vma))) + i915_vma_destroy(vma); - int __i915_vma_unbind_no_wait(struct i915_vma *vma) - { - return __i915_vma_unbind(vma, false); + return 0; } - int i915_gem_wait_for_idle(struct drm_i915_private *dev_priv) + int i915_gem_wait_for_idle(struct drm_i915_private *dev_priv, + bool interruptible) { struct intel_engine_cs *engine; int ret; - lockdep_assert_held(&dev_priv->drm.struct_mutex); - for_each_engine(engine, dev_priv) { if (engine->last_context == NULL) continue; - ret = intel_engine_idle(engine); + ret = intel_engine_idle(engine, interruptible); if (ret) return ret; } - WARN_ON(i915_verify_lists(dev)); return 0; } @@@ -3753,128 -2911,95 +2911,95 @@@ static bool i915_gem_valid_gtt_space(st } /** - * Finds free space in the GTT aperture and binds the object or a view of it - * there. - * @obj: object to bind - * @vm: address space to bind into - * @ggtt_view: global gtt view if applicable - * @alignment: requested alignment + * i915_vma_insert - finds a slot for the vma in its address space + * @vma: the vma + * @size: requested size in bytes (can be larger than the VMA) + * @alignment: required alignment * @flags: mask of PIN_* flags to use + * + * First we try to allocate some free space that meets the requirements for + * the VMA. 
Failing that, if the flags permit, it will evict an old VMA, + preferably the oldest idle entry to make room for the new VMA. + * + * Returns: + * 0 on success, negative error code otherwise. + */ - static struct i915_vma * - i915_gem_object_bind_to_vm(struct drm_i915_gem_object *obj, - struct i915_address_space *vm, - const struct i915_ggtt_view *ggtt_view, - unsigned alignment, - uint64_t flags) + static int + i915_vma_insert(struct i915_vma *vma, u64 size, u64 alignment, u64 flags) { - struct drm_device *dev = obj->base.dev; - struct drm_i915_private *dev_priv = to_i915(dev); - struct i915_ggtt *ggtt = &dev_priv->ggtt; - u32 fence_alignment, unfenced_alignment; - u32 search_flag, alloc_flag; + struct drm_i915_private *dev_priv = to_i915(vma->vm->dev); + struct drm_i915_gem_object *obj = vma->obj; u64 start, end; - u64 size, fence_size; - struct i915_vma *vma; + u64 min_alignment; int ret; - if (i915_is_ggtt(vm)) { - u32 view_size; - - if (WARN_ON(!ggtt_view)) - return ERR_PTR(-EINVAL); - - view_size = i915_ggtt_view_size(obj, ggtt_view); - - fence_size = i915_gem_get_gtt_size(dev, - view_size, - obj->tiling_mode); - fence_alignment = i915_gem_get_gtt_alignment(dev, - view_size, - obj->tiling_mode, - true); - unfenced_alignment = i915_gem_get_gtt_alignment(dev, - view_size, - obj->tiling_mode, - false); - size = flags & PIN_MAPPABLE ? fence_size : view_size; - } else { - fence_size = i915_gem_get_gtt_size(dev, - obj->base.size, - obj->tiling_mode); - fence_alignment = i915_gem_get_gtt_alignment(dev, - obj->base.size, - obj->tiling_mode, - true); - unfenced_alignment = - i915_gem_get_gtt_alignment(dev, - obj->base.size, - obj->tiling_mode, - false); - size = flags & PIN_MAPPABLE ? fence_size : obj->base.size; + GEM_BUG_ON(vma->flags & (I915_VMA_GLOBAL_BIND | I915_VMA_LOCAL_BIND)); + GEM_BUG_ON(drm_mm_node_allocated(&vma->node)); + + size = max(size, vma->size); + if (flags & PIN_MAPPABLE) + size = i915_gem_get_ggtt_size(dev_priv, size, + i915_gem_object_get_tiling(obj)); + + min_alignment = + i915_gem_get_ggtt_alignment(dev_priv, size, + i915_gem_object_get_tiling(obj), + flags & PIN_MAPPABLE); + if (alignment == 0) + alignment = min_alignment; + if (alignment & (min_alignment - 1)) { + DRM_DEBUG("Invalid object alignment requested %llu, minimum %llu\n", + alignment, min_alignment); + return -EINVAL; } start = flags & PIN_OFFSET_BIAS ? flags & PIN_OFFSET_MASK : 0; - end = vm->total; + + end = vma->vm->total; if (flags & PIN_MAPPABLE) - end = min_t(u64, end, ggtt->mappable_end); + end = min_t(u64, end, dev_priv->ggtt.mappable_end); if (flags & PIN_ZONE_4G) end = min_t(u64, end, (1ULL << 32) - PAGE_SIZE); - if (alignment == 0) - alignment = flags & PIN_MAPPABLE ? fence_alignment : - unfenced_alignment; - if (flags & PIN_MAPPABLE && alignment & (fence_alignment - 1)) { - DRM_DEBUG("Invalid object (view type=%u) alignment requested %u\n", - ggtt_view ? ggtt_view->type : 0, - alignment); - return ERR_PTR(-EINVAL); - } - /* If binding the object/GGTT view requires more space than the entire * aperture has, reject it early before evicting everything in a vain * attempt to find space. */ if (size > end) { - DRM_DEBUG("Attempting to bind an object (view type=%u) larger than the aperture: size=%llu > %s aperture=%llu\n", - ggtt_view ? ggtt_view->type : 0, - size, + DRM_DEBUG("Attempting to bind an object larger than the aperture: request=%llu [object=%zd] > %s aperture=%llu\n", + size, obj->base.size, flags & PIN_MAPPABLE ?
"mappable" : "total", end); - return ERR_PTR(-E2BIG); + return -E2BIG; } ret = i915_gem_object_get_pages(obj); if (ret) - return ERR_PTR(ret); + return ret; i915_gem_object_pin_pages(obj); - vma = ggtt_view ? i915_gem_obj_lookup_or_create_ggtt_vma(obj, ggtt_view) : - i915_gem_obj_lookup_or_create_vma(obj, vm); - - if (IS_ERR(vma)) - goto err_unpin; - if (flags & PIN_OFFSET_FIXED) { - uint64_t offset = flags & PIN_OFFSET_MASK; - - if (offset & (alignment - 1) || offset + size > end) { + u64 offset = flags & PIN_OFFSET_MASK; + if (offset & (alignment - 1) || offset > end - size) { ret = -EINVAL; - goto err_free_vma; + goto err_unpin; } + vma->node.start = offset; vma->node.size = size; vma->node.color = obj->cache_level; - ret = drm_mm_reserve_node(&vm->mm, &vma->node); + ret = drm_mm_reserve_node(&vma->vm->mm, &vma->node); if (ret) { ret = i915_gem_evict_for_vma(vma); if (ret == 0) - ret = drm_mm_reserve_node(&vm->mm, &vma->node); + ret = drm_mm_reserve_node(&vma->vm->mm, &vma->node); + if (ret) + goto err_unpin; } - if (ret) - goto err_free_vma; } else { + u32 search_flag, alloc_flag; + if (flags & PIN_HIGH) { search_flag = DRM_MM_SEARCH_BELOW; alloc_flag = DRM_MM_CREATE_TOP; @@@ -3883,47 -3008,45 +3008,45 @@@ alloc_flag = DRM_MM_CREATE_DEFAULT; } + /* We only allocate in PAGE_SIZE/GTT_PAGE_SIZE (4096) chunks, + * so we know that we always have a minimum alignment of 4096. + * The drm_mm range manager is optimised to return results + * with zero alignment, so where possible use the optimal + * path. + */ + if (alignment <= 4096) + alignment = 0; + search_free: - ret = drm_mm_insert_node_in_range_generic(&vm->mm, &vma->node, + ret = drm_mm_insert_node_in_range_generic(&vma->vm->mm, + &vma->node, size, alignment, obj->cache_level, start, end, search_flag, alloc_flag); if (ret) { - ret = i915_gem_evict_something(dev, vm, size, alignment, + ret = i915_gem_evict_something(vma->vm, size, alignment, obj->cache_level, start, end, flags); if (ret == 0) goto search_free; - goto err_free_vma; + goto err_unpin; } } - if (WARN_ON(!i915_gem_valid_gtt_space(vma, obj->cache_level))) { - ret = -EINVAL; - goto err_remove_node; - } - - trace_i915_vma_bind(vma, flags); - ret = i915_vma_bind(vma, obj->cache_level, flags); - if (ret) - goto err_remove_node; + GEM_BUG_ON(!i915_gem_valid_gtt_space(vma, obj->cache_level)); list_move_tail(&obj->global_list, &dev_priv->mm.bound_list); - list_add_tail(&vma->vm_link, &vm->inactive_list); + list_move_tail(&vma->vm_link, &vma->vm->inactive_list); + obj->bind_count++; - return vma; + return 0; - err_remove_node: - drm_mm_remove_node(&vma->node); - err_free_vma: - i915_gem_vma_destroy(vma); - vma = ERR_PTR(ret); err_unpin: i915_gem_object_unpin_pages(obj); - return vma; + return ret; } bool @@@ -4026,20 -3149,17 +3149,17 @@@ i915_gem_object_flush_cpu_write_domain( int i915_gem_object_set_to_gtt_domain(struct drm_i915_gem_object *obj, bool write) { - struct drm_device *dev = obj->base.dev; - struct drm_i915_private *dev_priv = to_i915(dev); - struct i915_ggtt *ggtt = &dev_priv->ggtt; uint32_t old_write_domain, old_read_domains; struct i915_vma *vma; int ret; - if (obj->base.write_domain == I915_GEM_DOMAIN_GTT) - return 0; - ret = i915_gem_object_wait_rendering(obj, !write); if (ret) return ret; + if (obj->base.write_domain == I915_GEM_DOMAIN_GTT) + return 0; + /* Flush and acquire obj->pages so that we are coherent through * direct access in memory with previous cached writes through * shmemfs and that our cache domain tracking remains valid. 
@@@ -4081,9 -3201,10 +3201,10 @@@ /* And bump the LRU for this access */ vma = i915_gem_obj_to_ggtt(obj); - if (vma && drm_mm_node_allocated(&vma->node) && !obj->active) - list_move_tail(&vma->vm_link, - &ggtt->base.inactive_list); + if (vma && + drm_mm_node_allocated(&vma->node) && + !i915_vma_is_active(vma)) + list_move_tail(&vma->vm_link, &vma->vm->inactive_list); return 0; } @@@ -4106,9 -3227,7 +3227,7 @@@ int i915_gem_object_set_cache_level(struct drm_i915_gem_object *obj, enum i915_cache_level cache_level) { - struct drm_device *dev = obj->base.dev; - struct i915_vma *vma, *next; - bool bound = false; + struct i915_vma *vma; int ret = 0; if (obj->cache_level == cache_level) @@@ -4119,21 -3238,28 +3238,28 @@@ * catch the issue of the CS prefetch crossing page boundaries and * reading an invalid PTE on older architectures. */ - list_for_each_entry_safe(vma, next, &obj->vma_list, obj_link) { + restart: + list_for_each_entry(vma, &obj->vma_list, obj_link) { if (!drm_mm_node_allocated(&vma->node)) continue; - if (vma->pin_count) { + if (i915_vma_is_pinned(vma)) { DRM_DEBUG("can not change the cache level of pinned objects\n"); return -EBUSY; } - if (!i915_gem_valid_gtt_space(vma, cache_level)) { - ret = i915_vma_unbind(vma); - if (ret) - return ret; - } else - bound = true; + if (i915_gem_valid_gtt_space(vma, cache_level)) + continue; + + ret = i915_vma_unbind(vma); + if (ret) + return ret; + + /* As unbinding may affect other elements in the + * obj->vma_list (due to side-effects from retiring + * an active vma), play safe and restart the iterator. + */ + goto restart; } /* We can reuse the existing drm_mm nodes but need to change the @@@ -4143,7 -3269,7 +3269,7 @@@ * rewrite the PTE in the belief that doing so tramples upon less * state and so involves less work. */ - if (bound) { + if (obj->bind_count) { /* Before we change the PTE, the GPU must not be accessing it. * If we wait upon the object, we know that all the bound * VMA are no longer active. @@@ -4152,7 -3278,7 +3278,7 @@@ if (ret) return ret; - if (!HAS_LLC(dev) && cache_level != I915_CACHE_NONE) { + if (!HAS_LLC(obj->base.dev) && cache_level != I915_CACHE_NONE) { /* Access to snoopable pages through the GTT is * incoherent and on some machines causes a hard * lockup. Relinquish the CPU mmaping to force @@@ -4215,8 -3341,8 +3341,8 @@@ int i915_gem_get_caching_ioctl(struct d struct drm_i915_gem_caching *args = data; struct drm_i915_gem_object *obj; - obj = to_intel_bo(drm_gem_object_lookup(file, args->handle)); - if (&obj->base == NULL) + obj = i915_gem_object_lookup(file, args->handle); + if (!obj) return -ENOENT; switch (obj->cache_level) { @@@ -4234,7 -3360,7 +3360,7 @@@ break; } - drm_gem_object_unreference_unlocked(&obj->base); + i915_gem_object_put_unlocked(obj); return 0; } @@@ -4276,15 -3402,15 +3402,15 @@@ int i915_gem_set_caching_ioctl(struct d if (ret) goto rpm_put; - obj = to_intel_bo(drm_gem_object_lookup(file, args->handle)); - if (&obj->base == NULL) { + obj = i915_gem_object_lookup(file, args->handle); + if (!obj) { ret = -ENOENT; goto unlock; } ret = i915_gem_object_set_cache_level(obj, level); - drm_gem_object_unreference(&obj->base); + i915_gem_object_put(obj); unlock: mutex_unlock(&dev->struct_mutex); rpm_put: @@@ -4329,7 -3455,7 +3455,7 @@@ i915_gem_object_pin_to_display_plane(st * (e.g. libkms for the bootup splash), we have to ensure that we * always use map_and_fenceable for all scanout buffers. 
*/ - ret = i915_gem_object_ggtt_pin(obj, view, alignment, + ret = i915_gem_object_ggtt_pin(obj, view, 0, alignment, view->type == I915_GGTT_VIEW_NORMAL ? PIN_MAPPABLE : 0); if (ret) @@@ -4383,13 -3509,13 +3509,13 @@@ i915_gem_object_set_to_cpu_domain(struc uint32_t old_write_domain, old_read_domains; int ret; - if (obj->base.write_domain == I915_GEM_DOMAIN_CPU) - return 0; - ret = i915_gem_object_wait_rendering(obj, !write); if (ret) return ret; + if (obj->base.write_domain == I915_GEM_DOMAIN_CPU) + return 0; + i915_gem_object_flush_gtt_write_domain(obj); old_write_domain = obj->base.write_domain; @@@ -4464,25 -3590,30 +3590,30 @@@ i915_gem_ring_throttle(struct drm_devic target = request; } if (target) - i915_gem_request_reference(target); + i915_gem_request_get(target); spin_unlock(&file_priv->mm.lock); if (target == NULL) return 0; - ret = __i915_wait_request(target, true, NULL, NULL); - i915_gem_request_unreference(target); + ret = i915_wait_request(target, true, NULL, NULL); + i915_gem_request_put(target); return ret; } static bool - i915_vma_misplaced(struct i915_vma *vma, uint32_t alignment, uint64_t flags) + i915_vma_misplaced(struct i915_vma *vma, u64 size, u64 alignment, u64 flags) { struct drm_i915_gem_object *obj = vma->obj; - if (alignment && - vma->node.start & (alignment - 1)) + if (!drm_mm_node_allocated(&vma->node)) + return false; + + if (vma->node.size < size) + return true; + + if (alignment && vma->node.start & (alignment - 1)) return true; if (flags & PIN_MAPPABLE && !obj->map_and_fenceable) @@@ -4502,135 -3633,159 +3633,159 @@@ void __i915_vma_set_map_and_fenceable(struct i915_vma *vma) { struct drm_i915_gem_object *obj = vma->obj; + struct drm_i915_private *dev_priv = to_i915(obj->base.dev); bool mappable, fenceable; u32 fence_size, fence_alignment; - fence_size = i915_gem_get_gtt_size(obj->base.dev, - obj->base.size, - obj->tiling_mode); - fence_alignment = i915_gem_get_gtt_alignment(obj->base.dev, - obj->base.size, - obj->tiling_mode, - true); + fence_size = i915_gem_get_ggtt_size(dev_priv, + obj->base.size, + i915_gem_object_get_tiling(obj)); + fence_alignment = i915_gem_get_ggtt_alignment(dev_priv, + obj->base.size, + i915_gem_object_get_tiling(obj), + true); fenceable = (vma->node.size == fence_size && (vma->node.start & (fence_alignment - 1)) == 0); mappable = (vma->node.start + fence_size <= - to_i915(obj->base.dev)->ggtt.mappable_end); + dev_priv->ggtt.mappable_end); obj->map_and_fenceable = mappable && fenceable; } - static int - i915_gem_object_do_pin(struct drm_i915_gem_object *obj, - struct i915_address_space *vm, - const struct i915_ggtt_view *ggtt_view, - uint32_t alignment, - uint64_t flags) + int __i915_vma_do_pin(struct i915_vma *vma, + u64 size, u64 alignment, u64 flags) { - struct drm_i915_private *dev_priv = to_i915(obj->base.dev); - struct i915_vma *vma; - unsigned bound; + unsigned int bound = vma->flags; int ret; - if (WARN_ON(vm == &dev_priv->mm.aliasing_ppgtt->base)) - return -ENODEV; - - if (WARN_ON(flags & (PIN_GLOBAL | PIN_MAPPABLE) && !i915_is_ggtt(vm))) - return -EINVAL; - - if (WARN_ON((flags & (PIN_MAPPABLE | PIN_GLOBAL)) == PIN_MAPPABLE)) - return -EINVAL; - - if (WARN_ON(i915_is_ggtt(vm) != !!ggtt_view)) - return -EINVAL; - - vma = ggtt_view ? 
i915_gem_obj_to_ggtt_view(obj, ggtt_view) : - i915_gem_obj_to_vma(obj, vm); - - if (vma) { - if (WARN_ON(vma->pin_count == DRM_I915_GEM_OBJECT_MAX_PIN_COUNT)) - return -EBUSY; + GEM_BUG_ON((flags & (PIN_GLOBAL | PIN_USER)) == 0); + GEM_BUG_ON((flags & PIN_GLOBAL) && !i915_vma_is_ggtt(vma)); - if (i915_vma_misplaced(vma, alignment, flags)) { - WARN(vma->pin_count, - "bo is already pinned in %s with incorrect alignment:" - " offset=%08x %08x, req.alignment=%x, req.map_and_fenceable=%d," - " obj->map_and_fenceable=%d\n", - ggtt_view ? "ggtt" : "ppgtt", - upper_32_bits(vma->node.start), - lower_32_bits(vma->node.start), - alignment, - !!(flags & PIN_MAPPABLE), - obj->map_and_fenceable); - ret = i915_vma_unbind(vma); - if (ret) - return ret; - - vma = NULL; - } + if (WARN_ON(bound & I915_VMA_PIN_OVERFLOW)) { + ret = -EBUSY; + goto err; } - bound = vma ? vma->bound : 0; - if (vma == NULL || !drm_mm_node_allocated(&vma->node)) { - vma = i915_gem_object_bind_to_vm(obj, vm, ggtt_view, alignment, - flags); - if (IS_ERR(vma)) - return PTR_ERR(vma); - } else { - ret = i915_vma_bind(vma, obj->cache_level, flags); + if ((bound & I915_VMA_BIND_MASK) == 0) { + ret = i915_vma_insert(vma, size, alignment, flags); if (ret) - return ret; + goto err; } - if (ggtt_view && ggtt_view->type == I915_GGTT_VIEW_NORMAL && - (bound ^ vma->bound) & GLOBAL_BIND) { + ret = i915_vma_bind(vma, vma->obj->cache_level, flags); + if (ret) + goto err; + + if ((bound ^ vma->flags) & I915_VMA_GLOBAL_BIND) __i915_vma_set_map_and_fenceable(vma); - WARN_ON(flags & PIN_MAPPABLE && !obj->map_and_fenceable); - } - vma->pin_count++; + GEM_BUG_ON(i915_vma_misplaced(vma, size, alignment, flags)); return 0; - } - int - i915_gem_object_pin(struct drm_i915_gem_object *obj, - struct i915_address_space *vm, - uint32_t alignment, - uint64_t flags) - { - return i915_gem_object_do_pin(obj, vm, - i915_is_ggtt(vm) ? 
&i915_ggtt_view_normal : NULL, - alignment, flags); + err: + __i915_vma_unpin(vma); + return ret; } int i915_gem_object_ggtt_pin(struct drm_i915_gem_object *obj, const struct i915_ggtt_view *view, - uint32_t alignment, - uint64_t flags) + u64 size, + u64 alignment, + u64 flags) { - struct drm_device *dev = obj->base.dev; - struct drm_i915_private *dev_priv = to_i915(dev); - struct i915_ggtt *ggtt = &dev_priv->ggtt; + struct i915_vma *vma; + int ret; + + if (!view) + view = &i915_ggtt_view_normal; - BUG_ON(!view); + vma = i915_gem_obj_lookup_or_create_ggtt_vma(obj, view); + if (IS_ERR(vma)) + return PTR_ERR(vma); + + if (i915_vma_misplaced(vma, size, alignment, flags)) { + if (flags & PIN_NONBLOCK && + (i915_vma_is_pinned(vma) || i915_vma_is_active(vma))) + return -ENOSPC; + + WARN(i915_vma_is_pinned(vma), + "bo is already pinned in ggtt with incorrect alignment:" + " offset=%08x %08x, req.alignment=%llx, req.map_and_fenceable=%d," + " obj->map_and_fenceable=%d\n", + upper_32_bits(vma->node.start), + lower_32_bits(vma->node.start), + alignment, + !!(flags & PIN_MAPPABLE), + obj->map_and_fenceable); + ret = i915_vma_unbind(vma); + if (ret) + return ret; + } - return i915_gem_object_do_pin(obj, &ggtt->base, view, - alignment, flags | PIN_GLOBAL); + return i915_vma_pin(vma, size, alignment, flags | PIN_GLOBAL); } void i915_gem_object_ggtt_unpin_view(struct drm_i915_gem_object *obj, const struct i915_ggtt_view *view) { - struct i915_vma *vma = i915_gem_obj_to_ggtt_view(obj, view); + i915_vma_unpin(i915_gem_obj_to_ggtt_view(obj, view)); + } + + static __always_inline unsigned __busy_read_flag(unsigned int id) + { + /* Note that we could alias engines in the execbuf API, but + * that would be very unwise as it prevents userspace from + * fine control over engine selection. Ahem. + * + * This should be something like EXEC_MAX_ENGINE instead of + * I915_NUM_ENGINES. + */ + BUILD_BUG_ON(I915_NUM_ENGINES > 16); + return 0x10000 << id; + } + + static __always_inline unsigned int __busy_write_id(unsigned int id) + { + return id; + } + + static __always_inline unsigned + __busy_set_if_active(const struct i915_gem_active *active, + unsigned int (*flag)(unsigned int id)) + { + /* For more discussion about the barriers and locking concerns, + * see __i915_gem_active_get_rcu(). + */ + do { + struct drm_i915_gem_request *request; + unsigned int id; + + request = rcu_dereference(active->request); + if (!request || i915_gem_request_completed(request)) + return 0; + + id = request->engine->exec_id; - WARN_ON(vma->pin_count == 0); - WARN_ON(!i915_gem_obj_ggtt_bound_view(obj, view)); + /* Check that the pointer wasn't reassigned and overwritten. 
*/ + if (request == rcu_access_pointer(active->request)) + return flag(id); + } while (1); + } + + static inline unsigned + busy_check_reader(const struct i915_gem_active *active) + { + return __busy_set_if_active(active, __busy_read_flag); + } - --vma->pin_count; + static inline unsigned + busy_check_writer(const struct i915_gem_active *active) + { + return __busy_set_if_active(active, __busy_write_id); } int @@@ -4639,47 -3794,61 +3794,61 @@@ i915_gem_busy_ioctl(struct drm_device * { struct drm_i915_gem_busy *args = data; struct drm_i915_gem_object *obj; - int ret; + unsigned long active; - ret = i915_mutex_lock_interruptible(dev); - if (ret) - return ret; + obj = i915_gem_object_lookup(file, args->handle); + if (!obj) + return -ENOENT; - obj = to_intel_bo(drm_gem_object_lookup(file, args->handle)); - if (&obj->base == NULL) { - ret = -ENOENT; - goto unlock; - } + args->busy = 0; + active = __I915_BO_ACTIVE(obj); + if (active) { + int idx; - /* Count all active objects as busy, even if they are currently not used - * by the gpu. Users of this interface expect objects to eventually - * become non-busy without any further actions, therefore emit any - * necessary flushes here. - */ - ret = i915_gem_object_flush_active(obj); - if (ret) - goto unref; + /* Yes, the lookups are intentionally racy. + * + * First, we cannot simply rely on __I915_BO_ACTIVE. We have + * to regard the value as stale and as our ABI guarantees + * forward progress, we confirm the status of each active + * request with the hardware. + * + * Even though we guard the pointer lookup by RCU, that only + * guarantees that the pointer and its contents remain + * dereferencable and does *not* mean that the request we + * have is the same as the one being tracked by the object. + * + * Consider that we lookup the request just as it is being + * retired and freed. We take a local copy of the pointer, + * but before we add its engine into the busy set, the other + * thread reallocates it and assigns it to a task on another + * engine with a fresh and incomplete seqno. + * + * So after we lookup the engine's id, we double check that + * the active request is the same and only then do we add it + * into the busy set. + */ + rcu_read_lock(); - args->busy = 0; - if (obj->active) { - int i; + for_each_active(active, idx) + args->busy |= busy_check_reader(&obj->last_read[idx]); - for (i = 0; i < I915_NUM_ENGINES; i++) { - struct drm_i915_gem_request *req; + /* For ABI sanity, we only care that the write engine is in + * the set of read engines. This is ensured by the ordering + * of setting last_read/last_write in i915_vma_move_to_active, + * and then in reverse in retire. + * + * We don't care that the set of active read/write engines + * may change during construction of the result, as it is + * equally liable to change before userspace can inspect + * the result. 
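/*
 * A minimal sketch, not from the patch: the sample-and-recheck idiom used
 * by __busy_set_if_active() above. With SLAB_DESTROY_BY_RCU the memory
 * stays dereferenceable under rcu_read_lock(), but the object may be
 * freed and reused, so the slot is re-read after extracting the data and
 * the loop retries if it was reassigned. struct slot is a hypothetical
 * stand-in for the i915_gem_active tracker.
 */
#include <linux/rcupdate.h>

struct item { unsigned int id; };
struct slot { struct item __rcu *ptr; };

static unsigned int sample_id(struct slot *s)
{
	unsigned int id;

	rcu_read_lock();
	for (;;) {
		struct item *p = rcu_dereference(s->ptr);

		if (!p) {
			id = 0;
			break;
		}
		id = p->id;
		/* Only trust the sample if the slot still holds p. */
		if (p == rcu_access_pointer(s->ptr))
			break;
	}
	rcu_read_unlock();

	return id;
}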
+ */ + args->busy |= busy_check_writer(&obj->last_write); - req = obj->last_read_req[i]; - if (req) - args->busy |= 1 << (16 + req->engine->exec_id); - } - if (obj->last_write_req) - args->busy |= obj->last_write_req->engine->exec_id; + rcu_read_unlock(); } - unref: - drm_gem_object_unreference(&obj->base); - unlock: - mutex_unlock(&dev->struct_mutex); - return ret; + i915_gem_object_put_unlocked(obj); + return 0; } int @@@ -4710,19 -3879,14 +3879,14 @@@ i915_gem_madvise_ioctl(struct drm_devic if (ret) return ret; - obj = to_intel_bo(drm_gem_object_lookup(file_priv, args->handle)); - if (&obj->base == NULL) { + obj = i915_gem_object_lookup(file_priv, args->handle); + if (!obj) { ret = -ENOENT; goto unlock; } - if (i915_gem_obj_is_pinned(obj)) { - ret = -EINVAL; - goto out; - } - if (obj->pages && - obj->tiling_mode != I915_TILING_NONE && + i915_gem_object_is_tiled(obj) && dev_priv->quirks & QUIRK_PIN_SWIZZLED_PAGES) { if (obj->madv == I915_MADV_WILLNEED) i915_gem_object_unpin_pages(obj); @@@ -4739,8 -3903,7 +3903,7 @@@ args->retained = obj->madv != __I915_MADV_PURGED; - out: - drm_gem_object_unreference(&obj->base); + i915_gem_object_put(obj); unlock: mutex_unlock(&dev->struct_mutex); return ret; @@@ -4753,7 -3916,11 +3916,11 @@@ void i915_gem_object_init(struct drm_i9 INIT_LIST_HEAD(&obj->global_list); for (i = 0; i < I915_NUM_ENGINES; i++) - INIT_LIST_HEAD(&obj->engine_list[i]); + init_request_active(&obj->last_read[i], + i915_gem_object_retire__read); + init_request_active(&obj->last_write, + i915_gem_object_retire__write); + init_request_active(&obj->last_fence, NULL); INIT_LIST_HEAD(&obj->obj_exec_link); INIT_LIST_HEAD(&obj->vma_list); INIT_LIST_HEAD(&obj->batch_pool_link); @@@ -4795,7 -3962,7 +3962,7 @@@ struct drm_i915_gem_object *i915_gem_ob mask |= __GFP_DMA32; } - mapping = file_inode(obj->base.filp)->i_mapping; + mapping = obj->base.filp->f_mapping; mapping_set_gfp_mask(mapping, mask); i915_gem_object_init(obj, &i915_gem_object_ops); @@@ -4865,33 -4032,31 +4032,31 @@@ void i915_gem_free_object(struct drm_ge trace_i915_gem_object_destroy(obj); + /* All file-owned VMA should have been released by this point through + * i915_gem_close_object(), or earlier by i915_gem_context_close(). + * However, the object may also be bound into the global GTT (e.g. + * older GPUs without per-process support, or for direct access through + * the GTT either for the user or for scanout). Those VMA still need to + * be unbound now. + */ list_for_each_entry_safe(vma, next, &obj->vma_list, obj_link) { - int ret; - - vma->pin_count = 0; - ret = i915_vma_unbind(vma); - if (WARN_ON(ret == -ERESTARTSYS)) { - bool was_interruptible; - - was_interruptible = dev_priv->mm.interruptible; - dev_priv->mm.interruptible = false; - - WARN_ON(i915_vma_unbind(vma)); - - dev_priv->mm.interruptible = was_interruptible; - } + GEM_BUG_ON(!i915_vma_is_ggtt(vma)); + GEM_BUG_ON(i915_vma_is_active(vma)); + vma->flags &= ~I915_VMA_PIN_MASK; + i915_vma_close(vma); } + GEM_BUG_ON(obj->bind_count); /* Stolen objects don't hold a ref, but do hold pin count. Fix that up * before progressing. 
*/ if (obj->stolen) i915_gem_object_unpin_pages(obj); - WARN_ON(obj->frontbuffer_bits); + WARN_ON(atomic_read(&obj->frontbuffer_bits)); if (obj->pages && obj->madv == I915_MADV_WILLNEED && dev_priv->quirks & QUIRK_PIN_SWIZZLED_PAGES && - obj->tiling_mode != I915_TILING_NONE) + i915_gem_object_is_tiled(obj)) i915_gem_object_unpin_pages(obj); if (WARN_ON(obj->pages_pin_count)) @@@ -4899,7 -4064,6 +4064,6 @@@ if (discard_backing_storage(obj)) obj->madv = I915_MADV_DONTNEED; i915_gem_object_put_pages(obj); - i915_gem_object_free_mmap_offset(obj); BUG_ON(obj->pages); @@@ -4938,51 -4102,39 +4102,39 @@@ struct i915_vma *i915_gem_obj_to_ggtt_v GEM_BUG_ON(!view); list_for_each_entry(vma, &obj->vma_list, obj_link) - if (vma->is_ggtt && i915_ggtt_view_equal(&vma->ggtt_view, view)) + if (i915_vma_is_ggtt(vma) && + i915_ggtt_view_equal(&vma->ggtt_view, view)) return vma; return NULL; } - void i915_gem_vma_destroy(struct i915_vma *vma) - { - WARN_ON(vma->node.allocated); - - /* Keep the vma as a placeholder in the execbuffer reservation lists */ - if (!list_empty(&vma->exec_list)) - return; - - if (!vma->is_ggtt) - i915_ppgtt_put(i915_vm_to_ppgtt(vma->vm)); - - list_del(&vma->obj_link); - - kmem_cache_free(to_i915(vma->obj->base.dev)->vmas, vma); - } - - static void - i915_gem_stop_engines(struct drm_device *dev) + int i915_gem_suspend(struct drm_device *dev) { struct drm_i915_private *dev_priv = to_i915(dev); - struct intel_engine_cs *engine; - - for_each_engine(engine, dev_priv) - dev_priv->gt.stop_engine(engine); - } + int ret; - int - i915_gem_suspend(struct drm_device *dev) - { - struct drm_i915_private *dev_priv = to_i915(dev); - int ret = 0; + intel_suspend_gt_powersave(dev_priv); mutex_lock(&dev->struct_mutex); - ret = i915_gem_wait_for_idle(dev_priv); + + /* We have to flush all the executing contexts to main memory so + * that they can be saved in the hibernation image. To ensure the last + * context image is coherent, we have to switch away from it. That + * leaves the dev_priv->kernel_context still active when + * we actually suspend, and its image in memory may not match the GPU + * state. Fortunately, the kernel_context is disposable and we do + * not rely on its state. + */ + ret = i915_gem_switch_to_kernel_context(dev_priv); + if (ret) + goto err; + + ret = i915_gem_wait_for_idle(dev_priv, true); if (ret) goto err; i915_gem_retire_requests(dev_priv); - i915_gem_stop_engines(dev); i915_gem_context_lost(dev_priv); mutex_unlock(&dev->struct_mutex); @@@ -5002,6 -4154,23 +4154,23 @@@ err return ret; } + void i915_gem_resume(struct drm_device *dev) + { + struct drm_i915_private *dev_priv = to_i915(dev); + + mutex_lock(&dev->struct_mutex); + i915_gem_restore_gtt_mappings(dev); + + /* As we didn't flush the kernel context before suspend, we cannot + * guarantee that the context image is complete. So let's just reset + * it and start again. 
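/*
 * A minimal sketch, not from the patch: the ordering that
 * i915_gem_suspend() above depends on. Function pointers stand in for
 * i915_gem_switch_to_kernel_context() and i915_gem_wait_for_idle():
 * switch away from the last user context first so its image is written
 * back to memory, and only then wait for the GPU to idle.
 */
static int suspend_order_sketch(int (*switch_to_kernel_ctx)(void),
				int (*wait_for_idle)(void))
{
	int ret;

	ret = switch_to_kernel_ctx();	/* flush context images to memory */
	if (ret)
		return ret;

	return wait_for_idle();		/* quiesce before the snapshot */
}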
+ */ + if (i915.enable_execlists) + intel_lr_context_reset(dev_priv, dev_priv->kernel_context); + + mutex_unlock(&dev->struct_mutex); + } + void i915_gem_init_swizzling(struct drm_device *dev) { struct drm_i915_private *dev_priv = to_i915(dev); @@@ -5054,53 -4223,6 +4223,6 @@@ static void init_unused_rings(struct dr } } - int i915_gem_init_engines(struct drm_device *dev) - { - struct drm_i915_private *dev_priv = to_i915(dev); - int ret; - - ret = intel_init_render_ring_buffer(dev); - if (ret) - return ret; - - if (HAS_BSD(dev)) { - ret = intel_init_bsd_ring_buffer(dev); - if (ret) - goto cleanup_render_ring; - } - - if (HAS_BLT(dev)) { - ret = intel_init_blt_ring_buffer(dev); - if (ret) - goto cleanup_bsd_ring; - } - - if (HAS_VEBOX(dev)) { - ret = intel_init_vebox_ring_buffer(dev); - if (ret) - goto cleanup_blt_ring; - } - - if (HAS_BSD2(dev)) { - ret = intel_init_bsd2_ring_buffer(dev); - if (ret) - goto cleanup_vebox_ring; - } - - return 0; - - cleanup_vebox_ring: - intel_cleanup_engine(&dev_priv->engine[VECS]); - cleanup_blt_ring: - intel_cleanup_engine(&dev_priv->engine[BCS]); - cleanup_bsd_ring: - intel_cleanup_engine(&dev_priv->engine[VCS]); - cleanup_render_ring: - intel_cleanup_engine(&dev_priv->engine[RCS]); - - return ret; - } - int i915_gem_init_hw(struct drm_device *dev) { @@@ -5167,6 -4289,27 +4289,27 @@@ out return ret; } + bool intel_sanitize_semaphores(struct drm_i915_private *dev_priv, int value) + { + if (INTEL_INFO(dev_priv)->gen < 6) + return false; + + /* TODO: make semaphores and Execlists play nicely together */ + if (i915.enable_execlists) + return false; + + if (value >= 0) + return value; + + #ifdef CONFIG_INTEL_IOMMU + /* Enable semaphores on SNB when IO remapping is off */ + if (INTEL_INFO(dev_priv)->gen == 6 && intel_iommu_gfx_mapped) + return false; + #endif + + return true; + } + int i915_gem_init(struct drm_device *dev) { struct drm_i915_private *dev_priv = to_i915(dev); @@@ -5175,15 -4318,9 +4318,9 @@@ mutex_lock(&dev->struct_mutex); if (!i915.enable_execlists) { - dev_priv->gt.execbuf_submit = i915_gem_ringbuffer_submission; - dev_priv->gt.init_engines = i915_gem_init_engines; - dev_priv->gt.cleanup_engine = intel_cleanup_engine; - dev_priv->gt.stop_engine = intel_stop_engine; + dev_priv->gt.cleanup_engine = intel_engine_cleanup; } else { - dev_priv->gt.execbuf_submit = intel_execlists_submission; - dev_priv->gt.init_engines = intel_logical_rings_init; dev_priv->gt.cleanup_engine = intel_logical_ring_cleanup; - dev_priv->gt.stop_engine = intel_logical_ring_stop; } /* This is just a security blanket to placate dragons. @@@ -5195,19 -4332,22 +4332,22 @@@ intel_uncore_forcewake_get(dev_priv, FORCEWAKE_ALL); i915_gem_init_userptr(dev_priv); - i915_gem_init_ggtt(dev); + + ret = i915_gem_init_ggtt(dev_priv); + if (ret) + goto out_unlock; ret = i915_gem_context_init(dev); if (ret) goto out_unlock; - ret = dev_priv->gt.init_engines(dev); + ret = intel_engines_init(dev); if (ret) goto out_unlock; ret = i915_gem_init_hw(dev); if (ret == -EIO) { - /* Allow ring initialisation to fail by marking the GPU as + /* Allow engine initialisation to fail by marking the GPU as * wedged. But we only want to do this where the GPU is angry, * for all other failure, such as an allocation failure, bail. 
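/*
 * A minimal sketch, not from the patch: the "-1 means auto" module
 * parameter convention that intel_sanitize_semaphores() above follows.
 * A non-negative value is an explicit user override; anything negative
 * falls through to platform auto-detection.
 */
#include <stdbool.h>

static bool sanitize_tristate(int value, bool (*autodetect)(void))
{
	if (value >= 0)
		return value;		/* explicit 0/1 from the user */

	return autodetect();		/* -1: pick a sane default */
}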
*/ @@@ -5236,7 -4376,6 +4376,6 @@@ i915_gem_cleanup_engines(struct drm_dev static void init_engine_lists(struct intel_engine_cs *engine) { - INIT_LIST_HEAD(&engine->active_list); INIT_LIST_HEAD(&engine->request_list); } @@@ -5283,10 -4422,11 +4422,11 @@@ i915_gem_load_init(struct drm_device *d dev_priv->requests = kmem_cache_create("i915_gem_request", sizeof(struct drm_i915_gem_request), 0, - SLAB_HWCACHE_ALIGN, + SLAB_HWCACHE_ALIGN | + SLAB_RECLAIM_ACCOUNT | + SLAB_DESTROY_BY_RCU, NULL); - INIT_LIST_HEAD(&dev_priv->vm_list); INIT_LIST_HEAD(&dev_priv->context_list); INIT_LIST_HEAD(&dev_priv->mm.unbound_list); INIT_LIST_HEAD(&dev_priv->mm.bound_list); @@@ -5310,7 -4450,7 +4450,7 @@@ dev_priv->mm.interruptible = true; - mutex_init(&dev_priv->fb_tracking.lock); + spin_lock_init(&dev_priv->fb_tracking.lock); } void i915_gem_load_cleanup(struct drm_device *dev) @@@ -5320,6 -4460,9 +4460,9 @@@ kmem_cache_destroy(dev_priv->requests); kmem_cache_destroy(dev_priv->vmas); kmem_cache_destroy(dev_priv->objects); + + /* And ensure that our DESTROY_BY_RCU slabs are truly destroyed */ + rcu_barrier(); } int i915_gem_freeze_late(struct drm_i915_private *dev_priv) @@@ -5353,21 -4496,15 +4496,15 @@@ void i915_gem_release(struct drm_device *dev, struct drm_file *file) { struct drm_i915_file_private *file_priv = file->driver_priv; + struct drm_i915_gem_request *request; /* Clean up our request list when the client is going away, so that * later retire_requests won't dereference our soon-to-be-gone * file_priv. */ spin_lock(&file_priv->mm.lock); - while (!list_empty(&file_priv->mm.request_list)) { - struct drm_i915_gem_request *request; - - request = list_first_entry(&file_priv->mm.request_list, - struct drm_i915_gem_request, - client_list); - list_del(&request->client_list); + list_for_each_entry(request, &file_priv->mm.request_list, client_list) request->file_priv = NULL; - } spin_unlock(&file_priv->mm.lock); if (!list_empty(&file_priv->rps.link)) { @@@ -5396,7 -4533,7 +4533,7 @@@ int i915_gem_open(struct drm_device *de spin_lock_init(&file_priv->mm.lock); INIT_LIST_HEAD(&file_priv->mm.request_list); - file_priv->bsd_ring = -1; + file_priv->bsd_engine = -1; ret = i915_gem_context_open(dev, file); if (ret) @@@ -5418,16 -4555,23 +4555,23 @@@ void i915_gem_track_fb(struct drm_i915_ struct drm_i915_gem_object *new, unsigned frontbuffer_bits) { + /* Control of individual bits within the mask is guarded by + * the owning plane->mutex, i.e. we can never see concurrent + * manipulation of individual bits. But since the bitfield as a whole + * is updated using RMW, we need to use atomics in order to update + * the bits. 
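/*
 * A minimal sketch, not from the patch: why i915_gem_load_cleanup() above
 * follows kmem_cache_destroy() with rcu_barrier(). With
 * SLAB_DESTROY_BY_RCU the frees of the underlying slab pages are deferred
 * past a grace period, so a barrier after the destroy is what makes the
 * teardown truly complete before, say, module unload.
 */
#include <linux/rcupdate.h>
#include <linux/slab.h>

static void destroy_rcu_backed_cache(struct kmem_cache *cache)
{
	kmem_cache_destroy(cache);
	rcu_barrier();	/* wait for the RCU-deferred slab frees to land */
}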
+ */ + BUILD_BUG_ON(INTEL_FRONTBUFFER_BITS_PER_PIPE * I915_MAX_PIPES > + sizeof(atomic_t) * BITS_PER_BYTE); + if (old) { - WARN_ON(!mutex_is_locked(&old->base.dev->struct_mutex)); - WARN_ON(!(old->frontbuffer_bits & frontbuffer_bits)); - old->frontbuffer_bits &= ~frontbuffer_bits; + WARN_ON(!(atomic_read(&old->frontbuffer_bits) & frontbuffer_bits)); + atomic_andnot(frontbuffer_bits, &old->frontbuffer_bits); } if (new) { - WARN_ON(!mutex_is_locked(&new->base.dev->struct_mutex)); - WARN_ON(new->frontbuffer_bits & frontbuffer_bits); - new->frontbuffer_bits |= frontbuffer_bits; + WARN_ON(atomic_read(&new->frontbuffer_bits) & frontbuffer_bits); + atomic_or(frontbuffer_bits, &new->frontbuffer_bits); } } @@@ -5441,7 -4585,7 +4585,7 @@@ u64 i915_gem_obj_offset(struct drm_i915 WARN_ON(vm == &dev_priv->mm.aliasing_ppgtt->base); list_for_each_entry(vma, &o->vma_list, obj_link) { - if (vma->is_ggtt && + if (i915_vma_is_ggtt(vma) && vma->ggtt_view.type != I915_GGTT_VIEW_NORMAL) continue; if (vma->vm == vm) @@@ -5459,7 -4603,8 +4603,8 @@@ u64 i915_gem_obj_ggtt_offset_view(struc struct i915_vma *vma; list_for_each_entry(vma, &o->vma_list, obj_link) - if (vma->is_ggtt && i915_ggtt_view_equal(&vma->ggtt_view, view)) + if (i915_vma_is_ggtt(vma) && + i915_ggtt_view_equal(&vma->ggtt_view, view)) return vma->node.start; WARN(1, "global vma for this object not found. (view=%u)\n", view->type); @@@ -5472,7 -4617,7 +4617,7 @@@ bool i915_gem_obj_bound(struct drm_i915 struct i915_vma *vma; list_for_each_entry(vma, &o->vma_list, obj_link) { - if (vma->is_ggtt && + if (i915_vma_is_ggtt(vma) && vma->ggtt_view.type != I915_GGTT_VIEW_NORMAL) continue; if (vma->vm == vm && drm_mm_node_allocated(&vma->node)) @@@ -5488,7 -4633,7 +4633,7 @@@ bool i915_gem_obj_ggtt_bound_view(struc struct i915_vma *vma; list_for_each_entry(vma, &o->vma_list, obj_link) - if (vma->is_ggtt && + if (i915_vma_is_ggtt(vma) && i915_ggtt_view_equal(&vma->ggtt_view, view) && drm_mm_node_allocated(&vma->node)) return true; @@@ -5496,17 -4641,6 +4641,6 @@@ return false; } - bool i915_gem_obj_bound_any(struct drm_i915_gem_object *o) - { - struct i915_vma *vma; - - list_for_each_entry(vma, &o->vma_list, obj_link) - if (drm_mm_node_allocated(&vma->node)) - return true; - - return false; - } - unsigned long i915_gem_obj_ggtt_size(struct drm_i915_gem_object *o) { struct i915_vma *vma; @@@ -5514,7 -4648,7 +4648,7 @@@ GEM_BUG_ON(list_empty(&o->vma_list)); list_for_each_entry(vma, &o->vma_list, obj_link) { - if (vma->is_ggtt && + if (i915_vma_is_ggtt(vma) && vma->ggtt_view.type == I915_GGTT_VIEW_NORMAL) return vma->node.size; } @@@ -5526,7 -4660,7 +4660,7 @@@ bool i915_gem_obj_is_pinned(struct drm_ { struct i915_vma *vma; list_for_each_entry(vma, &obj->vma_list, obj_link) - if (vma->pin_count > 0) + if (i915_vma_is_pinned(vma)) return true; return false; @@@ -5584,6 -4718,6 +4718,6 @@@ i915_gem_object_create_from_data(struc return obj; fail: - drm_gem_object_unreference(&obj->base); + i915_gem_object_put(obj); return ERR_PTR(ret); } diff --combined drivers/gpu/drm/i915/intel_display.c index 8cc361114112,9cbf5431c1e3..c6f27ab99e8f --- a/drivers/gpu/drm/i915/intel_display.c +++ b/drivers/gpu/drm/i915/intel_display.c @@@ -34,6 -34,7 +34,7 @@@ #include #include #include "intel_drv.h" + #include "intel_frontbuffer.h" #include #include "i915_drv.h" #include "i915_gem_dmabuf.h" @@@ -2465,9 -2466,8 +2466,8 @@@ intel_alloc_initial_plane_obj(struct in return false; } - obj->tiling_mode = plane_config->tiling; - if (obj->tiling_mode == I915_TILING_X) - obj->stride = 
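/*
 * A minimal sketch, not from the patch: the atomic bit handover done by
 * i915_gem_track_fb() above. Each plane owns its own bit, but the
 * bitfield as a whole is shared, so updates must be atomic RMW
 * (atomic_andnot/atomic_or) rather than plain &= and |=.
 */
#include <linux/atomic.h>

static void track_fb_bits(atomic_t *old_bits, atomic_t *new_bits, int mask)
{
	if (old_bits)
		atomic_andnot(mask, old_bits);	/* release bits on old fb */
	if (new_bits)
		atomic_or(mask, new_bits);	/* claim bits on new fb */
}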
fb->pitches[0]; + if (plane_config->tiling == I915_TILING_X) + obj->tiling_and_stride = fb->pitches[0] | I915_TILING_X; mode_cmd.pixel_format = fb->pixel_format; mode_cmd.width = fb->width; @@@ -2488,7 -2488,7 +2488,7 @@@ return true; out_unref_obj: - drm_gem_object_unreference(&obj->base); + i915_gem_object_put(obj); mutex_unlock(&dev->struct_mutex); return false; } @@@ -2565,7 -2565,7 +2565,7 @@@ intel_find_initial_plane_obj(struct int * simplest solution is to just disable the primary plane now and * pretend the BIOS never had it enabled. */ - to_intel_plane_state(plane_state)->visible = false; + to_intel_plane_state(plane_state)->base.visible = false; crtc_state->plane_mask &= ~(1 << drm_plane_index(primary)); intel_pre_disable_primary_noatomic(&intel_crtc->base); intel_plane->disable_plane(primary, &intel_crtc->base); @@@ -2583,24 -2583,25 +2583,25 @@@ valid_fb plane_state->crtc_w = fb->width; plane_state->crtc_h = fb->height; - intel_state->src.x1 = plane_state->src_x; - intel_state->src.y1 = plane_state->src_y; - intel_state->src.x2 = plane_state->src_x + plane_state->src_w; - intel_state->src.y2 = plane_state->src_y + plane_state->src_h; - intel_state->dst.x1 = plane_state->crtc_x; - intel_state->dst.y1 = plane_state->crtc_y; - intel_state->dst.x2 = plane_state->crtc_x + plane_state->crtc_w; - intel_state->dst.y2 = plane_state->crtc_y + plane_state->crtc_h; + intel_state->base.src.x1 = plane_state->src_x; + intel_state->base.src.y1 = plane_state->src_y; + intel_state->base.src.x2 = plane_state->src_x + plane_state->src_w; + intel_state->base.src.y2 = plane_state->src_y + plane_state->src_h; + intel_state->base.dst.x1 = plane_state->crtc_x; + intel_state->base.dst.y1 = plane_state->crtc_y; + intel_state->base.dst.x2 = plane_state->crtc_x + plane_state->crtc_w; + intel_state->base.dst.y2 = plane_state->crtc_y + plane_state->crtc_h; obj = intel_fb_obj(fb); - if (obj->tiling_mode != I915_TILING_NONE) + if (i915_gem_object_is_tiled(obj)) dev_priv->preserve_bios_swizzle = true; drm_framebuffer_reference(fb); primary->fb = primary->state->fb = fb; primary->crtc = primary->state->crtc = &intel_crtc->base; intel_crtc->base.state->plane_mask |= (1 << drm_plane_index(primary)); - obj->frontbuffer_bits |= to_intel_plane(primary)->frontbuffer_bit; + atomic_or(to_intel_plane(primary)->frontbuffer_bit, + &obj->frontbuffer_bits); } static void i9xx_update_primary_plane(struct drm_plane *primary, @@@ -2618,8 -2619,8 +2619,8 @@@ i915_reg_t reg = DSPCNTR(plane); unsigned int rotation = plane_state->base.rotation; int cpp = drm_format_plane_cpp(fb->pixel_format, 0); - int x = plane_state->src.x1 >> 16; - int y = plane_state->src.y1 >> 16; + int x = plane_state->base.src.x1 >> 16; + int y = plane_state->base.src.y1 >> 16; dspcntr = DISPPLANE_GAMMA_ENABLE; @@@ -2670,8 -2671,7 +2671,7 @@@ BUG(); } - if (INTEL_INFO(dev)->gen >= 4 && - obj->tiling_mode != I915_TILING_NONE) + if (INTEL_INFO(dev)->gen >= 4 && i915_gem_object_is_tiled(obj)) dspcntr |= DISPPLANE_TILED; if (IS_G4X(dev)) @@@ -2688,7 -2688,7 +2688,7 @@@ intel_crtc->dspaddr_offset = linear_offset; } - if (rotation == BIT(DRM_ROTATE_180)) { + if (rotation == DRM_ROTATE_180) { dspcntr |= DISPPLANE_ROTATE_180; x += (crtc_state->pipe_src_w - 1); @@@ -2748,8 -2748,8 +2748,8 @@@ static void ironlake_update_primary_pla i915_reg_t reg = DSPCNTR(plane); unsigned int rotation = plane_state->base.rotation; int cpp = drm_format_plane_cpp(fb->pixel_format, 0); - int x = plane_state->src.x1 >> 16; - int y = plane_state->src.y1 >> 16; + int x = 
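/*
 * A minimal sketch, not from the patch: the packing behind
 * obj->tiling_and_stride above. A fence-capable stride is a multiple of
 * the minimum tile stride, which leaves the low bits free to carry the
 * tiling mode. The 0x3 mask below is an illustrative assumption; the real
 * accessors are i915_gem_object_get_tiling()/i915_gem_object_get_stride().
 */
#define SKETCH_TILING_MASK 0x3u		/* assumed: fits NONE/X/Y */

static unsigned int sketch_tiling(unsigned int tiling_and_stride)
{
	return tiling_and_stride & SKETCH_TILING_MASK;
}

static unsigned int sketch_stride(unsigned int tiling_and_stride)
{
	return tiling_and_stride & ~SKETCH_TILING_MASK;
}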
plane_state->base.src.x1 >> 16; + int y = plane_state->base.src.y1 >> 16; dspcntr = DISPPLANE_GAMMA_ENABLE; dspcntr |= DISPLAY_PLANE_ENABLE; @@@ -2780,7 -2780,7 +2780,7 @@@ BUG(); } - if (obj->tiling_mode != I915_TILING_NONE) + if (i915_gem_object_is_tiled(obj)) dspcntr |= DISPPLANE_TILED; if (!IS_HASWELL(dev) && !IS_BROADWELL(dev)) @@@ -2791,7 -2791,7 +2791,7 @@@ intel_compute_tile_offset(&x, &y, fb, 0, fb->pitches[0], rotation); linear_offset -= intel_crtc->dspaddr_offset; - if (rotation == BIT(DRM_ROTATE_180)) { + if (rotation == DRM_ROTATE_180) { dspcntr |= DISPPLANE_ROTATE_180; if (!IS_HASWELL(dev) && !IS_BROADWELL(dev)) { @@@ -2952,17 -2952,17 +2952,17 @@@ u32 skl_plane_ctl_tiling(uint64_t fb_mo u32 skl_plane_ctl_rotation(unsigned int rotation) { switch (rotation) { - case BIT(DRM_ROTATE_0): + case DRM_ROTATE_0: break; /* * DRM_ROTATE_ is counter clockwise to stay compatible with Xrandr * while i915 HW rotation is clockwise, thats why this swapping. */ - case BIT(DRM_ROTATE_90): + case DRM_ROTATE_90: return PLANE_CTL_ROTATE_270; - case BIT(DRM_ROTATE_180): + case DRM_ROTATE_180: return PLANE_CTL_ROTATE_180; - case BIT(DRM_ROTATE_270): + case DRM_ROTATE_270: return PLANE_CTL_ROTATE_90; default: MISSING_CASE(rotation); @@@ -2987,14 -2987,14 +2987,14 @@@ static void skylake_update_primary_plan int x_offset, y_offset; u32 surf_addr; int scaler_id = plane_state->scaler_id; - int src_x = plane_state->src.x1 >> 16; - int src_y = plane_state->src.y1 >> 16; - int src_w = drm_rect_width(&plane_state->src) >> 16; - int src_h = drm_rect_height(&plane_state->src) >> 16; - int dst_x = plane_state->dst.x1; - int dst_y = plane_state->dst.y1; - int dst_w = drm_rect_width(&plane_state->dst); - int dst_h = drm_rect_height(&plane_state->dst); + int src_x = plane_state->base.src.x1 >> 16; + int src_y = plane_state->base.src.y1 >> 16; + int src_w = drm_rect_width(&plane_state->base.src) >> 16; + int src_h = drm_rect_height(&plane_state->base.src) >> 16; + int dst_x = plane_state->base.dst.x1; + int dst_y = plane_state->base.dst.y1; + int dst_w = drm_rect_width(&plane_state->base.dst); + int dst_h = drm_rect_height(&plane_state->base.dst); plane_ctl = PLANE_CTL_ENABLE | PLANE_CTL_PIPE_GAMMA_ENABLE | @@@ -3009,7 -3009,7 +3009,7 @@@ fb->pixel_format); surf_addr = intel_plane_obj_offset(to_intel_plane(plane), obj, 0); - WARN_ON(drm_rect_width(&plane_state->src) == 0); + WARN_ON(drm_rect_width(&plane_state->base.src) == 0); if (intel_rotation_90_or_270(rotation)) { int cpp = drm_format_plane_cpp(fb->pixel_format, 0); @@@ -3098,7 -3098,7 +3098,7 @@@ static void intel_update_primary_planes drm_modeset_lock_crtc(crtc, &plane->base); plane_state = to_intel_plane_state(plane->base.state); - if (plane_state->visible) + if (plane_state->base.visible) plane->update_plane(&plane->base, to_intel_crtc_state(crtc->state), plane_state); @@@ -4248,7 -4248,7 +4248,7 @@@ int skl_update_scaler_crtc(struct intel intel_crtc->pipe, SKL_CRTC_INDEX); return skl_update_scaler(state, !state->base.active, SKL_CRTC_INDEX, - &state->scaler_state.scaler_id, BIT(DRM_ROTATE_0), + &state->scaler_state.scaler_id, DRM_ROTATE_0, state->pipe_src_w, state->pipe_src_h, adjusted_mode->crtc_hdisplay, adjusted_mode->crtc_vdisplay); } @@@ -4273,7 -4273,7 +4273,7 @@@ static int skl_update_scaler_plane(stru struct drm_framebuffer *fb = plane_state->base.fb; int ret; - bool force_detach = !fb || !plane_state->visible; + bool force_detach = !fb || !plane_state->base.visible; DRM_DEBUG_KMS("Updating scaler for [PLANE:%d:%s] scaler_user index %u.%u\n", 
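/*
 * A minimal sketch, not from the patch: the direction flip behind
 * skl_plane_ctl_rotation() above. DRM rotations are counter-clockwise (to
 * match Xrandr) while the hardware rotates clockwise, so 90 and 270 trade
 * places while 0 and 180 map to themselves.
 */
static unsigned int ccw_to_cw_degrees(unsigned int ccw)
{
	return (360 - ccw) % 360;	/* 0->0, 90->270, 180->180, 270->90 */
}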
intel_plane->base.base.id, intel_plane->base.name, @@@ -4283,10 -4283,10 +4283,10 @@@ drm_plane_index(&intel_plane->base), &plane_state->scaler_id, plane_state->base.rotation, - drm_rect_width(&plane_state->src) >> 16, - drm_rect_height(&plane_state->src) >> 16, - drm_rect_width(&plane_state->dst), - drm_rect_height(&plane_state->dst)); + drm_rect_width(&plane_state->base.src) >> 16, + drm_rect_height(&plane_state->base.src) >> 16, + drm_rect_width(&plane_state->base.dst), + drm_rect_height(&plane_state->base.dst)); if (ret || plane_state->scaler_id < 0) return ret; @@@ -4564,12 -4564,11 +4564,11 @@@ static void intel_post_plane_update(str struct drm_atomic_state *old_state = old_crtc_state->base.state; struct intel_crtc_state *pipe_config = to_intel_crtc_state(crtc->base.state); - struct drm_device *dev = crtc->base.dev; struct drm_plane *primary = crtc->base.primary; struct drm_plane_state *old_pri_state = drm_atomic_get_existing_plane_state(old_state, primary); - intel_frontbuffer_flip(dev, pipe_config->fb_bits); + intel_frontbuffer_flip(to_i915(crtc->base.dev), pipe_config->fb_bits); crtc->wm.cxsr_allowed = true; @@@ -4584,9 -4583,9 +4583,9 @@@ intel_fbc_post_update(crtc); - if (primary_state->visible && + if (primary_state->base.visible && (needs_modeset(&pipe_config->base) || - !old_primary_state->visible)) + !old_primary_state->base.visible)) intel_post_enable_primary(&crtc->base); } } @@@ -4612,8 -4611,8 +4611,8 @@@ static void intel_pre_plane_update(stru intel_fbc_pre_update(crtc, pipe_config, primary_state); - if (old_primary_state->visible && - (modeset || !primary_state->visible)) + if (old_primary_state->base.visible && + (modeset || !primary_state->base.visible)) intel_pre_disable_primary(&crtc->base); } @@@ -4692,7 -4691,7 +4691,7 @@@ static void intel_crtc_disable_planes(s * to compute the mask of flip planes precisely. For the time being * consider this a flip to a NULL plane. 
*/ - intel_frontbuffer_flip(dev, INTEL_FRONTBUFFER_ALL_MASK(pipe)); + intel_frontbuffer_flip(to_i915(dev), INTEL_FRONTBUFFER_ALL_MASK(pipe)); } static void ironlake_crtc_enable(struct drm_crtc *crtc) @@@ -6290,13 -6289,13 +6289,13 @@@ static void intel_crtc_disable_noatomic if (!intel_crtc->active) return; - if (to_intel_plane_state(crtc->primary->state)->visible) { + if (to_intel_plane_state(crtc->primary->state)->base.visible) { WARN_ON(intel_crtc->flip_work); intel_pre_disable_primary_noatomic(crtc); intel_crtc_disable_planes(crtc, 1 << drm_plane_index(crtc->primary)); - to_intel_plane_state(crtc->primary->state)->visible = false; + to_intel_plane_state(crtc->primary->state)->base.visible = false; } dev_priv->display.crtc_disable(crtc); @@@ -10170,7 -10169,7 +10169,7 @@@ static void i845_update_cursor(struct d struct intel_crtc *intel_crtc = to_intel_crtc(crtc); uint32_t cntl = 0, size = 0; - if (plane_state && plane_state->visible) { + if (plane_state && plane_state->base.visible) { unsigned int width = plane_state->base.crtc_w; unsigned int height = plane_state->base.crtc_h; unsigned int stride = roundup_pow_of_two(width) * 4; @@@ -10234,7 -10233,7 +10233,7 @@@ static void i9xx_update_cursor(struct d int pipe = intel_crtc->pipe; uint32_t cntl = 0; - if (plane_state && plane_state->visible) { + if (plane_state && plane_state->base.visible) { cntl = MCURSOR_GAMMA_ENABLE; switch (plane_state->base.crtc_w) { case 64: @@@ -10255,7 -10254,7 +10254,7 @@@ if (HAS_DDI(dev)) cntl |= CURSOR_PIPE_CSC_ENABLE; - if (plane_state->base.rotation == BIT(DRM_ROTATE_180)) + if (plane_state->base.rotation == DRM_ROTATE_180) cntl |= CURSOR_ROTATE_180; } @@@ -10301,7 -10300,7 +10300,7 @@@ static void intel_crtc_update_cursor(st /* ILK+ do this automagically */ if (HAS_GMCH_DISPLAY(dev) && - plane_state->base.rotation == BIT(DRM_ROTATE_180)) { + plane_state->base.rotation == DRM_ROTATE_180) { base += (plane_state->base.crtc_h * plane_state->base.crtc_w - 1) * 4; } @@@ -10434,7 -10433,7 +10433,7 @@@ intel_framebuffer_create_for_mode(struc fb = intel_framebuffer_create(dev, &mode_cmd, obj); if (IS_ERR(fb)) - drm_gem_object_unreference_unlocked(&obj->base); + i915_gem_object_put_unlocked(obj); return fb; } @@@ -10945,13 -10944,13 +10944,13 @@@ static void intel_unpin_work_fn(struct mutex_lock(&dev->struct_mutex); intel_unpin_fb_obj(work->old_fb, primary->state->rotation); - drm_gem_object_unreference(&work->pending_flip_obj->base); - - if (work->flip_queued_req) - i915_gem_request_assign(&work->flip_queued_req, NULL); + i915_gem_object_put(work->pending_flip_obj); mutex_unlock(&dev->struct_mutex); - intel_frontbuffer_flip_complete(dev, to_intel_plane(primary)->frontbuffer_bit); + i915_gem_request_put(work->flip_queued_req); + + intel_frontbuffer_flip_complete(to_i915(dev), + to_intel_plane(primary)->frontbuffer_bit); intel_fbc_post_update(crtc); drm_framebuffer_unreference(work->old_fb); @@@ -11116,7 -11115,7 +11115,7 @@@ static int intel_gen2_queue_flip(struc struct drm_i915_gem_request *req, uint32_t flags) { - struct intel_engine_cs *engine = req->engine; + struct intel_ring *ring = req->ring; struct intel_crtc *intel_crtc = to_intel_crtc(crtc); u32 flip_mask; int ret; @@@ -11132,13 -11131,13 +11131,13 @@@ flip_mask = MI_WAIT_FOR_PLANE_B_FLIP; else flip_mask = MI_WAIT_FOR_PLANE_A_FLIP; - intel_ring_emit(engine, MI_WAIT_FOR_EVENT | flip_mask); - intel_ring_emit(engine, MI_NOOP); - intel_ring_emit(engine, MI_DISPLAY_FLIP | + intel_ring_emit(ring, MI_WAIT_FOR_EVENT | flip_mask); + intel_ring_emit(ring, 
MI_NOOP); + intel_ring_emit(ring, MI_DISPLAY_FLIP | MI_DISPLAY_FLIP_PLANE(intel_crtc->plane)); - intel_ring_emit(engine, fb->pitches[0]); - intel_ring_emit(engine, intel_crtc->flip_work->gtt_offset); - intel_ring_emit(engine, 0); /* aux display base address, unused */ + intel_ring_emit(ring, fb->pitches[0]); + intel_ring_emit(ring, intel_crtc->flip_work->gtt_offset); + intel_ring_emit(ring, 0); /* aux display base address, unused */ return 0; } @@@ -11150,7 -11149,7 +11149,7 @@@ static int intel_gen3_queue_flip(struc struct drm_i915_gem_request *req, uint32_t flags) { - struct intel_engine_cs *engine = req->engine; + struct intel_ring *ring = req->ring; struct intel_crtc *intel_crtc = to_intel_crtc(crtc); u32 flip_mask; int ret; @@@ -11163,13 -11162,13 +11162,13 @@@ flip_mask = MI_WAIT_FOR_PLANE_B_FLIP; else flip_mask = MI_WAIT_FOR_PLANE_A_FLIP; - intel_ring_emit(engine, MI_WAIT_FOR_EVENT | flip_mask); - intel_ring_emit(engine, MI_NOOP); - intel_ring_emit(engine, MI_DISPLAY_FLIP_I915 | + intel_ring_emit(ring, MI_WAIT_FOR_EVENT | flip_mask); + intel_ring_emit(ring, MI_NOOP); + intel_ring_emit(ring, MI_DISPLAY_FLIP_I915 | MI_DISPLAY_FLIP_PLANE(intel_crtc->plane)); - intel_ring_emit(engine, fb->pitches[0]); - intel_ring_emit(engine, intel_crtc->flip_work->gtt_offset); - intel_ring_emit(engine, MI_NOOP); + intel_ring_emit(ring, fb->pitches[0]); + intel_ring_emit(ring, intel_crtc->flip_work->gtt_offset); + intel_ring_emit(ring, MI_NOOP); return 0; } @@@ -11181,7 -11180,7 +11180,7 @@@ static int intel_gen4_queue_flip(struc struct drm_i915_gem_request *req, uint32_t flags) { - struct intel_engine_cs *engine = req->engine; + struct intel_ring *ring = req->ring; struct drm_i915_private *dev_priv = to_i915(dev); struct intel_crtc *intel_crtc = to_intel_crtc(crtc); uint32_t pf, pipesrc; @@@ -11195,11 -11194,11 +11194,11 @@@ * Display Registers (which do not change across a page-flip) * so we need only reprogram the base address. */ - intel_ring_emit(engine, MI_DISPLAY_FLIP | + intel_ring_emit(ring, MI_DISPLAY_FLIP | MI_DISPLAY_FLIP_PLANE(intel_crtc->plane)); - intel_ring_emit(engine, fb->pitches[0]); - intel_ring_emit(engine, intel_crtc->flip_work->gtt_offset | - obj->tiling_mode); + intel_ring_emit(ring, fb->pitches[0]); + intel_ring_emit(ring, intel_crtc->flip_work->gtt_offset | + i915_gem_object_get_tiling(obj)); /* XXX Enabling the panel-fitter across page-flip is so far * untested on non-native modes, so ignore it for now. 
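/*
 * A minimal sketch, not from the patch: the dword-stream shape of the
 * queue_flip emitters above. A flip is a short fixed run of dwords
 * written with intel_ring_emit(); the array-and-tail form below is a
 * simplified stand-in for the real ring abstraction.
 */
#include <stdint.h>

static void emit_flip_sketch(uint32_t *ring, unsigned int *tail,
			     uint32_t flip_cmd, uint32_t pitch,
			     uint32_t base)
{
	ring[(*tail)++] = flip_cmd;	/* MI_DISPLAY_FLIP | plane select */
	ring[(*tail)++] = pitch;	/* fb->pitches[0] (plus tiling bit) */
	ring[(*tail)++] = base;		/* GTT offset of the new scanout */
	ring[(*tail)++] = 0;		/* trailing pad / MI_NOOP slot */
}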
@@@ -11207,7 -11206,7 +11206,7 @@@ */ pf = 0; pipesrc = I915_READ(PIPESRC(intel_crtc->pipe)) & 0x0fff0fff; - intel_ring_emit(engine, pf | pipesrc); + intel_ring_emit(ring, pf | pipesrc); return 0; } @@@ -11219,7 -11218,7 +11218,7 @@@ static int intel_gen6_queue_flip(struc struct drm_i915_gem_request *req, uint32_t flags) { - struct intel_engine_cs *engine = req->engine; + struct intel_ring *ring = req->ring; struct drm_i915_private *dev_priv = to_i915(dev); struct intel_crtc *intel_crtc = to_intel_crtc(crtc); uint32_t pf, pipesrc; @@@ -11229,10 -11228,10 +11228,10 @@@ if (ret) return ret; - intel_ring_emit(engine, MI_DISPLAY_FLIP | + intel_ring_emit(ring, MI_DISPLAY_FLIP | MI_DISPLAY_FLIP_PLANE(intel_crtc->plane)); - intel_ring_emit(engine, fb->pitches[0] | obj->tiling_mode); - intel_ring_emit(engine, intel_crtc->flip_work->gtt_offset); + intel_ring_emit(ring, fb->pitches[0] | i915_gem_object_get_tiling(obj)); + intel_ring_emit(ring, intel_crtc->flip_work->gtt_offset); /* Contrary to the suggestions in the documentation, * "Enable Panel Fitter" does not seem to be required when page @@@ -11242,7 -11241,7 +11241,7 @@@ */ pf = 0; pipesrc = I915_READ(PIPESRC(intel_crtc->pipe)) & 0x0fff0fff; - intel_ring_emit(engine, pf | pipesrc); + intel_ring_emit(ring, pf | pipesrc); return 0; } @@@ -11254,7 -11253,7 +11253,7 @@@ static int intel_gen7_queue_flip(struc struct drm_i915_gem_request *req, uint32_t flags) { - struct intel_engine_cs *engine = req->engine; + struct intel_ring *ring = req->ring; struct intel_crtc *intel_crtc = to_intel_crtc(crtc); uint32_t plane_bit = 0; int len, ret; @@@ -11275,7 -11274,7 +11274,7 @@@ } len = 4; - if (engine->id == RCS) { + if (req->engine->id == RCS) { len += 6; /* * On Gen 8, SRM is now taking an extra dword to accommodate @@@ -11313,30 -11312,30 +11312,30 @@@ * for the RCS also doesn't appear to drop events. Setting the DERRMR * to zero does lead to lockups within MI_DISPLAY_FLIP. 
*/ - if (engine->id == RCS) { - intel_ring_emit(engine, MI_LOAD_REGISTER_IMM(1)); - intel_ring_emit_reg(engine, DERRMR); - intel_ring_emit(engine, ~(DERRMR_PIPEA_PRI_FLIP_DONE | + if (req->engine->id == RCS) { + intel_ring_emit(ring, MI_LOAD_REGISTER_IMM(1)); + intel_ring_emit_reg(ring, DERRMR); + intel_ring_emit(ring, ~(DERRMR_PIPEA_PRI_FLIP_DONE | DERRMR_PIPEB_PRI_FLIP_DONE | DERRMR_PIPEC_PRI_FLIP_DONE)); if (IS_GEN8(dev)) - intel_ring_emit(engine, MI_STORE_REGISTER_MEM_GEN8 | + intel_ring_emit(ring, MI_STORE_REGISTER_MEM_GEN8 | MI_SRM_LRM_GLOBAL_GTT); else - intel_ring_emit(engine, MI_STORE_REGISTER_MEM | + intel_ring_emit(ring, MI_STORE_REGISTER_MEM | MI_SRM_LRM_GLOBAL_GTT); - intel_ring_emit_reg(engine, DERRMR); - intel_ring_emit(engine, engine->scratch.gtt_offset + 256); + intel_ring_emit_reg(ring, DERRMR); + intel_ring_emit(ring, req->engine->scratch.gtt_offset + 256); if (IS_GEN8(dev)) { - intel_ring_emit(engine, 0); - intel_ring_emit(engine, MI_NOOP); + intel_ring_emit(ring, 0); + intel_ring_emit(ring, MI_NOOP); } } - intel_ring_emit(engine, MI_DISPLAY_FLIP_I915 | plane_bit); - intel_ring_emit(engine, (fb->pitches[0] | obj->tiling_mode)); - intel_ring_emit(engine, intel_crtc->flip_work->gtt_offset); - intel_ring_emit(engine, (MI_NOOP)); + intel_ring_emit(ring, MI_DISPLAY_FLIP_I915 | plane_bit); + intel_ring_emit(ring, fb->pitches[0] | i915_gem_object_get_tiling(obj)); + intel_ring_emit(ring, intel_crtc->flip_work->gtt_offset); + intel_ring_emit(ring, (MI_NOOP)); return 0; } @@@ -11371,7 -11370,8 +11370,8 @@@ static bool use_mmio_flip(struct intel_ if (resv && !reservation_object_test_signaled_rcu(resv, false)) return true; - return engine != i915_gem_request_get_engine(obj->last_write_req); + return engine != i915_gem_active_get_engine(&obj->last_write, + &obj->base.dev->struct_mutex); } static void skl_do_mmio_flip(struct intel_crtc *intel_crtc, @@@ -11440,7 -11440,7 +11440,7 @@@ static void ilk_do_mmio_flip(struct int dspcntr = I915_READ(reg); - if (obj->tiling_mode != I915_TILING_NONE) + if (i915_gem_object_is_tiled(obj)) dspcntr |= DISPPLANE_TILED; else dspcntr &= ~DISPPLANE_TILED; @@@ -11463,9 -11463,9 +11463,9 @@@ static void intel_mmio_flip_work_func(s struct reservation_object *resv; if (work->flip_queued_req) - WARN_ON(__i915_wait_request(work->flip_queued_req, - false, NULL, - &dev_priv->rps.mmioflips)); + WARN_ON(i915_wait_request(work->flip_queued_req, + false, NULL, + NO_WAITBOOST)); /* For framebuffer backed by dmabuf, wait for fence */ resv = i915_gem_object_get_dmabuf_resv(obj); @@@ -11576,7 -11576,7 +11576,7 @@@ static int intel_crtc_page_flip(struct struct intel_flip_work *work; struct intel_engine_cs *engine; bool mmio_flip; - struct drm_i915_gem_request *request = NULL; + struct drm_i915_gem_request *request; int ret; /* @@@ -11642,7 -11642,6 +11642,6 @@@ /* Reference the objects for the scheduled work. 
*/ drm_framebuffer_reference(work->old_fb); - drm_gem_object_reference(&obj->base); crtc->primary->fb = fb; update_state_fb(crtc->primary); @@@ -11650,7 -11649,7 +11649,7 @@@ intel_fbc_pre_update(intel_crtc, intel_crtc->config, to_intel_plane_state(primary->state)); - work->pending_flip_obj = obj; + work->pending_flip_obj = i915_gem_object_get(obj); ret = i915_mutex_lock_interruptible(dev); if (ret) @@@ -11669,13 -11668,15 +11668,15 @@@ if (IS_VALLEYVIEW(dev) || IS_CHERRYVIEW(dev)) { engine = &dev_priv->engine[BCS]; - if (obj->tiling_mode != intel_fb_obj(work->old_fb)->tiling_mode) + if (i915_gem_object_get_tiling(obj) != + i915_gem_object_get_tiling(intel_fb_obj(work->old_fb))) /* vlv: DISPLAY_FLIP fails to change tiling */ engine = NULL; } else if (IS_IVYBRIDGE(dev) || IS_HASWELL(dev)) { engine = &dev_priv->engine[BCS]; } else if (INTEL_INFO(dev)->gen >= 7) { - engine = i915_gem_request_get_engine(obj->last_write_req); + engine = i915_gem_active_get_engine(&obj->last_write, + &obj->base.dev->struct_mutex); if (engine == NULL || engine->id != RCS) engine = &dev_priv->engine[BCS]; } else { @@@ -11684,22 -11685,6 +11685,6 @@@ mmio_flip = use_mmio_flip(engine, obj); - /* When using CS flips, we want to emit semaphores between rings. - * However, when using mmio flips we will create a task to do the - * synchronisation, so all we want here is to pin the framebuffer - * into the display plane and skip any waits. - */ - if (!mmio_flip) { - ret = i915_gem_object_sync(obj, engine, &request); - if (!ret && !request) { - request = i915_gem_request_alloc(engine, NULL); - ret = PTR_ERR_OR_ZERO(request); - } - - if (ret) - goto cleanup_pending; - } - ret = intel_pin_and_fence_fb_obj(fb, primary->state->rotation); if (ret) goto cleanup_pending; @@@ -11712,19 -11697,28 +11697,28 @@@ if (mmio_flip) { INIT_WORK(&work->mmio_work, intel_mmio_flip_work_func); - i915_gem_request_assign(&work->flip_queued_req, - obj->last_write_req); - + work->flip_queued_req = i915_gem_active_get(&obj->last_write, + &obj->base.dev->struct_mutex); schedule_work(&work->mmio_work); } else { - i915_gem_request_assign(&work->flip_queued_req, request); + request = i915_gem_request_alloc(engine, engine->last_context); + if (IS_ERR(request)) { + ret = PTR_ERR(request); + goto cleanup_unpin; + } + + ret = i915_gem_object_sync(obj, request); + if (ret) + goto cleanup_request; + ret = dev_priv->display.queue_flip(dev, crtc, fb, obj, request, page_flip_flags); if (ret) - goto cleanup_unpin; + goto cleanup_request; intel_mark_page_flip_active(intel_crtc, work); + work->flip_queued_req = i915_gem_request_get(request); i915_add_request_no_flush(request); } @@@ -11732,25 -11726,25 +11726,25 @@@ to_intel_plane(primary)->frontbuffer_bit); mutex_unlock(&dev->struct_mutex); - intel_frontbuffer_flip_prepare(dev, + intel_frontbuffer_flip_prepare(to_i915(dev), to_intel_plane(primary)->frontbuffer_bit); trace_i915_flip_request(intel_crtc->plane, obj); return 0; + cleanup_request: + i915_add_request_no_flush(request); cleanup_unpin: intel_unpin_fb_obj(fb, crtc->primary->state->rotation); cleanup_pending: - if (!IS_ERR_OR_NULL(request)) - i915_add_request_no_flush(request); atomic_dec(&intel_crtc->unpin_work_count); mutex_unlock(&dev->struct_mutex); cleanup: crtc->primary->fb = old_fb; update_state_fb(crtc->primary); - drm_gem_object_unreference_unlocked(&obj->base); + i915_gem_object_put_unlocked(obj); drm_framebuffer_unreference(work->old_fb); spin_lock_irq(&dev->event_lock); @@@ -11818,7 -11812,7 +11812,7 @@@ static bool intel_wm_need_update(struc 
struct intel_plane_state *cur = to_intel_plane_state(plane->state); /* Update watermarks on tiling or size changes. */ - if (new->visible != cur->visible) + if (new->base.visible != cur->base.visible) return true; if (!cur->base.fb || !new->base.fb) @@@ -11826,10 -11820,10 +11820,10 @@@ if (cur->base.fb->modifier[0] != new->base.fb->modifier[0] || cur->base.rotation != new->base.rotation || - drm_rect_width(&new->src) != drm_rect_width(&cur->src) || - drm_rect_height(&new->src) != drm_rect_height(&cur->src) || - drm_rect_width(&new->dst) != drm_rect_width(&cur->dst) || - drm_rect_height(&new->dst) != drm_rect_height(&cur->dst)) + drm_rect_width(&new->base.src) != drm_rect_width(&cur->base.src) || + drm_rect_height(&new->base.src) != drm_rect_height(&cur->base.src) || + drm_rect_width(&new->base.dst) != drm_rect_width(&cur->base.dst) || + drm_rect_height(&new->base.dst) != drm_rect_height(&cur->base.dst)) return true; return false; @@@ -11837,10 -11831,10 +11831,10 @@@ static bool needs_scaling(struct intel_plane_state *state) { - int src_w = drm_rect_width(&state->src) >> 16; - int src_h = drm_rect_height(&state->src) >> 16; - int dst_w = drm_rect_width(&state->dst); - int dst_h = drm_rect_height(&state->dst); + int src_w = drm_rect_width(&state->base.src) >> 16; + int src_h = drm_rect_height(&state->base.src) >> 16; + int dst_w = drm_rect_width(&state->base.dst); + int dst_h = drm_rect_height(&state->base.dst); return (src_w != dst_w || src_h != dst_h); } @@@ -11871,8 -11865,8 +11865,8 @@@ int intel_plane_atomic_calc_changes(str return ret; } - was_visible = old_plane_state->visible; - visible = to_intel_plane_state(plane_state)->visible; + was_visible = old_plane_state->base.visible; + visible = to_intel_plane_state(plane_state)->base.visible; if (!was_crtc_enabled && WARN_ON(was_visible)) was_visible = false; @@@ -11888,7 -11882,7 +11882,7 @@@ * only combine the results from all planes in the current place? */ if (!is_crtc_enabled) - to_intel_plane_state(plane_state)->visible = visible = false; + to_intel_plane_state(plane_state)->base.visible = visible = false; if (!was_visible && !visible) return 0; @@@ -12106,11 -12100,21 +12100,11 @@@ connected_sink_compute_bpp(struct intel pipe_config->pipe_bpp = connector->base.display_info.bpc*3; } - /* Clamp bpp to default limit on screens without EDID 1.4 */ - if (connector->base.display_info.bpc == 0) { - int type = connector->base.connector_type; - int clamp_bpp = 24; - - /* Fall back to 18 bpp when DP sink capability is unknown. 
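/*
 * A minimal sketch, not from the patch: the 16.16 fixed-point convention
 * used by the src/dst comparisons above. Source rectangles carry subpixel
 * precision and are shifted down by 16 bits before being compared against
 * integer destination sizes, as in needs_scaling().
 */
#include <stdbool.h>

static int fp16_to_int(int v)
{
	return v >> 16;			/* drop the fractional bits */
}

static bool scaling_needed(int src_w_fp16, int src_h_fp16,
			   int dst_w, int dst_h)
{
	return fp16_to_int(src_w_fp16) != dst_w ||
	       fp16_to_int(src_h_fp16) != dst_h;
}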
*/ - if (type == DRM_MODE_CONNECTOR_DisplayPort || - type == DRM_MODE_CONNECTOR_eDP) - clamp_bpp = 18; - - if (bpp > clamp_bpp) { - DRM_DEBUG_KMS("clamping display bpp (was %d) to default limit of %d\n", - bpp, clamp_bpp); - pipe_config->pipe_bpp = clamp_bpp; - } + /* Clamp bpp to 8 on screens without EDID 1.4 */ + if (connector->base.display_info.bpc == 0 && bpp > 24) { + DRM_DEBUG_KMS("clamping display bpp (was %d) to default limit of 24\n", + bpp); + pipe_config->pipe_bpp = 24; } } @@@ -12283,13 -12287,12 +12277,13 @@@ static void intel_dump_pipe_config(stru drm_get_format_name(fb->pixel_format)); DRM_DEBUG_KMS("\tscaler:%d src %dx%d+%d+%d dst %dx%d+%d+%d\n", state->scaler_id, - state->src.x1 >> 16, state->src.y1 >> 16, - drm_rect_width(&state->src) >> 16, - drm_rect_height(&state->src) >> 16, - state->dst.x1, state->dst.y1, - drm_rect_width(&state->dst), - drm_rect_height(&state->dst)); + state->base.src.x1 >> 16, + state->base.src.y1 >> 16, + drm_rect_width(&state->base.src) >> 16, + drm_rect_height(&state->base.src) >> 16, + state->base.dst.x1, state->base.dst.y1, + drm_rect_width(&state->base.dst), + drm_rect_height(&state->base.dst)); } } @@@ -12298,6 -12301,7 +12292,7 @@@ static bool check_digital_port_conflict struct drm_device *dev = state->dev; struct drm_connector *connector; unsigned int used_ports = 0; + unsigned int used_mst_ports = 0; /* * Walk the connector list instead of the encoder @@@ -12334,11 -12338,20 +12329,20 @@@ return false; used_ports |= port_mask; + break; + case INTEL_OUTPUT_DP_MST: + used_mst_ports |= + 1 << enc_to_mst(&encoder->base)->primary->port; + break; default: break; } } + /* can't mix MST and SST/HDMI on the same port */ + if (used_ports & used_mst_ports) + return false; + return true; } @@@ -13506,8 -13519,8 +13510,8 @@@ static int intel_atomic_prepare_commit( if (!intel_plane_state->wait_req) continue; - ret = __i915_wait_request(intel_plane_state->wait_req, - true, NULL, NULL); + ret = i915_wait_request(intel_plane_state->wait_req, + true, NULL, NULL); if (ret) { /* Any hang should be swallowed by the wait */ WARN_ON(ret == -EIO); @@@ -13619,8 -13632,8 +13623,8 @@@ static void intel_atomic_commit_tail(st if (!intel_plane_state->wait_req) continue; - ret = __i915_wait_request(intel_plane_state->wait_req, - true, NULL, NULL); + ret = i915_wait_request(intel_plane_state->wait_req, + true, NULL, NULL); /* EIO should be eaten, and we can't get interrupted in the * worker, and blocking commits have waited already. 
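/*
 * A minimal sketch, not from the patch: the bitmask logic inside
 * check_digital_port_conflicts() above. Each port may be claimed at most
 * once by an SST/HDMI user, MST users are tracked in a separate mask, and
 * any port appearing in both masks is a conflict.
 */
#include <stdbool.h>

static bool claim_sst_port(unsigned int *used_sst, unsigned int port_bit)
{
	if (*used_sst & port_bit)
		return false;		/* port already claimed once */
	*used_sst |= port_bit;
	return true;
}

static bool ports_conflict(unsigned int used_sst, unsigned int used_mst)
{
	return used_sst & used_mst;	/* can't mix MST and SST/HDMI */
}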
*/ WARN_ON(ret); @@@ -13797,19 -13810,12 +13801,12 @@@ static void intel_atomic_track_fbs(stru { struct drm_plane_state *old_plane_state; struct drm_plane *plane; - struct drm_i915_gem_object *obj, *old_obj; - struct intel_plane *intel_plane; int i; - mutex_lock(&state->dev->struct_mutex); - for_each_plane_in_state(state, plane, old_plane_state, i) { - obj = intel_fb_obj(plane->state->fb); - old_obj = intel_fb_obj(old_plane_state->fb); - intel_plane = to_intel_plane(plane); - - i915_gem_track_fb(old_obj, obj, intel_plane->frontbuffer_bit); - } - mutex_unlock(&state->dev->struct_mutex); + for_each_plane_in_state(state, plane, old_plane_state, i) + i915_gem_track_fb(intel_fb_obj(old_plane_state->fb), + intel_fb_obj(plane->state->fb), + to_intel_plane(plane)->frontbuffer_bit); } /** @@@ -14038,11 -14044,9 +14035,9 @@@ intel_prepare_plane_fb(struct drm_plan } if (ret == 0) { - struct intel_plane_state *plane_state = - to_intel_plane_state(new_state); - - i915_gem_request_assign(&plane_state->wait_req, - obj->last_write_req); + to_intel_plane_state(new_state)->wait_req = + i915_gem_active_get(&obj->last_write, + &obj->base.dev->struct_mutex); } return ret; @@@ -14063,6 -14067,7 +14058,7 @@@ intel_cleanup_plane_fb(struct drm_plan { struct drm_device *dev = plane->dev; struct intel_plane_state *old_intel_state; + struct intel_plane_state *intel_state = to_intel_plane_state(plane->state); struct drm_i915_gem_object *old_obj = intel_fb_obj(old_state->fb); struct drm_i915_gem_object *obj = intel_fb_obj(plane->state->fb); @@@ -14075,6 -14080,7 +14071,7 @@@ !INTEL_INFO(dev)->cursor_needs_physical)) intel_unpin_fb_obj(old_state->fb, old_state->rotation); + i915_gem_request_assign(&intel_state->wait_req, NULL); i915_gem_request_assign(&old_intel_state->wait_req, NULL); } @@@ -14110,6 -14116,7 +14107,6 @@@ intel_check_primary_plane(struct drm_pl struct intel_plane_state *state) { struct drm_crtc *crtc = state->base.crtc; - struct drm_framebuffer *fb = state->base.fb; int min_scale = DRM_PLANE_HELPER_NO_SCALING; int max_scale = DRM_PLANE_HELPER_NO_SCALING; bool can_position = false; @@@ -14123,10 -14130,12 +14120,10 @@@ can_position = true; } - return drm_plane_helper_check_update(plane, crtc, fb, &state->src, - &state->dst, &state->clip, - state->base.rotation, - min_scale, max_scale, - can_position, true, - &state->visible); + return drm_plane_helper_check_state(&state->base, + &state->clip, + min_scale, max_scale, + can_position, true); } static void intel_begin_crtc_commit(struct drm_crtc *crtc, @@@ -14286,11 -14295,11 +14283,11 @@@ fail void intel_create_rotation_property(struct drm_device *dev, struct intel_plane *plane) { if (!dev->mode_config.rotation_property) { - unsigned long flags = BIT(DRM_ROTATE_0) | - BIT(DRM_ROTATE_180); + unsigned long flags = DRM_ROTATE_0 | + DRM_ROTATE_180; if (INTEL_INFO(dev)->gen >= 9) - flags |= BIT(DRM_ROTATE_90) | BIT(DRM_ROTATE_270); + flags |= DRM_ROTATE_90 | DRM_ROTATE_270; dev->mode_config.rotation_property = drm_mode_create_rotation_property(dev, flags); @@@ -14306,17 -14315,19 +14303,17 @@@ intel_check_cursor_plane(struct drm_pla struct intel_crtc_state *crtc_state, struct intel_plane_state *state) { - struct drm_crtc *crtc = crtc_state->base.crtc; struct drm_framebuffer *fb = state->base.fb; struct drm_i915_gem_object *obj = intel_fb_obj(fb); enum pipe pipe = to_intel_plane(plane)->pipe; unsigned stride; int ret; - ret = drm_plane_helper_check_update(plane, crtc, fb, &state->src, - &state->dst, &state->clip, - state->base.rotation, - 
DRM_PLANE_HELPER_NO_SCALING, - DRM_PLANE_HELPER_NO_SCALING, - true, true, &state->visible); + ret = drm_plane_helper_check_state(&state->base, + &state->clip, + DRM_PLANE_HELPER_NO_SCALING, + DRM_PLANE_HELPER_NO_SCALING, + true, true); if (ret) return ret; @@@ -14353,7 -14364,7 +14350,7 @@@ * Refuse to put the cursor into that compromised position. */ if (IS_CHERRYVIEW(plane->dev) && pipe == PIPE_C && - state->visible && state->base.crtc_x < 0) { + state->base.visible && state->base.crtc_x < 0) { DRM_DEBUG_KMS("CHV cursor C not allowed to straddle the left screen edge\n"); return -EINVAL; } @@@ -14431,8 -14442,8 +14428,8 @@@ static struct drm_plane *intel_cursor_p if (!dev->mode_config.rotation_property) dev->mode_config.rotation_property = drm_mode_create_rotation_property(dev, - BIT(DRM_ROTATE_0) | - BIT(DRM_ROTATE_180)); + DRM_ROTATE_0 | + DRM_ROTATE_180); if (dev->mode_config.rotation_property) drm_object_attach_property(&cursor->base.base, dev->mode_config.rotation_property, @@@ -14831,7 -14842,7 +14828,7 @@@ static void intel_user_framebuffer_dest drm_framebuffer_cleanup(fb); mutex_lock(&dev->struct_mutex); WARN_ON(!intel_fb->obj->framebuffer_references--); - drm_gem_object_unreference(&intel_fb->obj->base); + i915_gem_object_put(intel_fb->obj); mutex_unlock(&dev->struct_mutex); kfree(intel_fb); } @@@ -14920,15 -14931,15 +14917,15 @@@ static int intel_framebuffer_init(struc if (mode_cmd->flags & DRM_MODE_FB_MODIFIERS) { /* Enforce that fb modifier and tiling mode match, but only for * X-tiled. This is needed for FBC. */ - if (!!(obj->tiling_mode == I915_TILING_X) != + if (!!(i915_gem_object_get_tiling(obj) == I915_TILING_X) != !!(mode_cmd->modifier[0] == I915_FORMAT_MOD_X_TILED)) { DRM_DEBUG("tiling_mode doesn't match fb modifier\n"); return -EINVAL; } } else { - if (obj->tiling_mode == I915_TILING_X) + if (i915_gem_object_get_tiling(obj) == I915_TILING_X) mode_cmd->modifier[0] = I915_FORMAT_MOD_X_TILED; - else if (obj->tiling_mode == I915_TILING_Y) { + else if (i915_gem_object_get_tiling(obj) == I915_TILING_Y) { DRM_DEBUG("No Y tiling for legacy addfb\n"); return -EINVAL; } @@@ -14972,9 -14983,10 +14969,10 @@@ } if (mode_cmd->modifier[0] == I915_FORMAT_MOD_X_TILED && - mode_cmd->pitches[0] != obj->stride) { + mode_cmd->pitches[0] != i915_gem_object_get_stride(obj)) { DRM_DEBUG("pitch (%d) must match tiling stride (%d)\n", - mode_cmd->pitches[0], obj->stride); + mode_cmd->pitches[0], + i915_gem_object_get_stride(obj)); return -EINVAL; } @@@ -15068,13 -15080,13 +15066,13 @@@ intel_user_framebuffer_create(struct dr struct drm_i915_gem_object *obj; struct drm_mode_fb_cmd2 mode_cmd = *user_mode_cmd; - obj = to_intel_bo(drm_gem_object_lookup(filp, mode_cmd.handles[0])); - if (&obj->base == NULL) + obj = i915_gem_object_lookup(filp, mode_cmd.handles[0]); + if (!obj) return ERR_PTR(-ENOENT); fb = intel_framebuffer_create(dev, &mode_cmd, obj); if (IS_ERR(fb)) - drm_gem_object_unreference_unlocked(&obj->base); + i915_gem_object_put_unlocked(obj); return fb; } @@@ -15482,7 -15494,6 +15480,6 @@@ void intel_modeset_init_hw(struct drm_d dev_priv->atomic_cdclk_freq = dev_priv->cdclk_freq; intel_init_clock_gating(dev); - intel_enable_gt_powersave(dev_priv); } /* @@@ -15803,7 -15814,7 +15800,7 @@@ static void intel_sanitize_crtc(struct * Temporarily change the plane mapping and disable everything * ... 
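/*
 * A minimal sketch, not from the patch: the tiling/modifier
 * reconciliation in intel_framebuffer_init() above. With explicit
 * modifiers, X-tiling and the X-tiled modifier must agree; without them,
 * the modifier is derived from the object's tiling, and Y-tiled objects
 * are rejected since legacy addfb has no way to describe them. The enums
 * below are illustrative stand-ins for the real i915/DRM constants.
 */
#include <errno.h>
#include <stdbool.h>

enum tiling { TILING_NONE, TILING_X, TILING_Y };
enum modifier { MOD_LINEAR, MOD_X_TILED };

static int reconcile_tiling(bool have_modifier, enum modifier *mod,
			    enum tiling tiling)
{
	if (have_modifier)
		return ((tiling == TILING_X) == (*mod == MOD_X_TILED)) ?
			0 : -EINVAL;

	if (tiling == TILING_Y)
		return -EINVAL;		/* no Y tiling for legacy addfb */

	*mod = (tiling == TILING_X) ? MOD_X_TILED : MOD_LINEAR;
	return 0;
}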
*/ plane = crtc->plane; - to_intel_plane_state(crtc->base.primary->state)->visible = true; + to_intel_plane_state(crtc->base.primary->state)->base.visible = true; crtc->plane = !plane; intel_crtc_disable_noatomic(&crtc->base); crtc->plane = plane; @@@ -15930,10 -15941,10 +15927,10 @@@ static void readout_plane_state(struct struct intel_plane_state *plane_state = to_intel_plane_state(primary->state); - plane_state->visible = crtc->active && + plane_state->base.visible = crtc->active && primary_get_hw_state(to_intel_plane(primary)); - if (plane_state->visible) + if (plane_state->base.visible) crtc->base.state->plane_mask |= 1 << drm_plane_index(primary); } diff --combined drivers/gpu/drm/i915/intel_drv.h index 9c59521afb18,b1fc67ed8bc2..c29a429cbc45 --- a/drivers/gpu/drm/i915/intel_drv.h +++ b/drivers/gpu/drm/i915/intel_drv.h @@@ -338,7 -338,10 +338,7 @@@ struct intel_atomic_state struct intel_plane_state { struct drm_plane_state base; - struct drm_rect src; - struct drm_rect dst; struct drm_rect clip; - bool visible; /* * scaler_id @@@ -849,6 -852,7 +849,7 @@@ struct intel_dp int link_rate; uint8_t lane_count; uint8_t sink_count; + bool link_mst; bool has_audio; bool detect_done; enum hdmi_force_audio force_audio; @@@ -1104,7 -1108,7 +1105,7 @@@ void intel_crt_reset(struct drm_encode /* intel_ddi.c */ void intel_ddi_clk_select(struct intel_encoder *encoder, const struct intel_crtc_state *pipe_config); - void intel_prepare_ddi_buffer(struct intel_encoder *encoder); + void intel_prepare_dp_ddi_buffers(struct intel_encoder *encoder); void hsw_fdi_link_train(struct drm_crtc *crtc); void intel_ddi_init(struct drm_device *dev, enum port port); enum port intel_ddi_get_encoder_port(struct intel_encoder *intel_encoder); @@@ -1131,21 -1135,10 +1132,10 @@@ void intel_ddi_clock_get(struct intel_e void intel_ddi_set_vc_payload_alloc(struct drm_crtc *crtc, bool state); uint32_t ddi_signal_levels(struct intel_dp *intel_dp); - /* intel_frontbuffer.c */ - void intel_fb_obj_invalidate(struct drm_i915_gem_object *obj, - enum fb_op_origin origin); - void intel_frontbuffer_flip_prepare(struct drm_device *dev, - unsigned frontbuffer_bits); - void intel_frontbuffer_flip_complete(struct drm_device *dev, - unsigned frontbuffer_bits); - void intel_frontbuffer_flip(struct drm_device *dev, - unsigned frontbuffer_bits); unsigned int intel_fb_align_height(struct drm_device *dev, unsigned int height, uint32_t pixel_format, uint64_t fb_format_modifier); - void intel_fb_obj_flush(struct drm_i915_gem_object *obj, bool retire, - enum fb_op_origin origin); u32 intel_fb_stride_alignment(const struct drm_i915_private *dev_priv, uint64_t fb_modifier, uint32_t pixel_format); @@@ -1255,7 -1248,7 +1245,7 @@@ unsigned int intel_tile_height(const st static inline bool intel_rotation_90_or_270(unsigned int rotation) { - return rotation & (BIT(DRM_ROTATE_90) | BIT(DRM_ROTATE_270)); + return rotation & (DRM_ROTATE_90 | DRM_ROTATE_270); } void intel_create_rotation_property(struct drm_device *dev, @@@ -1381,11 -1374,12 +1371,12 @@@ uint32_t intel_dp_pack_aux(const uint8_ void intel_plane_destroy(struct drm_plane *plane); void intel_edp_drrs_enable(struct intel_dp *intel_dp); void intel_edp_drrs_disable(struct intel_dp *intel_dp); - void intel_edp_drrs_invalidate(struct drm_device *dev, - unsigned frontbuffer_bits); - void intel_edp_drrs_flush(struct drm_device *dev, unsigned frontbuffer_bits); + void intel_edp_drrs_invalidate(struct drm_i915_private *dev_priv, + unsigned int frontbuffer_bits); + void intel_edp_drrs_flush(struct 
drm_i915_private *dev_priv, + unsigned int frontbuffer_bits); bool intel_digital_port_connected(struct drm_i915_private *dev_priv, - struct intel_digital_port *port); + struct intel_digital_port *port); void intel_dp_program_link_training_pattern(struct intel_dp *intel_dp, @@@ -1558,13 -1552,13 +1549,13 @@@ static inline void intel_backlight_devi /* intel_psr.c */ void intel_psr_enable(struct intel_dp *intel_dp); void intel_psr_disable(struct intel_dp *intel_dp); - void intel_psr_invalidate(struct drm_device *dev, + void intel_psr_invalidate(struct drm_i915_private *dev_priv, unsigned frontbuffer_bits); - void intel_psr_flush(struct drm_device *dev, + void intel_psr_flush(struct drm_i915_private *dev_priv, unsigned frontbuffer_bits, enum fb_op_origin origin); void intel_psr_init(struct drm_device *dev); - void intel_psr_single_frame_update(struct drm_device *dev, + void intel_psr_single_frame_update(struct drm_i915_private *dev_priv, unsigned frontbuffer_bits); /* intel_runtime_pm.c */ @@@ -1664,13 -1658,6 +1655,6 @@@ enable_rpm_wakeref_asserts(struct drm_i atomic_dec(&dev_priv->pm.wakeref_count); } - /* TODO: convert users of these to rely instead on proper RPM refcounting */ - #define DISABLE_RPM_WAKEREF_ASSERTS(dev_priv) \ - disable_rpm_wakeref_asserts(dev_priv) - - #define ENABLE_RPM_WAKEREF_ASSERTS(dev_priv) \ - enable_rpm_wakeref_asserts(dev_priv) - void intel_runtime_pm_get(struct drm_i915_private *dev_priv); bool intel_runtime_pm_get_if_in_use(struct drm_i915_private *dev_priv); void intel_runtime_pm_get_noresume(struct drm_i915_private *dev_priv); @@@ -1696,11 -1683,11 +1680,11 @@@ void intel_gpu_ips_init(struct drm_i915 void intel_gpu_ips_teardown(void); void intel_init_gt_powersave(struct drm_i915_private *dev_priv); void intel_cleanup_gt_powersave(struct drm_i915_private *dev_priv); + void intel_sanitize_gt_powersave(struct drm_i915_private *dev_priv); void intel_enable_gt_powersave(struct drm_i915_private *dev_priv); + void intel_autoenable_gt_powersave(struct drm_i915_private *dev_priv); void intel_disable_gt_powersave(struct drm_i915_private *dev_priv); void intel_suspend_gt_powersave(struct drm_i915_private *dev_priv); - void intel_reset_gt_powersave(struct drm_i915_private *dev_priv); - void gen6_update_ring_freq(struct drm_i915_private *dev_priv); void gen6_rps_busy(struct drm_i915_private *dev_priv); void gen6_rps_reset_ei(struct drm_i915_private *dev_priv); void gen6_rps_idle(struct drm_i915_private *dev_priv); @@@ -1727,8 -1714,6 +1711,8 @@@ bool intel_sdvo_init(struct drm_device /* intel_sprite.c */ +int intel_usecs_to_scanlines(const struct drm_display_mode *adjusted_mode, + int usecs); int intel_plane_init(struct drm_device *dev, enum pipe pipe, int plane); int intel_sprite_set_colorkey(struct drm_device *dev, void *data, struct drm_file *file_priv); diff --combined drivers/gpu/drm/i915/intel_fbc.c index 3f4e32f8baae,85adc2b92594..e67b09a3328c --- a/drivers/gpu/drm/i915/intel_fbc.c +++ b/drivers/gpu/drm/i915/intel_fbc.c @@@ -494,7 -494,7 +494,7 @@@ static bool multiple_pipes_ok(struct in if (!no_fbc_on_multiple_pipes(dev_priv)) return true; - if (plane_state->visible) + if (plane_state->base.visible) fbc->visible_pipes_mask |= (1 << pipe); else fbc->visible_pipes_mask &= ~(1 << pipe); @@@ -725,9 -725,9 +725,9 @@@ static void intel_fbc_update_state_cach ilk_pipe_pixel_rate(crtc_state); cache->plane.rotation = plane_state->base.rotation; - cache->plane.src_w = drm_rect_width(&plane_state->src) >> 16; - cache->plane.src_h = drm_rect_height(&plane_state->src) >> 16; - 
cache->plane.visible = plane_state->visible; + cache->plane.src_w = drm_rect_width(&plane_state->base.src) >> 16; + cache->plane.src_h = drm_rect_height(&plane_state->base.src) >> 16; + cache->plane.visible = plane_state->base.visible; if (!cache->plane.visible) return; @@@ -741,7 -741,7 +741,7 @@@ cache->fb.pixel_format = fb->pixel_format; cache->fb.stride = fb->pitches[0]; cache->fb.fence_reg = obj->fence_reg; - cache->fb.tiling_mode = obj->tiling_mode; + cache->fb.tiling_mode = i915_gem_object_get_tiling(obj); } static bool intel_fbc_can_activate(struct intel_crtc *crtc) @@@ -775,7 -775,7 +775,7 @@@ return false; } if (INTEL_INFO(dev_priv)->gen <= 4 && !IS_G4X(dev_priv) && - cache->plane.rotation != BIT(DRM_ROTATE_0)) { + cache->plane.rotation != DRM_ROTATE_0) { fbc->no_fbc_reason = "rotation unsupported"; return false; } @@@ -1050,7 -1050,7 +1050,7 @@@ void intel_fbc_choose_crtc(struct drm_i struct intel_plane_state *intel_plane_state = to_intel_plane_state(plane_state); - if (!intel_plane_state->visible) + if (!intel_plane_state->base.visible) continue; for_each_crtc_in_state(state, crtc, crtc_state, j) { @@@ -1075,6 -1075,8 +1075,8 @@@ out /** * intel_fbc_enable: tries to enable FBC on the CRTC * @crtc: the CRTC + * @crtc_state: corresponding &drm_crtc_state for @crtc + * @plane_state: corresponding &drm_plane_state for the primary plane of @crtc * * This function checks if the given CRTC was chosen for FBC, then enables it if * possible. Notice that it doesn't activate FBC. It is valid to call @@@ -1163,11 -1165,8 +1165,8 @@@ void intel_fbc_disable(struct intel_crt return; mutex_lock(&fbc->lock); - if (fbc->crtc == crtc) { - WARN_ON(!fbc->enabled); - WARN_ON(fbc->active); + if (fbc->crtc == crtc) __intel_fbc_disable(dev_priv); - } mutex_unlock(&fbc->lock); cancel_work_sync(&fbc->work.work); @@@ -1212,7 -1211,7 +1211,7 @@@ void intel_fbc_init_pipe_state(struct d for_each_intel_crtc(&dev_priv->drm, crtc) if (intel_crtc_active(&crtc->base) && - to_intel_plane_state(crtc->base.primary->state)->visible) + to_intel_plane_state(crtc->base.primary->state)->base.visible) dev_priv->fbc.visible_pipes_mask |= (1 << crtc->pipe); } @@@ -1230,12 -1229,29 +1229,29 @@@ static int intel_sanitize_fbc_option(st if (i915.enable_fbc >= 0) return !!i915.enable_fbc; + if (!HAS_FBC(dev_priv)) + return 0; + if (IS_BROADWELL(dev_priv)) return 1; return 0; } + static bool need_fbc_vtd_wa(struct drm_i915_private *dev_priv) + { + #ifdef CONFIG_INTEL_IOMMU + /* WaFbcTurnOffFbcWhenHyperVisorIsUsed:skl,bxt */ + if (intel_iommu_gfx_mapped && + (IS_SKYLAKE(dev_priv) || IS_BROXTON(dev_priv))) { + DRM_INFO("Disabling framebuffer compression (FBC) to prevent screen flicker with VT-d enabled\n"); + return true; + } + #endif + + return false; + } + /** * intel_fbc_init - Initialize FBC * @dev_priv: the i915 device @@@ -1253,6 -1269,9 +1269,9 @@@ void intel_fbc_init(struct drm_i915_pri fbc->active = false; fbc->work.scheduled = false; + if (need_fbc_vtd_wa(dev_priv)) + mkwrite_device_info(dev_priv)->has_fbc = false; + i915.enable_fbc = intel_sanitize_fbc_option(dev_priv); DRM_DEBUG_KMS("Sanitized enable_fbc value: %d\n", i915.enable_fbc); diff --combined drivers/gpu/drm/i915/intel_fbdev.c index 69d7ea576baa,0436b4869d57..2c14dfc5e4f0 --- a/drivers/gpu/drm/i915/intel_fbdev.c +++ b/drivers/gpu/drm/i915/intel_fbdev.c @@@ -34,6 -34,7 +34,6 @@@ #include #include #include -#include #include #include @@@ -41,6 -42,7 +41,7 @@@ #include #include #include "intel_drv.h" + #include "intel_frontbuffer.h" #include #include "i915_drv.h" 
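
The intel_fbc.c hunks above reorder two init-time decisions: need_fbc_vtd_wa() clears the FBC capability bit when the graphics IOMMU is active on Skylake/Broxton (the WaFbcTurnOffFbcWhenHyperVisorIsUsed workaround), and intel_sanitize_fbc_option() now returns 0 outright when the hardware lacks FBC, before falling back to the Broadwell-only default. A minimal standalone sketch of that decision order, using hypothetical stand-in types and names (fake_i915, fake_need_fbc_vtd_wa, fake_sanitize_fbc_option are illustrations, not the kernel API):

	#include <stdbool.h>
	#include <stdio.h>

	/* Hypothetical stand-ins for the driver's device info and module option. */
	struct fake_i915 {
		bool has_fbc;          /* device_info->has_fbc */
		bool is_broadwell;
		bool is_skylake;
		bool is_broxton;
		bool iommu_gfx_mapped; /* intel_iommu_gfx_mapped under CONFIG_INTEL_IOMMU */
	};

	/* Mirrors need_fbc_vtd_wa(): WaFbcTurnOffFbcWhenHyperVisorIsUsed:skl,bxt */
	static bool fake_need_fbc_vtd_wa(const struct fake_i915 *i915)
	{
		return i915->iommu_gfx_mapped &&
		       (i915->is_skylake || i915->is_broxton);
	}

	/* Mirrors intel_sanitize_fbc_option(); enable_fbc < 0 means "per-chip default". */
	static int fake_sanitize_fbc_option(const struct fake_i915 *i915, int enable_fbc)
	{
		if (enable_fbc >= 0)
			return !!enable_fbc;  /* an explicit module option wins */

		if (!i915->has_fbc)
			return 0;             /* new in this patch: no capability, no FBC */

		return i915->is_broadwell ? 1 : 0;
	}

	int main(void)
	{
		struct fake_i915 i915 = {
			.has_fbc = true,
			.is_skylake = true,
			.iommu_gfx_mapped = true,
		};

		/* intel_fbc_init() applies the workaround before sanitizing the option. */
		if (fake_need_fbc_vtd_wa(&i915))
			i915.has_fbc = false;

		printf("sanitized enable_fbc: %d\n",
		       fake_sanitize_fbc_option(&i915, -1));
		return 0;
	}

Note the ordering: the explicit module option is consulted before the capability bit, so an explicit i915.enable_fbc=1 can still request the feature, while the VT-d workaround only removes the per-chip default.
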
@@@ -158,7 -160,7 +159,7 @@@ static int intelfb_alloc(struct drm_fb_ fb = __intel_framebuffer_create(dev, &mode_cmd, obj); if (IS_ERR(fb)) { - drm_gem_object_unreference(&obj->base); + i915_gem_object_put(obj); ret = PTR_ERR(fb); goto out; } @@@ -188,7 -190,7 +189,7 @@@ static int intelfb_create(struct drm_fb struct i915_vma *vma; struct drm_i915_gem_object *obj; bool prealloc = false; - void *vaddr; + void __iomem *vaddr; int ret; if (intel_fb && @@@ -222,7 -224,7 +223,7 @@@ * This also validates that any existing fb inherited from the * BIOS is suitable for own access. */ - ret = intel_pin_and_fence_fb_obj(&ifbdev->fb->base, BIT(DRM_ROTATE_0)); + ret = intel_pin_and_fence_fb_obj(&ifbdev->fb->base, DRM_ROTATE_0); if (ret) goto out_unlock; @@@ -288,7 -290,7 +289,7 @@@ out_destroy_fbi: drm_fb_helper_release_fbi(helper); out_unpin: - intel_unpin_fb_obj(&ifbdev->fb->base, BIT(DRM_ROTATE_0)); + intel_unpin_fb_obj(&ifbdev->fb->base, DRM_ROTATE_0); out_unlock: mutex_unlock(&dev->struct_mutex); return ret; @@@ -553,7 -555,7 +554,7 @@@ static void intel_fbdev_destroy(struct if (ifbdev->fb) { mutex_lock(&ifbdev->helper.dev->struct_mutex); - intel_unpin_fb_obj(&ifbdev->fb->base, BIT(DRM_ROTATE_0)); + intel_unpin_fb_obj(&ifbdev->fb->base, DRM_ROTATE_0); mutex_unlock(&ifbdev->helper.dev->struct_mutex); drm_framebuffer_remove(&ifbdev->fb->base); @@@ -767,7 -769,7 +768,7 @@@ void intel_fbdev_fini(struct drm_devic if (!ifbdev) return; - flush_work(&dev_priv->fbdev_suspend_work); + cancel_work_sync(&dev_priv->fbdev_suspend_work); if (!current_is_async()) intel_fbdev_sync(ifbdev); diff --combined drivers/gpu/drm/i915/intel_pm.c index ba062d70a548,aef0b105eb58..81ab11934d85 --- a/drivers/gpu/drm/i915/intel_pm.c +++ b/drivers/gpu/drm/i915/intel_pm.c @@@ -340,6 -340,11 +340,11 @@@ void intel_set_memory_cxsr(struct drm_i I915_WRITE(FW_BLC_SELF, val); POSTING_READ(FW_BLC_SELF); } else if (IS_I915GM(dev)) { + /* + * FIXME can't find a bit like this for 915G, and + * yet it does have the related watermark in + * FW_BLC_SELF. What's going on? + */ val = enable ? 
_MASKED_BIT_ENABLE(INSTPM_SELF_EN) : _MASKED_BIT_DISABLE(INSTPM_SELF_EN); I915_WRITE(INSTPM, val); @@@ -960,7 -965,7 +965,7 @@@ static uint16_t vlv_compute_wm_level(st if (dev_priv->wm.pri_latency[level] == 0) return USHRT_MAX; - if (!state->visible) + if (!state->base.visible) return 0; cpp = drm_format_plane_cpp(state->base.fb->pixel_format, 0); @@@ -1002,7 -1007,7 +1007,7 @@@ static void vlv_compute_fifo(struct int if (plane->base.type == DRM_PLANE_TYPE_CURSOR) continue; - if (state->visible) { + if (state->base.visible) { wm_state->num_active_planes++; total_rate += drm_format_plane_cpp(state->base.fb->pixel_format, 0); } @@@ -1018,7 -1023,7 +1023,7 @@@ continue; } - if (!state->visible) { + if (!state->base.visible) { plane->wm.fifo_size = 0; continue; } @@@ -1118,7 -1123,7 +1123,7 @@@ static void vlv_compute_wm(struct intel struct intel_plane_state *state = to_intel_plane_state(plane->base.state); - if (!state->visible) + if (!state->base.visible) continue; /* normal watermarks */ @@@ -1580,7 -1585,7 +1585,7 @@@ static void i9xx_update_wm(struct drm_c obj = intel_fb_obj(enabled->primary->state->fb); /* self-refresh seems busted with untiled */ - if (obj->tiling_mode == I915_TILING_NONE) + if (!i915_gem_object_is_tiled(obj)) enabled = NULL; } @@@ -1604,6 -1609,9 +1609,9 @@@ unsigned long line_time_us; int entries; + if (IS_I915GM(dev) || IS_I945GM(dev)) + cpp = 4; + line_time_us = max(htotal * 1000 / clock, 1); /* Use ns/us then divide to preserve precision */ @@@ -1618,7 -1626,7 +1626,7 @@@ if (IS_I945G(dev) || IS_I945GM(dev)) I915_WRITE(FW_BLC_SELF, FW_BLC_SELF_FIFO_MASK | (srwm & 0xff)); - else if (IS_I915GM(dev)) + else I915_WRITE(FW_BLC_SELF, srwm & 0x3f); } @@@ -1767,7 -1775,7 +1775,7 @@@ static uint32_t ilk_compute_pri_wm(cons drm_format_plane_cpp(pstate->base.fb->pixel_format, 0) : 0; uint32_t method1, method2; - if (!cstate->base.active || !pstate->visible) + if (!cstate->base.active || !pstate->base.visible) return 0; method1 = ilk_wm_method1(ilk_pipe_pixel_rate(cstate), cpp, mem_value); @@@ -1777,7 -1785,7 +1785,7 @@@ method2 = ilk_wm_method2(ilk_pipe_pixel_rate(cstate), cstate->base.adjusted_mode.crtc_htotal, - drm_rect_width(&pstate->dst), + drm_rect_width(&pstate->base.dst), cpp, mem_value); return min(method1, method2); @@@ -1795,13 -1803,13 +1803,13 @@@ static uint32_t ilk_compute_spr_wm(cons drm_format_plane_cpp(pstate->base.fb->pixel_format, 0) : 0; uint32_t method1, method2; - if (!cstate->base.active || !pstate->visible) + if (!cstate->base.active || !pstate->base.visible) return 0; method1 = ilk_wm_method1(ilk_pipe_pixel_rate(cstate), cpp, mem_value); method2 = ilk_wm_method2(ilk_pipe_pixel_rate(cstate), cstate->base.adjusted_mode.crtc_htotal, - drm_rect_width(&pstate->dst), + drm_rect_width(&pstate->base.dst), cpp, mem_value); return min(method1, method2); } @@@ -1820,7 -1828,7 +1828,7 @@@ static uint32_t ilk_compute_cur_wm(cons * this is necessary to avoid flickering. */ int cpp = 4; - int width = pstate->visible ? pstate->base.crtc_w : 64; + int width = pstate->base.visible ? pstate->base.crtc_w : 64; if (!cstate->base.active) return 0; @@@ -1838,10 -1846,10 +1846,10 @@@ static uint32_t ilk_compute_fbc_wm(cons int cpp = pstate->base.fb ? 
drm_format_plane_cpp(pstate->base.fb->pixel_format, 0) : 0; - if (!cstate->base.active || !pstate->visible) + if (!cstate->base.active || !pstate->base.visible) return 0; - return ilk_wm_fbc(pri_val, drm_rect_width(&pstate->dst), cpp); + return ilk_wm_fbc(pri_val, drm_rect_width(&pstate->base.dst), cpp); } static unsigned int ilk_display_fifo_size(const struct drm_device *dev) @@@ -2358,10 -2366,10 +2366,10 @@@ static int ilk_compute_pipe_wm(struct i pipe_wm->pipe_enabled = cstate->base.active; if (sprstate) { - pipe_wm->sprites_enabled = sprstate->visible; - pipe_wm->sprites_scaled = sprstate->visible && - (drm_rect_width(&sprstate->dst) != drm_rect_width(&sprstate->src) >> 16 || - drm_rect_height(&sprstate->dst) != drm_rect_height(&sprstate->src) >> 16); + pipe_wm->sprites_enabled = sprstate->base.visible; + pipe_wm->sprites_scaled = sprstate->base.visible && + (drm_rect_width(&sprstate->base.dst) != drm_rect_width(&sprstate->base.src) >> 16 || + drm_rect_height(&sprstate->base.dst) != drm_rect_height(&sprstate->base.src) >> 16); } usable_level = max_level; @@@ -2996,14 -3004,14 +3004,14 @@@ skl_plane_downscale_amount(const struc uint32_t downscale_h, downscale_w; uint32_t src_w, src_h, dst_w, dst_h; - if (WARN_ON(!pstate->visible)) + if (WARN_ON(!pstate->base.visible)) return DRM_PLANE_HELPER_NO_SCALING; /* n.b., src is 16.16 fixed point, dst is whole integer */ - src_w = drm_rect_width(&pstate->src); - src_h = drm_rect_height(&pstate->src); - dst_w = drm_rect_width(&pstate->dst); - dst_h = drm_rect_height(&pstate->dst); + src_w = drm_rect_width(&pstate->base.src); + src_h = drm_rect_height(&pstate->base.src); + dst_w = drm_rect_width(&pstate->base.dst); + dst_h = drm_rect_height(&pstate->base.dst); if (intel_rotation_90_or_270(pstate->base.rotation)) swap(dst_w, dst_h); @@@ -3025,15 -3033,15 +3033,15 @@@ skl_plane_relative_data_rate(const stru uint32_t width = 0, height = 0; unsigned format = fb ? 
fb->pixel_format : DRM_FORMAT_XRGB8888; - if (!intel_pstate->visible) + if (!intel_pstate->base.visible) return 0; if (pstate->plane->type == DRM_PLANE_TYPE_CURSOR) return 0; if (y && format != DRM_FORMAT_NV12) return 0; - width = drm_rect_width(&intel_pstate->src) >> 16; - height = drm_rect_height(&intel_pstate->src) >> 16; + width = drm_rect_width(&intel_pstate->base.src) >> 16; + height = drm_rect_height(&intel_pstate->base.src) >> 16; if (intel_rotation_90_or_270(pstate->rotation)) swap(width, height); @@@ -3134,8 -3142,8 +3142,8 @@@ skl_ddb_min_alloc(const struct drm_plan fb->modifier[0] != I915_FORMAT_MOD_Yf_TILED) return 8; - src_w = drm_rect_width(&intel_pstate->src) >> 16; - src_h = drm_rect_height(&intel_pstate->src) >> 16; + src_w = drm_rect_width(&intel_pstate->base.src) >> 16; + src_h = drm_rect_height(&intel_pstate->base.src) >> 16; if (intel_rotation_90_or_270(pstate->rotation)) swap(src_w, src_h); @@@ -3226,7 -3234,7 +3234,7 @@@ skl_allocate_pipe_ddb(struct intel_crtc if (intel_plane->pipe != pipe) continue; - if (!to_intel_plane_state(pstate)->visible) { + if (!to_intel_plane_state(pstate)->base.visible) { minimum[id] = 0; y_minimum[id] = 0; continue; @@@ -3344,6 -3352,8 +3352,8 @@@ static uint32_t skl_wm_method2(uint32_ plane_bytes_per_line *= 4; plane_blocks_per_line = DIV_ROUND_UP(plane_bytes_per_line, 512); plane_blocks_per_line /= 4; + } else if (tiling == DRM_FORMAT_MOD_NONE) { + plane_blocks_per_line = DIV_ROUND_UP(plane_bytes_per_line, 512) + 1; } else { plane_blocks_per_line = DIV_ROUND_UP(plane_bytes_per_line, 512); } @@@ -3363,7 -3373,7 +3373,7 @@@ static uint32_t skl_adjusted_plane_pixe uint64_t pixel_rate; /* Shouldn't reach here on disabled planes... */ - if (WARN_ON(!pstate->visible)) + if (WARN_ON(!pstate->base.visible)) return 0; /* @@@ -3399,13 -3409,13 +3409,13 @@@ static int skl_compute_plane_wm(const s uint32_t width = 0, height = 0; uint32_t plane_pixel_rate; - if (latency == 0 || !cstate->base.active || !intel_pstate->visible) { + if (latency == 0 || !cstate->base.active || !intel_pstate->base.visible) { *enabled = false; return 0; } - width = drm_rect_width(&intel_pstate->src) >> 16; - height = drm_rect_height(&intel_pstate->src) >> 16; + width = drm_rect_width(&intel_pstate->base.src) >> 16; + height = drm_rect_height(&intel_pstate->base.src) >> 16; if (intel_rotation_90_or_270(pstate->rotation)) swap(width, height); @@@ -4912,7 -4922,7 +4922,7 @@@ void gen6_rps_boost(struct drm_i915_pri */ if (!(dev_priv->gt.awake && dev_priv->rps.enabled && - dev_priv->rps.cur_freq < dev_priv->rps.max_freq_softlimit)) + dev_priv->rps.cur_freq < dev_priv->rps.boost_freq)) return; /* Force a RPS boost (and don't count it against the client) if @@@ -5103,35 -5113,31 +5113,31 @@@ int sanitize_rc6_option(struct drm_i915 static void gen6_init_rps_frequencies(struct drm_i915_private *dev_priv) { - uint32_t rp_state_cap; - u32 ddcc_status = 0; - int ret; - /* All of these values are in units of 50MHz */ - dev_priv->rps.cur_freq = 0; + /* static values from HW: RP0 > RP1 > RPn (min_freq) */ if (IS_BROXTON(dev_priv)) { - rp_state_cap = I915_READ(BXT_RP_STATE_CAP); + u32 rp_state_cap = I915_READ(BXT_RP_STATE_CAP); dev_priv->rps.rp0_freq = (rp_state_cap >> 16) & 0xff; dev_priv->rps.rp1_freq = (rp_state_cap >> 8) & 0xff; dev_priv->rps.min_freq = (rp_state_cap >> 0) & 0xff; } else { - rp_state_cap = I915_READ(GEN6_RP_STATE_CAP); + u32 rp_state_cap = I915_READ(GEN6_RP_STATE_CAP); dev_priv->rps.rp0_freq = (rp_state_cap >> 0) & 0xff; dev_priv->rps.rp1_freq = (rp_state_cap >> 8) & 
0xff; dev_priv->rps.min_freq = (rp_state_cap >> 16) & 0xff; } - /* hw_max = RP0 until we check for overclocking */ - dev_priv->rps.max_freq = dev_priv->rps.rp0_freq; + dev_priv->rps.max_freq = dev_priv->rps.rp0_freq; dev_priv->rps.efficient_freq = dev_priv->rps.rp1_freq; if (IS_HASWELL(dev_priv) || IS_BROADWELL(dev_priv) || IS_SKYLAKE(dev_priv) || IS_KABYLAKE(dev_priv)) { - ret = sandybridge_pcode_read(dev_priv, - HSW_PCODE_DYNAMIC_DUTY_CYCLE_CONTROL, - &ddcc_status); - if (0 == ret) + u32 ddcc_status = 0; + + if (sandybridge_pcode_read(dev_priv, + HSW_PCODE_DYNAMIC_DUTY_CYCLE_CONTROL, + &ddcc_status) == 0) dev_priv->rps.efficient_freq = clamp_t(u8, ((ddcc_status >> 8) & 0xff), @@@ -5141,29 -5147,26 +5147,26 @@@ if (IS_SKYLAKE(dev_priv) || IS_KABYLAKE(dev_priv)) { /* Store the frequency values in 16.66 MHZ units, which is - the natural hardware unit for SKL */ + * the natural hardware unit for SKL + */ dev_priv->rps.rp0_freq *= GEN9_FREQ_SCALER; dev_priv->rps.rp1_freq *= GEN9_FREQ_SCALER; dev_priv->rps.min_freq *= GEN9_FREQ_SCALER; dev_priv->rps.max_freq *= GEN9_FREQ_SCALER; dev_priv->rps.efficient_freq *= GEN9_FREQ_SCALER; } + } - dev_priv->rps.idle_freq = dev_priv->rps.min_freq; + static void reset_rps(struct drm_i915_private *dev_priv, + void (*set)(struct drm_i915_private *, u8)) + { + u8 freq = dev_priv->rps.cur_freq; - /* Preserve min/max settings in case of re-init */ - if (dev_priv->rps.max_freq_softlimit == 0) - dev_priv->rps.max_freq_softlimit = dev_priv->rps.max_freq; + /* force a reset */ + dev_priv->rps.power = -1; + dev_priv->rps.cur_freq = -1; - if (dev_priv->rps.min_freq_softlimit == 0) { - if (IS_HASWELL(dev_priv) || IS_BROADWELL(dev_priv)) - dev_priv->rps.min_freq_softlimit = - max_t(int, dev_priv->rps.efficient_freq, - intel_freq_opcode(dev_priv, 450)); - else - dev_priv->rps.min_freq_softlimit = - dev_priv->rps.min_freq; - } + set(dev_priv, freq); } /* See the Gen9_GT_PM_Programming_Guide doc for the below */ @@@ -5171,8 -5174,6 +5174,6 @@@ static void gen9_enable_rps(struct drm_ { intel_uncore_forcewake_get(dev_priv, FORCEWAKE_ALL); - gen6_init_rps_frequencies(dev_priv); - /* WaGsvDisableTurbo: Workaround to disable turbo on BXT A* */ if (IS_BXT_REVID(dev_priv, 0, BXT_REVID_A1)) { /* @@@ -5202,8 -5203,7 +5203,7 @@@ /* Leaning on the below call to gen6_set_rps to program/setup the * Up/Down EI & threshold registers, as well as the RP_CONTROL, * RP_INTERRUPT_LIMITS & RPNSWREQ registers */ - dev_priv->rps.power = HIGH_POWER; /* force a reset */ - gen6_set_rps(dev_priv, dev_priv->rps.idle_freq); + reset_rps(dev_priv, gen6_set_rps); intel_uncore_forcewake_put(dev_priv, FORCEWAKE_ALL); } @@@ -5290,9 -5290,6 +5290,6 @@@ static void gen8_enable_rps(struct drm_ /* 2a: Disable RC states. 
*/ I915_WRITE(GEN6_RC_CONTROL, 0); - /* Initialize rps frequencies */ - gen6_init_rps_frequencies(dev_priv); - /* 2b: Program RC6 thresholds.*/ I915_WRITE(GEN6_RC6_WAKE_RATE_LIMIT, 40 << 16); I915_WRITE(GEN6_RC_EVALUATION_INTERVAL, 125000); /* 12500 * 1280ns */ @@@ -5349,8 -5346,7 +5346,7 @@@ /* 6: Ring frequency + overclocking (our driver does this later) */ - dev_priv->rps.power = HIGH_POWER; /* force a reset */ - gen6_set_rps(dev_priv, dev_priv->rps.idle_freq); + reset_rps(dev_priv, gen6_set_rps); intel_uncore_forcewake_put(dev_priv, FORCEWAKE_ALL); } @@@ -5358,7 -5354,7 +5354,7 @@@ static void gen6_enable_rps(struct drm_i915_private *dev_priv) { struct intel_engine_cs *engine; - u32 rc6vids, pcu_mbox = 0, rc6_mask = 0; + u32 rc6vids, rc6_mask = 0; u32 gtfifodbg; int rc6_mode; int ret; @@@ -5382,9 -5378,6 +5378,6 @@@ intel_uncore_forcewake_get(dev_priv, FORCEWAKE_ALL); - /* Initialize rps frequencies */ - gen6_init_rps_frequencies(dev_priv); - /* disable the counters and set deterministic thresholds */ I915_WRITE(GEN6_RC_CONTROL, 0); @@@ -5435,16 -5428,7 +5428,7 @@@ if (ret) DRM_DEBUG_DRIVER("Failed to set the min frequency\n"); - ret = sandybridge_pcode_read(dev_priv, GEN6_READ_OC_PARAMS, &pcu_mbox); - if (!ret && (pcu_mbox & (1<<31))) { /* OC supported */ - DRM_DEBUG_DRIVER("Overclocking supported. Max: %dMHz, Overclock max: %dMHz\n", - (dev_priv->rps.max_freq_softlimit & 0xff) * 50, - (pcu_mbox & 0xff) * 50); - dev_priv->rps.max_freq = pcu_mbox & 0xff; - } - - dev_priv->rps.power = HIGH_POWER; /* force a reset */ - gen6_set_rps(dev_priv, dev_priv->rps.idle_freq); + reset_rps(dev_priv, gen6_set_rps); rc6vids = 0; ret = sandybridge_pcode_read(dev_priv, GEN6_PCODE_READ_RC6VIDS, &rc6vids); @@@ -5463,7 -5447,7 +5447,7 @@@ intel_uncore_forcewake_put(dev_priv, FORCEWAKE_ALL); } - static void __gen6_update_ring_freq(struct drm_i915_private *dev_priv) + static void gen6_update_ring_freq(struct drm_i915_private *dev_priv) { int min_freq = 15; unsigned int gpu_freq; @@@ -5547,16 -5531,6 +5531,6 @@@ } } - void gen6_update_ring_freq(struct drm_i915_private *dev_priv) - { - if (!HAS_CORE_RING_FREQ(dev_priv)) - return; - - mutex_lock(&dev_priv->rps.hw_lock); - __gen6_update_ring_freq(dev_priv); - mutex_unlock(&dev_priv->rps.hw_lock); - } - static int cherryview_rps_max_freq(struct drm_i915_private *dev_priv) { u32 val, rp0; @@@ -5746,7 -5720,7 +5720,7 @@@ static void valleyview_cleanup_pctx(str if (WARN_ON(!dev_priv->vlv_pctx)) return; - drm_gem_object_unreference_unlocked(&dev_priv->vlv_pctx->base); + i915_gem_object_put_unlocked(dev_priv->vlv_pctx); dev_priv->vlv_pctx = NULL; } @@@ -5769,8 -5743,6 +5743,6 @@@ static void valleyview_init_gt_powersav vlv_init_gpll_ref_freq(dev_priv); - 
mutex_lock(&dev_priv->rps.hw_lock); - mutex_lock(&dev_priv->sb_lock); val = vlv_cck_read(dev_priv, CCK_FUSE_REG); mutex_unlock(&dev_priv->sb_lock); @@@ -5870,17 -5829,6 +5829,6 @@@ dev_priv->rps.rp1_freq | dev_priv->rps.min_freq) & 1, "Odd GPU freq values\n"); - - dev_priv->rps.idle_freq = dev_priv->rps.min_freq; - - /* Preserve min/max settings in case of re-init */ - if (dev_priv->rps.max_freq_softlimit == 0) - dev_priv->rps.max_freq_softlimit = dev_priv->rps.max_freq; - - if (dev_priv->rps.min_freq_softlimit == 0) - dev_priv->rps.min_freq_softlimit = dev_priv->rps.min_freq; - - mutex_unlock(&dev_priv->rps.hw_lock); } static void valleyview_cleanup_gt_powersave(struct drm_i915_private *dev_priv) @@@ -5971,16 -5919,7 +5919,7 @@@ static void cherryview_enable_rps(struc DRM_DEBUG_DRIVER("GPLL enabled? %s\n", yesno(val & GPLLENABLE)); DRM_DEBUG_DRIVER("GPU status: 0x%08x\n", val); - dev_priv->rps.cur_freq = (val >> 8) & 0xff; - DRM_DEBUG_DRIVER("current GPU freq: %d MHz (%u)\n", - intel_gpu_freq(dev_priv, dev_priv->rps.cur_freq), - dev_priv->rps.cur_freq); - - DRM_DEBUG_DRIVER("setting GPU freq to %d MHz (%u)\n", - intel_gpu_freq(dev_priv, dev_priv->rps.idle_freq), - dev_priv->rps.idle_freq); - - valleyview_set_rps(dev_priv, dev_priv->rps.idle_freq); + reset_rps(dev_priv, valleyview_set_rps); intel_uncore_forcewake_put(dev_priv, FORCEWAKE_ALL); } @@@ -6060,16 -5999,7 +5999,7 @@@ static void valleyview_enable_rps(struc DRM_DEBUG_DRIVER("GPLL enabled? %s\n", yesno(val & GPLLENABLE)); DRM_DEBUG_DRIVER("GPU status: 0x%08x\n", val); - dev_priv->rps.cur_freq = (val >> 8) & 0xff; - DRM_DEBUG_DRIVER("current GPU freq: %d MHz (%u)\n", - intel_gpu_freq(dev_priv, dev_priv->rps.cur_freq), - dev_priv->rps.cur_freq); - - DRM_DEBUG_DRIVER("setting GPU freq to %d MHz (%u)\n", - intel_gpu_freq(dev_priv, dev_priv->rps.idle_freq), - dev_priv->rps.idle_freq); - - valleyview_set_rps(dev_priv, dev_priv->rps.idle_freq); + reset_rps(dev_priv, valleyview_set_rps); intel_uncore_forcewake_put(dev_priv, FORCEWAKE_ALL); } @@@ -6398,19 -6328,11 +6328,11 @@@ EXPORT_SYMBOL_GPL(i915_gpu_lower) */ bool i915_gpu_busy(void) { - struct drm_i915_private *dev_priv; - struct intel_engine_cs *engine; bool ret = false; spin_lock_irq(&mchdev_lock); - if (!i915_mch_dev) - goto out_unlock; - dev_priv = i915_mch_dev; - - for_each_engine(engine, dev_priv) - ret |= !list_empty(&engine->request_list); - - out_unlock: + if (i915_mch_dev) + ret = i915_mch_dev->gt.awake; spin_unlock_irq(&mchdev_lock); return ret; @@@ -6566,30 -6488,60 +6488,60 @@@ void intel_init_gt_powersave(struct drm intel_runtime_pm_get(dev_priv); } + mutex_lock(&dev_priv->rps.hw_lock); + + /* Initialize RPS limits (for userspace) */ if (IS_CHERRYVIEW(dev_priv)) cherryview_init_gt_powersave(dev_priv); else if (IS_VALLEYVIEW(dev_priv)) valleyview_init_gt_powersave(dev_priv); + else if (INTEL_GEN(dev_priv) >= 6) + gen6_init_rps_frequencies(dev_priv); + + /* Derive initial user preferences/limits from the hardware limits */ + dev_priv->rps.idle_freq = dev_priv->rps.min_freq; + dev_priv->rps.cur_freq = dev_priv->rps.idle_freq; + + dev_priv->rps.max_freq_softlimit = dev_priv->rps.max_freq; + dev_priv->rps.min_freq_softlimit = dev_priv->rps.min_freq; + + if (IS_HASWELL(dev_priv) || IS_BROADWELL(dev_priv)) + dev_priv->rps.min_freq_softlimit = + max_t(int, + dev_priv->rps.efficient_freq, + intel_freq_opcode(dev_priv, 450)); + + /* After setting max-softlimit, find the overclock max freq */ + if (IS_GEN6(dev_priv) || + IS_IVYBRIDGE(dev_priv) || IS_HASWELL(dev_priv)) { + u32 params 
= 0; + + sandybridge_pcode_read(dev_priv, GEN6_READ_OC_PARAMS, &params); + if (params & BIT(31)) { /* OC supported */ + DRM_DEBUG_DRIVER("Overclocking supported, max: %dMHz, overclock: %dMHz\n", + (dev_priv->rps.max_freq & 0xff) * 50, + (params & 0xff) * 50); + dev_priv->rps.max_freq = params & 0xff; + } + } + + /* Finally allow us to boost to max by default */ + dev_priv->rps.boost_freq = dev_priv->rps.max_freq; + + mutex_unlock(&dev_priv->rps.hw_lock); + + intel_autoenable_gt_powersave(dev_priv); } void intel_cleanup_gt_powersave(struct drm_i915_private *dev_priv) { - if (IS_CHERRYVIEW(dev_priv)) - return; - else if (IS_VALLEYVIEW(dev_priv)) + if (IS_VALLEYVIEW(dev_priv)) valleyview_cleanup_gt_powersave(dev_priv); if (!i915.enable_rc6) intel_runtime_pm_put(dev_priv); } - static void gen6_suspend_rps(struct drm_i915_private *dev_priv) - { - flush_delayed_work(&dev_priv->rps.delayed_resume_work); - - gen6_disable_rps_interrupts(dev_priv); - } - /** * intel_suspend_gt_powersave - suspend PM work and helper threads * @dev_priv: i915 device @@@ -6603,60 -6555,76 +6555,76 @@@ void intel_suspend_gt_powersave(struct if (INTEL_GEN(dev_priv) < 6) return; - gen6_suspend_rps(dev_priv); + if (cancel_delayed_work_sync(&dev_priv->rps.autoenable_work)) + intel_runtime_pm_put(dev_priv); - /* Force GPU to min freq during suspend */ - gen6_rps_idle(dev_priv); + /* gen6_rps_idle() will be called later to disable interrupts */ + } + + void intel_sanitize_gt_powersave(struct drm_i915_private *dev_priv) + { + dev_priv->rps.enabled = true; /* force disabling */ + intel_disable_gt_powersave(dev_priv); + + gen6_reset_rps_interrupts(dev_priv); } void intel_disable_gt_powersave(struct drm_i915_private *dev_priv) { - if (IS_IRONLAKE_M(dev_priv)) { - ironlake_disable_drps(dev_priv); - } else if (INTEL_INFO(dev_priv)->gen >= 6) { - intel_suspend_gt_powersave(dev_priv); + if (!READ_ONCE(dev_priv->rps.enabled)) + return; - mutex_lock(&dev_priv->rps.hw_lock); - if (INTEL_INFO(dev_priv)->gen >= 9) { - gen9_disable_rc6(dev_priv); - gen9_disable_rps(dev_priv); - } else if (IS_CHERRYVIEW(dev_priv)) - cherryview_disable_rps(dev_priv); - else if (IS_VALLEYVIEW(dev_priv)) - valleyview_disable_rps(dev_priv); - else - gen6_disable_rps(dev_priv); + mutex_lock(&dev_priv->rps.hw_lock); - dev_priv->rps.enabled = false; - mutex_unlock(&dev_priv->rps.hw_lock); + if (INTEL_GEN(dev_priv) >= 9) { + gen9_disable_rc6(dev_priv); + gen9_disable_rps(dev_priv); + } else if (IS_CHERRYVIEW(dev_priv)) { + cherryview_disable_rps(dev_priv); + } else if (IS_VALLEYVIEW(dev_priv)) { + valleyview_disable_rps(dev_priv); + } else if (INTEL_GEN(dev_priv) >= 6) { + gen6_disable_rps(dev_priv); + } else if (IS_IRONLAKE_M(dev_priv)) { + ironlake_disable_drps(dev_priv); } + + dev_priv->rps.enabled = false; + mutex_unlock(&dev_priv->rps.hw_lock); } - static void intel_gen6_powersave_work(struct work_struct *work) + void intel_enable_gt_powersave(struct drm_i915_private *dev_priv) { - struct drm_i915_private *dev_priv = - container_of(work, struct drm_i915_private, - rps.delayed_resume_work.work); + /* We shouldn't be disabling as we submit, so this should be less + * racy than it appears! 
+ */ + if (READ_ONCE(dev_priv->rps.enabled)) + return; - mutex_lock(&dev_priv->rps.hw_lock); + /* Powersaving is controlled by the host when inside a VM */ + if (intel_vgpu_active(dev_priv)) + return; - gen6_reset_rps_interrupts(dev_priv); + mutex_lock(&dev_priv->rps.hw_lock); if (IS_CHERRYVIEW(dev_priv)) { cherryview_enable_rps(dev_priv); } else if (IS_VALLEYVIEW(dev_priv)) { valleyview_enable_rps(dev_priv); - } else if (INTEL_INFO(dev_priv)->gen >= 9) { + } else if (INTEL_GEN(dev_priv) >= 9) { gen9_enable_rc6(dev_priv); gen9_enable_rps(dev_priv); if (IS_SKYLAKE(dev_priv) || IS_KABYLAKE(dev_priv)) - __gen6_update_ring_freq(dev_priv); + gen6_update_ring_freq(dev_priv); } else if (IS_BROADWELL(dev_priv)) { gen8_enable_rps(dev_priv); - __gen6_update_ring_freq(dev_priv); - } else { + gen6_update_ring_freq(dev_priv); + } else if (INTEL_GEN(dev_priv) >= 6) { gen6_enable_rps(dev_priv); - __gen6_update_ring_freq(dev_priv); + gen6_update_ring_freq(dev_priv); + } else if (IS_IRONLAKE_M(dev_priv)) { + ironlake_enable_drps(dev_priv); + intel_init_emon(dev_priv); } WARN_ON(dev_priv->rps.max_freq < dev_priv->rps.min_freq); @@@ -6666,18 -6634,47 +6634,47 @@@ WARN_ON(dev_priv->rps.efficient_freq > dev_priv->rps.max_freq); dev_priv->rps.enabled = true; + mutex_unlock(&dev_priv->rps.hw_lock); + } - gen6_enable_rps_interrupts(dev_priv); + static void __intel_autoenable_gt_powersave(struct work_struct *work) + { + struct drm_i915_private *dev_priv = + container_of(work, typeof(*dev_priv), rps.autoenable_work.work); + struct intel_engine_cs *rcs; + struct drm_i915_gem_request *req; - mutex_unlock(&dev_priv->rps.hw_lock); + if (READ_ONCE(dev_priv->rps.enabled)) + goto out; + + rcs = &dev_priv->engine[RCS]; + if (rcs->last_context) + goto out; + + if (!rcs->init_context) + goto out; + mutex_lock(&dev_priv->drm.struct_mutex); + + req = i915_gem_request_alloc(rcs, dev_priv->kernel_context); + if (IS_ERR(req)) + goto unlock; + + if (!i915.enable_execlists && i915_switch_context(req) == 0) + rcs->init_context(req); + + /* Mark the device busy, calling intel_enable_gt_powersave() */ + i915_add_request_no_flush(req); + + unlock: + mutex_unlock(&dev_priv->drm.struct_mutex); + out: intel_runtime_pm_put(dev_priv); } - void intel_enable_gt_powersave(struct drm_i915_private *dev_priv) + void intel_autoenable_gt_powersave(struct drm_i915_private *dev_priv) { - /* Powersaving is controlled by the host when inside a VM */ - if (intel_vgpu_active(dev_priv)) + if (READ_ONCE(dev_priv->rps.enabled)) return; if (IS_IRONLAKE_M(dev_priv)) { @@@ -6698,21 -6695,13 +6695,13 @@@ * paths, so the _noresume version is enough (and in case of * runtime resume it's necessary). 
*/ - if (schedule_delayed_work(&dev_priv->rps.delayed_resume_work, - round_jiffies_up_relative(HZ))) + if (queue_delayed_work(dev_priv->wq, + &dev_priv->rps.autoenable_work, + round_jiffies_up_relative(HZ))) intel_runtime_pm_get_noresume(dev_priv); } } - void intel_reset_gt_powersave(struct drm_i915_private *dev_priv) - { - if (INTEL_INFO(dev_priv)->gen < 6) - return; - - gen6_suspend_rps(dev_priv); - dev_priv->rps.enabled = false; - } - static void ibx_init_clock_gating(struct drm_device *dev) { struct drm_i915_private *dev_priv = to_i915(dev); @@@ -7787,7 -7776,7 +7776,7 @@@ static void __intel_rps_boost_work(stru if (!i915_gem_request_completed(req)) gen6_rps_boost(req->i915, NULL, req->emitted_jiffies); - i915_gem_request_unreference(req); + i915_gem_request_put(req); kfree(boost); } @@@ -7805,8 -7794,7 +7794,7 @@@ void intel_queue_rps_boost_for_request( if (boost == NULL) return; - i915_gem_request_reference(req); - boost->req = req; + boost->req = i915_gem_request_get(req); INIT_WORK(&boost->work, __intel_rps_boost_work); queue_work(req->i915->wq, &boost->work); @@@ -7819,11 -7807,9 +7807,9 @@@ void intel_pm_setup(struct drm_device * mutex_init(&dev_priv->rps.hw_lock); spin_lock_init(&dev_priv->rps.client_lock); - INIT_DELAYED_WORK(&dev_priv->rps.delayed_resume_work, - intel_gen6_powersave_work); + INIT_DELAYED_WORK(&dev_priv->rps.autoenable_work, + __intel_autoenable_gt_powersave); INIT_LIST_HEAD(&dev_priv->rps.clients); - INIT_LIST_HEAD(&dev_priv->rps.semaphores.link); - INIT_LIST_HEAD(&dev_priv->rps.mmioflips.link); dev_priv->pm.suspended = false; atomic_set(&dev_priv->pm.wakeref_count, 0); diff --combined drivers/gpu/drm/i915/intel_psr.c index 2b0d1baf15b3,adf2ce0f38c0..59a21c9d2e43 --- a/drivers/gpu/drm/i915/intel_psr.c +++ b/drivers/gpu/drm/i915/intel_psr.c @@@ -327,9 -327,6 +327,9 @@@ static bool intel_psr_match_conditions( struct drm_i915_private *dev_priv = to_i915(dev); struct drm_crtc *crtc = dig_port->base.base.crtc; struct intel_crtc *intel_crtc = to_intel_crtc(crtc); + const struct drm_display_mode *adjusted_mode = + &intel_crtc->config->base.adjusted_mode; + int psr_setup_time; lockdep_assert_held(&dev_priv->psr.lock); WARN_ON(!drm_modeset_is_locked(&dev->mode_config.connection_mutex)); @@@ -368,25 -365,11 +368,25 @@@ } if (IS_HASWELL(dev) && - intel_crtc->config->base.adjusted_mode.flags & DRM_MODE_FLAG_INTERLACE) { + adjusted_mode->flags & DRM_MODE_FLAG_INTERLACE) { DRM_DEBUG_KMS("PSR condition failed: Interlaced is Enabled\n"); return false; } + psr_setup_time = drm_dp_psr_setup_time(intel_dp->psr_dpcd); + if (psr_setup_time < 0) { + DRM_DEBUG_KMS("PSR condition failed: Invalid PSR setup time (0x%02x)\n", + intel_dp->psr_dpcd[1]); + return false; + } + + if (intel_usecs_to_scanlines(adjusted_mode, psr_setup_time) > + adjusted_mode->crtc_vtotal - adjusted_mode->crtc_vdisplay - 1) { + DRM_DEBUG_KMS("PSR condition failed: PSR setup time (%d us) too long\n", + psr_setup_time); + return false; + } + dev_priv->psr.source_ok = true; return true; } @@@ -645,9 -628,8 +645,8 @@@ unlock mutex_unlock(&dev_priv->psr.lock); } - static void intel_psr_exit(struct drm_device *dev) + static void intel_psr_exit(struct drm_i915_private *dev_priv) { - struct drm_i915_private *dev_priv = to_i915(dev); struct intel_dp *intel_dp = dev_priv->psr.enabled; struct drm_crtc *crtc = dp_to_dig_port(intel_dp)->base.base.crtc; enum pipe pipe = to_intel_crtc(crtc)->pipe; @@@ -656,7 -638,7 +655,7 @@@ if (!dev_priv->psr.active) return; - if (HAS_DDI(dev)) { + if (HAS_DDI(dev_priv)) { val = 
I915_READ(EDP_PSR_CTL); WARN_ON(!(val & EDP_PSR_ENABLE)); @@@ -691,7 -673,7 +690,7 @@@ /** * intel_psr_single_frame_update - Single Frame Update - * @dev: DRM device + * @dev_priv: i915 device * @frontbuffer_bits: frontbuffer plane tracking bits * * Some platforms support a single frame update feature that is used to @@@ -699,10 -681,9 +698,9 @@@ * So far it is only implemented for Valleyview and Cherryview because * hardware requires this to be done before a page flip. */ - void intel_psr_single_frame_update(struct drm_device *dev, + void intel_psr_single_frame_update(struct drm_i915_private *dev_priv, unsigned frontbuffer_bits) { - struct drm_i915_private *dev_priv = to_i915(dev); struct drm_crtc *crtc; enum pipe pipe; u32 val; @@@ -711,7 -692,7 +709,7 @@@ * Single frame update is already supported on BDW+ but it requires * many W/A and it isn't really needed. */ - if (!IS_VALLEYVIEW(dev) && !IS_CHERRYVIEW(dev)) + if (!IS_VALLEYVIEW(dev_priv) && !IS_CHERRYVIEW(dev_priv)) return; mutex_lock(&dev_priv->psr.lock); @@@ -737,7 -718,7 +735,7 @@@ /** * intel_psr_invalidate - Invalidate PSR - * @dev: DRM device + * @dev_priv: i915 device * @frontbuffer_bits: frontbuffer plane tracking bits * * Since the hardware frontbuffer tracking has gaps we need to integrate @@@ -747,10 -728,9 +745,9 @@@ * * Dirty frontbuffers relevant to PSR are tracked in busy_frontbuffer_bits. */ - void intel_psr_invalidate(struct drm_device *dev, + void intel_psr_invalidate(struct drm_i915_private *dev_priv, unsigned frontbuffer_bits) { - struct drm_i915_private *dev_priv = to_i915(dev); struct drm_crtc *crtc; enum pipe pipe; @@@ -767,14 -747,14 +764,14 @@@ dev_priv->psr.busy_frontbuffer_bits |= frontbuffer_bits; if (frontbuffer_bits) - intel_psr_exit(dev); + intel_psr_exit(dev_priv); mutex_unlock(&dev_priv->psr.lock); } /** * intel_psr_flush - Flush PSR - * @dev: DRM device + * @dev_priv: i915 device * @frontbuffer_bits: frontbuffer plane tracking bits * @origin: which operation caused the flush * @@@ -785,10 -765,9 +782,9 @@@ * * Dirty frontbuffers relevant to PSR are tracked in busy_frontbuffer_bits. 
*/ - void intel_psr_flush(struct drm_device *dev, + void intel_psr_flush(struct drm_i915_private *dev_priv, unsigned frontbuffer_bits, enum fb_op_origin origin) { - struct drm_i915_private *dev_priv = to_i915(dev); struct drm_crtc *crtc; enum pipe pipe; @@@ -806,7 -785,7 +802,7 @@@ /* By definition flush = invalidate + flush */ if (frontbuffer_bits) - intel_psr_exit(dev); + intel_psr_exit(dev_priv); if (!dev_priv->psr.active && !dev_priv->psr.busy_frontbuffer_bits) if (!work_busy(&dev_priv->psr.work.work)) diff --combined drivers/gpu/drm/i915/intel_sprite.c index 1d9736b0cced,9ed7ad32cffd..cbdca7e4d307 --- a/drivers/gpu/drm/i915/intel_sprite.c +++ b/drivers/gpu/drm/i915/intel_sprite.c @@@ -36,6 -36,7 +36,7 @@@ #include #include #include "intel_drv.h" + #include "intel_frontbuffer.h" #include #include "i915_drv.h" @@@ -53,8 -54,8 +54,8 @@@ format_is_yuv(uint32_t format } } -static int usecs_to_scanlines(const struct drm_display_mode *adjusted_mode, - int usecs) +int intel_usecs_to_scanlines(const struct drm_display_mode *adjusted_mode, + int usecs) { /* paranoia */ if (!adjusted_mode->crtc_htotal) @@@ -91,7 -92,7 +92,7 @@@ void intel_pipe_update_start(struct int vblank_start = DIV_ROUND_UP(vblank_start, 2); /* FIXME needs to be calibrated sensibly */ - min = vblank_start - usecs_to_scanlines(adjusted_mode, 100); + min = vblank_start - intel_usecs_to_scanlines(adjusted_mode, 100); max = vblank_start - 1; local_irq_disable(); @@@ -211,14 -212,14 +212,14 @@@ skl_update_plane(struct drm_plane *drm_ u32 tile_height, plane_offset, plane_size; unsigned int rotation = plane_state->base.rotation; int x_offset, y_offset; - int crtc_x = plane_state->dst.x1; - int crtc_y = plane_state->dst.y1; - uint32_t crtc_w = drm_rect_width(&plane_state->dst); - uint32_t crtc_h = drm_rect_height(&plane_state->dst); - uint32_t x = plane_state->src.x1 >> 16; - uint32_t y = plane_state->src.y1 >> 16; - uint32_t src_w = drm_rect_width(&plane_state->src) >> 16; - uint32_t src_h = drm_rect_height(&plane_state->src) >> 16; + int crtc_x = plane_state->base.dst.x1; + int crtc_y = plane_state->base.dst.y1; + uint32_t crtc_w = drm_rect_width(&plane_state->base.dst); + uint32_t crtc_h = drm_rect_height(&plane_state->base.dst); + uint32_t x = plane_state->base.src.x1 >> 16; + uint32_t y = plane_state->base.src.y1 >> 16; + uint32_t src_w = drm_rect_width(&plane_state->base.src) >> 16; + uint32_t src_h = drm_rect_height(&plane_state->base.src) >> 16; plane_ctl = PLANE_CTL_ENABLE | PLANE_CTL_PIPE_GAMMA_ENABLE | @@@ -370,14 -371,14 +371,14 @@@ vlv_update_plane(struct drm_plane *dpla unsigned int rotation = dplane->state->rotation; int cpp = drm_format_plane_cpp(fb->pixel_format, 0); const struct drm_intel_sprite_colorkey *key = &plane_state->ckey; - int crtc_x = plane_state->dst.x1; - int crtc_y = plane_state->dst.y1; - uint32_t crtc_w = drm_rect_width(&plane_state->dst); - uint32_t crtc_h = drm_rect_height(&plane_state->dst); - uint32_t x = plane_state->src.x1 >> 16; - uint32_t y = plane_state->src.y1 >> 16; - uint32_t src_w = drm_rect_width(&plane_state->src) >> 16; - uint32_t src_h = drm_rect_height(&plane_state->src) >> 16; + int crtc_x = plane_state->base.dst.x1; + int crtc_y = plane_state->base.dst.y1; + uint32_t crtc_w = drm_rect_width(&plane_state->base.dst); + uint32_t crtc_h = drm_rect_height(&plane_state->base.dst); + uint32_t x = plane_state->base.src.x1 >> 16; + uint32_t y = plane_state->base.src.y1 >> 16; + uint32_t src_w = drm_rect_width(&plane_state->base.src) >> 16; + uint32_t src_h = 
drm_rect_height(&plane_state->base.src) >> 16; sprctl = SP_ENABLE; @@@ -430,7 -431,7 +431,7 @@@ */ sprctl |= SP_GAMMA_ENABLE; - if (obj->tiling_mode != I915_TILING_NONE) + if (i915_gem_object_is_tiled(obj)) sprctl |= SP_TILED; /* Sizes are 0 based */ @@@ -444,7 -445,7 +445,7 @@@ fb->pitches[0], rotation); linear_offset -= sprsurf_offset; - if (rotation == BIT(DRM_ROTATE_180)) { + if (rotation == DRM_ROTATE_180) { sprctl |= SP_ROTATE_180; x += src_w; @@@ -467,7 -468,7 +468,7 @@@ I915_WRITE(SPSTRIDE(pipe, plane), fb->pitches[0]); I915_WRITE(SPPOS(pipe, plane), (crtc_y << 16) | crtc_x); - if (obj->tiling_mode != I915_TILING_NONE) + if (i915_gem_object_is_tiled(obj)) I915_WRITE(SPTILEOFF(pipe, plane), (y << 16) | x); else I915_WRITE(SPLINOFF(pipe, plane), linear_offset); @@@ -512,14 -513,14 +513,14 @@@ ivb_update_plane(struct drm_plane *plan unsigned int rotation = plane_state->base.rotation; int cpp = drm_format_plane_cpp(fb->pixel_format, 0); const struct drm_intel_sprite_colorkey *key = &plane_state->ckey; - int crtc_x = plane_state->dst.x1; - int crtc_y = plane_state->dst.y1; - uint32_t crtc_w = drm_rect_width(&plane_state->dst); - uint32_t crtc_h = drm_rect_height(&plane_state->dst); - uint32_t x = plane_state->src.x1 >> 16; - uint32_t y = plane_state->src.y1 >> 16; - uint32_t src_w = drm_rect_width(&plane_state->src) >> 16; - uint32_t src_h = drm_rect_height(&plane_state->src) >> 16; + int crtc_x = plane_state->base.dst.x1; + int crtc_y = plane_state->base.dst.y1; + uint32_t crtc_w = drm_rect_width(&plane_state->base.dst); + uint32_t crtc_h = drm_rect_height(&plane_state->base.dst); + uint32_t x = plane_state->base.src.x1 >> 16; + uint32_t y = plane_state->base.src.y1 >> 16; + uint32_t src_w = drm_rect_width(&plane_state->base.src) >> 16; + uint32_t src_h = drm_rect_height(&plane_state->base.src) >> 16; sprctl = SPRITE_ENABLE; @@@ -552,7 -553,7 +553,7 @@@ */ sprctl |= SPRITE_GAMMA_ENABLE; - if (obj->tiling_mode != I915_TILING_NONE) + if (i915_gem_object_is_tiled(obj)) sprctl |= SPRITE_TILED; if (IS_HASWELL(dev) || IS_BROADWELL(dev)) @@@ -577,7 -578,7 +578,7 @@@ fb->pitches[0], rotation); linear_offset -= sprsurf_offset; - if (rotation == BIT(DRM_ROTATE_180)) { + if (rotation == DRM_ROTATE_180) { sprctl |= SPRITE_ROTATE_180; /* HSW and BDW does this automagically in hardware */ @@@ -606,7 -607,7 +607,7 @@@ * register */ if (IS_HASWELL(dev) || IS_BROADWELL(dev)) I915_WRITE(SPROFFSET(pipe), (y << 16) | x); - else if (obj->tiling_mode != I915_TILING_NONE) + else if (i915_gem_object_is_tiled(obj)) I915_WRITE(SPRTILEOFF(pipe), (y << 16) | x); else I915_WRITE(SPRLINOFF(pipe), linear_offset); @@@ -653,14 -654,14 +654,14 @@@ ilk_update_plane(struct drm_plane *plan unsigned int rotation = plane_state->base.rotation; int cpp = drm_format_plane_cpp(fb->pixel_format, 0); const struct drm_intel_sprite_colorkey *key = &plane_state->ckey; - int crtc_x = plane_state->dst.x1; - int crtc_y = plane_state->dst.y1; - uint32_t crtc_w = drm_rect_width(&plane_state->dst); - uint32_t crtc_h = drm_rect_height(&plane_state->dst); - uint32_t x = plane_state->src.x1 >> 16; - uint32_t y = plane_state->src.y1 >> 16; - uint32_t src_w = drm_rect_width(&plane_state->src) >> 16; - uint32_t src_h = drm_rect_height(&plane_state->src) >> 16; + int crtc_x = plane_state->base.dst.x1; + int crtc_y = plane_state->base.dst.y1; + uint32_t crtc_w = drm_rect_width(&plane_state->base.dst); + uint32_t crtc_h = drm_rect_height(&plane_state->base.dst); + uint32_t x = plane_state->base.src.x1 >> 16; + uint32_t y = plane_state->base.src.y1 
>> 16; + uint32_t src_w = drm_rect_width(&plane_state->base.src) >> 16; + uint32_t src_h = drm_rect_height(&plane_state->base.src) >> 16; dvscntr = DVS_ENABLE; @@@ -693,7 -694,7 +694,7 @@@ */ dvscntr |= DVS_GAMMA_ENABLE; - if (obj->tiling_mode != I915_TILING_NONE) + if (i915_gem_object_is_tiled(obj)) dvscntr |= DVS_TILED; if (IS_GEN6(dev)) @@@ -714,7 -715,7 +715,7 @@@ fb->pitches[0], rotation); linear_offset -= dvssurf_offset; - if (rotation == BIT(DRM_ROTATE_180)) { + if (rotation == DRM_ROTATE_180) { dvscntr |= DVS_ROTATE_180; x += src_w; @@@ -736,7 -737,7 +737,7 @@@ I915_WRITE(DVSSTRIDE(pipe), fb->pitches[0]); I915_WRITE(DVSPOS(pipe), (crtc_y << 16) | crtc_x); - if (obj->tiling_mode != I915_TILING_NONE) + if (i915_gem_object_is_tiled(obj)) I915_WRITE(DVSTILEOFF(pipe), (y << 16) | x); else I915_WRITE(DVSLINOFF(pipe), linear_offset); @@@ -778,25 -779,15 +779,25 @@@ intel_check_sprite_plane(struct drm_pla int crtc_x, crtc_y; unsigned int crtc_w, crtc_h; uint32_t src_x, src_y, src_w, src_h; - struct drm_rect *src = &state->src; - struct drm_rect *dst = &state->dst; + struct drm_rect *src = &state->base.src; + struct drm_rect *dst = &state->base.dst; const struct drm_rect *clip = &state->clip; int hscale, vscale; int max_scale, min_scale; bool can_scale; + src->x1 = state->base.src_x; + src->y1 = state->base.src_y; + src->x2 = state->base.src_x + state->base.src_w; + src->y2 = state->base.src_y + state->base.src_h; + + dst->x1 = state->base.crtc_x; + dst->y1 = state->base.crtc_y; + dst->x2 = state->base.crtc_x + state->base.crtc_w; + dst->y2 = state->base.crtc_y + state->base.crtc_h; + if (!fb) { - state->visible = false; + state->base.visible = false; return 0; } @@@ -844,14 -835,14 +845,14 @@@ vscale = drm_rect_calc_vscale_relaxed(src, dst, min_scale, max_scale); BUG_ON(vscale < 0); - state->visible = drm_rect_clip_scaled(src, dst, clip, hscale, vscale); + state->base.visible = drm_rect_clip_scaled(src, dst, clip, hscale, vscale); crtc_x = dst->x1; crtc_y = dst->y1; crtc_w = drm_rect_width(dst); crtc_h = drm_rect_height(dst); - if (state->visible) { + if (state->base.visible) { /* check again in case clipping clamped the results */ hscale = drm_rect_calc_hscale(src, dst, min_scale, max_scale); if (hscale < 0) { @@@ -908,12 -899,12 +909,12 @@@ crtc_w &= ~1; if (crtc_w == 0) - state->visible = false; + state->base.visible = false; } } /* Check size restrictions when scaling */ - if (state->visible && (src_w != crtc_w || src_h != crtc_h)) { + if (state->base.visible && (src_w != crtc_w || src_h != crtc_h)) { unsigned int width_bytes; int cpp = drm_format_plane_cpp(fb->pixel_format, 0); @@@ -922,10 -913,10 +923,10 @@@ /* FIXME interlacing min height is 6 */ if (crtc_w < 3 || crtc_h < 3) - state->visible = false; + state->base.visible = false; if (src_w < 3 || src_h < 3) - state->visible = false; + state->base.visible = false; width_bytes = ((src_x * cpp) & 63) + src_w * cpp; @@@ -936,7 -927,7 +937,7 @@@ } } - if (state->visible) { + if (state->base.visible) { src->x1 = src_x << 16; src->x2 = (src_x + src_w) << 16; src->y1 = src_y << 16;
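
Throughout the intel_sprite.c hunks above, source rectangles are carried in 16.16 fixed point (note the << 16 shifts when intel_check_sprite_plane() rebuilds state->base.src, and the >> 16 shifts wherever src_w/src_h are consumed), while destination rectangles stay in whole pixels. A small self-contained illustration of that convention, with a hypothetical fx_rect type standing in for struct drm_rect (illustration only, not the DRM helpers):

	#include <stdint.h>
	#include <stdio.h>

	/* Hypothetical 16.16 fixed-point rectangle, in the spirit of the
	 * driver's plane_state->base.src usage. */
	struct fx_rect {
		int32_t x1, y1, x2, y2;
	};

	/* Width/height in whole pixels: the drm_rect_width(&src) >> 16 pattern. */
	static int fx_width_px(const struct fx_rect *r)
	{
		return (r->x2 - r->x1) >> 16;
	}

	static int fx_height_px(const struct fx_rect *r)
	{
		return (r->y2 - r->y1) >> 16;
	}

	int main(void)
	{
		/* A 123.5 x 67.25 pixel source window at (10, 20), in 16.16 form. */
		struct fx_rect src = {
			.x1 = 10 << 16,
			.y1 = 20 << 16,
			.x2 = ((10 + 123) << 16) + (1 << 15), /* + 0.5 px  */
			.y2 = ((20 + 67) << 16) + (1 << 14),  /* + 0.25 px */
		};

		/* Fractional coverage is truncated, not rounded, matching the
		 * driver's src_w/src_h computations. */
		printf("src %dx%d px\n", fx_width_px(&src), fx_height_px(&src));
		return 0;
	}

That truncation is also why the code above re-checks the scale factors after clipping ("check again in case clipping clamped the results"): clipping can shave fractional pixels off the source window and change the effective ratio.
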