From: Dave Airlie <airlied@redhat.com>
Date: Fri, 5 Nov 2021 03:50:09 +0000 (+1000)
Subject: Merge tag 'drm-misc-next-2021-10-14' of git://anongit.freedesktop.org/drm/drm-misc...
X-Git-Tag: v5.16-rc1~38^2~7
X-Git-Url: https://repo.jachan.dev/linux.git/commitdiff_plain/5275a99e35e5a1d1f68038b0560d0e7eaf624e86?hp=-c

Merge tag 'drm-misc-next-2021-10-14' of git://anongit.freedesktop.org/drm/drm-misc into drm-next

drm-misc-next for 5.16:

UAPI Changes:

Cross-subsystem Changes:

Core Changes:
  - fbdev: Fix double-free, Remove unused scrolling acceleration
  - locking: improve logging for contended locks without backoff
  - dma-buf: Add dma_resv_for_each_fence iterator, and conversion of users

Driver Changes:
  - nouveau: Various code style improvements
  - bridge: HPD improvements for lt9611uxc, eDP aux-bus support for ps8640,
    lvds-codec data-mapping selection support
  - panels: Vivax TPC-9150, Innolux G070Y2-T02, LOGIC Technologies
    LTTD800480070-L2RT, Sharp LS060T1SX01

Signed-off-by: Dave Airlie <airlied@redhat.com>

From: Maxime Ripard <maxime@cerno.tech>
Link: https://patchwork.freedesktop.org/patch/msgid/20211014120452.2wicnt6hobu3kbwb@gilmour
---

5275a99e35e5a1d1f68038b0560d0e7eaf624e86
diff --combined drivers/gpu/drm/drm_modeset_lock.c
index bf8a6e823a15,62f6fe482d25..4d32b61fa1fd
--- a/drivers/gpu/drm/drm_modeset_lock.c
+++ b/drivers/gpu/drm/drm_modeset_lock.c
@@@ -25,6 -25,7 +25,7 @@@
  #include 
  #include 
  #include 
+ #include 
  
  /**
   * DOC: kms locking
@@@ -77,6 -78,45 +78,45 @@@
  
  static DEFINE_WW_CLASS(crtc_ww_class);
  
+ #if IS_ENABLED(CONFIG_DRM_DEBUG_MODESET_LOCK)
+ static noinline depot_stack_handle_t __stack_depot_save(void)
+ {
+     unsigned long entries[8];
+     unsigned int n;
+ 
+     n = stack_trace_save(entries, ARRAY_SIZE(entries), 1);
+ 
+     return stack_depot_save(entries, n, GFP_NOWAIT | __GFP_NOWARN);
+ }
+ 
+ static void __stack_depot_print(depot_stack_handle_t stack_depot)
+ {
+     struct drm_printer p = drm_debug_printer("drm_modeset_lock");
+     unsigned long *entries;
+     unsigned int nr_entries;
+     char *buf;
+ 
+     buf = kmalloc(PAGE_SIZE, GFP_NOWAIT | __GFP_NOWARN);
+     if (!buf)
+         return;
+ 
+     nr_entries = stack_depot_fetch(stack_depot, &entries);
+     stack_trace_snprint(buf, PAGE_SIZE, entries, nr_entries, 2);
+ 
+     drm_printf(&p, "attempting to lock a contended lock without backoff:\n%s", buf);
+ 
+     kfree(buf);
+ }
+ #else /* CONFIG_DRM_DEBUG_MODESET_LOCK */
+ static depot_stack_handle_t __stack_depot_save(void)
+ {
+     return 0;
+ }
+ static void __stack_depot_print(depot_stack_handle_t stack_depot)
+ {
+ }
+ #endif /* CONFIG_DRM_DEBUG_MODESET_LOCK */
+ 
  /**
   * drm_modeset_lock_all - take all modeset locks
   * @dev: DRM device
@@@ -225,7 -265,9 +265,9 @@@ EXPORT_SYMBOL(drm_modeset_acquire_fini)
   */
  void drm_modeset_drop_locks(struct drm_modeset_acquire_ctx *ctx)
  {
-     WARN_ON(ctx->contended);
+     if (WARN_ON(ctx->contended))
+         __stack_depot_print(ctx->stack_depot);
+ 
      while (!list_empty(&ctx->locked)) {
          struct drm_modeset_lock *lock;
  
@@@ -243,12 -285,13 +285,13 @@@ static inline int modeset_lock(struct d
  {
      int ret;
  
-     WARN_ON(ctx->contended);
+     if (WARN_ON(ctx->contended))
+         __stack_depot_print(ctx->stack_depot);
  
      if (ctx->trylock_only) {
          lockdep_assert_held(&ctx->ww_ctx);
  
-         if (!ww_mutex_trylock(&lock->mutex))
+         if (!ww_mutex_trylock(&lock->mutex, NULL))
              return -EBUSY;
          else
              return 0;
@@@ -274,6 -317,7 +317,7 @@@
          ret = 0;
      } else if (ret == -EDEADLK) {
          ctx->contended = lock;
+         ctx->stack_depot = __stack_depot_save();
      }
  
      return ret;
@@@ -296,6 -340,7 +340,7 @@@ int drm_modeset_backoff(struct drm_mode
      struct drm_modeset_lock *contended = ctx->contended;
  
      ctx->contended = NULL;
+     ctx->stack_depot = 0;
  
      if (WARN_ON(!contended))
          return 0;
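
The drm_modeset_lock.c hunks above capture a backtrace (via the stack depot) at the point
where a lock attempt returns -EDEADLK, and replay it from the WARN_ON sites whenever a
caller keeps acquiring or dropping locks without first calling drm_modeset_backoff(). For
reference, this is the acquire/retry/backoff pattern the new check enforces; a minimal
sketch with a hypothetical driver function (only the DRM calls themselves come from the
kernel, the rest is illustrative):

static int example_lock_crtc(struct drm_crtc *crtc)
{
    struct drm_modeset_acquire_ctx ctx;
    int ret;

    drm_modeset_acquire_init(&ctx, 0);
retry:
    ret = drm_modeset_lock(&crtc->mutex, &ctx);
    if (ret == -EDEADLK) {
        /* Mandatory: back off and retry, or the next lock/drop call
         * WARNs and prints the backtrace saved by __stack_depot_save(). */
        drm_modeset_backoff(&ctx);
        goto retry;
    }

    /* ... modeset state is protected here ... */

    drm_modeset_drop_locks(&ctx);
    drm_modeset_acquire_fini(&ctx);
    return ret;
}

drm_modeset_backoff() is also where the saved handle is cleared (the new
"ctx->stack_depot = 0" above), so a well-behaved retry loop never prints anything.
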
diff --combined drivers/gpu/drm/i915/i915_request.c
index 2c3cd6e635b5,3839712ebd23..820a1f38b271
--- a/drivers/gpu/drm/i915/i915_request.c
+++ b/drivers/gpu/drm/i915/i915_request.c
@@@ -829,6 -829,8 +829,6 @@@ static void __i915_request_ctor(void *a
      i915_sw_fence_init(&rq->submit, submit_notify);
      i915_sw_fence_init(&rq->semaphore, semaphore_notify);
  
-     dma_fence_init(&rq->fence, &i915_fence_ops, &rq->lock, 0, 0);
- 
      rq->capture_list = NULL;
  
      init_llist_head(&rq->execute_cb);
@@@ -903,12 -905,17 +903,12 @@@ __i915_request_create(struct intel_cont
      rq->ring = ce->ring;
      rq->execution_mask = ce->engine->mask;
  
-     kref_init(&rq->fence.refcount);
-     rq->fence.flags = 0;
-     rq->fence.error = 0;
-     INIT_LIST_HEAD(&rq->fence.cb_list);
- 
      ret = intel_timeline_get_seqno(tl, rq, &seqno);
      if (ret)
          goto err_free;
  
-     rq->fence.context = tl->fence_context;
-     rq->fence.seqno = seqno;
+     dma_fence_init(&rq->fence, &i915_fence_ops, &rq->lock,
+                    tl->fence_context, seqno);
  
      RCU_INIT_POINTER(rq->timeline, tl);
      rq->hwsp_seqno = tl->hwsp_seqno;
@@@ -1145,12 -1152,6 +1145,12 @@@ __emit_semaphore_wait(struct i915_reque
      return 0;
  }
  
+ static bool
+ can_use_semaphore_wait(struct i915_request *to, struct i915_request *from)
+ {
+     return to->engine->gt->ggtt == from->engine->gt->ggtt;
+ }
+ 
  static int
  emit_semaphore_wait(struct i915_request *to,
                      struct i915_request *from,
@@@ -1159,9 -1160,6 +1159,9 @@@
      const intel_engine_mask_t mask = READ_ONCE(from->engine)->mask;
      struct i915_sw_fence *wait = &to->submit;
  
+     if (!can_use_semaphore_wait(to, from))
+         goto await_fence;
+ 
      if (!intel_context_use_semaphores(to->context))
          goto await_fence;
  
@@@ -1265,8 -1263,7 +1265,8 @@@ __i915_request_await_execution(struct i
       * immediate execution, and so we must wait until it reaches the
       * active slot.
       */
-     if (intel_engine_has_semaphores(to->engine) &&
+     if (can_use_semaphore_wait(to, from) &&
+         intel_engine_has_semaphores(to->engine) &&
          !i915_request_has_initial_breadcrumb(to)) {
          err = __emit_semaphore_wait(to, from, from->fence.seqno - 1);
          if (err < 0)
@@@ -1335,25 -1332,6 +1335,25 @@@ i915_request_await_external(struct i915
      return err;
  }
  
+ static inline bool is_parallel_rq(struct i915_request *rq)
+ {
+     return intel_context_is_parallel(rq->context);
+ }
+ 
+ static inline struct intel_context *request_to_parent(struct i915_request *rq)
+ {
+     return intel_context_to_parent(rq->context);
+ }
+ 
+ static bool is_same_parallel_context(struct i915_request *to,
+                                      struct i915_request *from)
+ {
+     if (is_parallel_rq(to))
+         return request_to_parent(to) == request_to_parent(from);
+ 
+     return false;
+ }
+ 
  int
  i915_request_await_execution(struct i915_request *rq,
                               struct dma_fence *fence)
@@@ -1385,14 -1363,11 +1385,14 @@@
           * want to run our callback in all cases.
           */
-         if (dma_fence_is_i915(fence))
+         if (dma_fence_is_i915(fence)) {
+             if (is_same_parallel_context(rq, to_request(fence)))
+                 continue;
              ret = __i915_request_await_execution(rq, to_request(fence));
-         else
+         } else {
              ret = i915_request_await_external(rq, fence);
+         }
          if (ret < 0)
              return ret;
      } while (--nchild);
@@@ -1493,13 -1468,10 +1493,13 @@@ i915_request_await_dma_fence(struct i91
                       fence))
              continue;
  
-         if (dma_fence_is_i915(fence))
+         if (dma_fence_is_i915(fence)) {
+             if (is_same_parallel_context(rq, to_request(fence)))
+                 continue;
              ret = i915_request_await_request(rq, to_request(fence));
-         else
+         } else {
              ret = i915_request_await_external(rq, fence);
+         }
          if (ret < 0)
              return ret;
  
@@@ -1537,89 -1509,49 +1537,65 @@@ i915_request_await_object(struct i915_r
                            struct drm_i915_gem_object *obj,
                            bool write)
  {
-     struct dma_fence *excl;
+     struct dma_resv_iter cursor;
+     struct dma_fence *fence;
      int ret = 0;
  
-     if (write) {
-         struct dma_fence **shared;
-         unsigned int count, i;
- 
-         ret = dma_resv_get_fences(obj->base.resv, &excl, &count,
-                                   &shared);
+     dma_resv_for_each_fence(&cursor, obj->base.resv, write, fence) {
+         ret = i915_request_await_dma_fence(to, fence);
          if (ret)
-             return ret;
- 
-         for (i = 0; i < count; i++) {
-             ret = i915_request_await_dma_fence(to, shared[i]);
-             if (ret)
-                 break;
- 
-             dma_fence_put(shared[i]);
-         }
- 
-         for (; i < count; i++)
-             dma_fence_put(shared[i]);
-         kfree(shared);
-     } else {
-         excl = dma_resv_get_excl_unlocked(obj->base.resv);
-     }
- 
-     if (excl) {
-         if (ret == 0)
-             ret = i915_request_await_dma_fence(to, excl);
- 
-         dma_fence_put(excl);
+             break;
      }
  
      return ret;
  }
  
  static struct i915_request *
- __i915_request_add_to_timeline(struct i915_request *rq)
+ __i915_request_ensure_parallel_ordering(struct i915_request *rq,
+                                         struct intel_timeline *timeline)
  {
-     struct intel_timeline *timeline = i915_request_timeline(rq);
      struct i915_request *prev;
  
-     /*
-      * Dependency tracking and request ordering along the timeline
-      * is special cased so that we can eliminate redundant ordering
-      * operations while building the request (we know that the timeline
-      * itself is ordered, and here we guarantee it).
-      *
-      * As we know we will need to emit tracking along the timeline,
-      * we embed the hooks into our request struct -- at the cost of
-      * having to have specialised no-allocation interfaces (which will
-      * be beneficial elsewhere).
-      *
-      * A second benefit to open-coding i915_request_await_request is
-      * that we can apply a slight variant of the rules specialised
-      * for timelines that jump between engines (such as virtual engines).
-      * If we consider the case of virtual engine, we must emit a dma-fence
-      * to prevent scheduling of the second request until the first is
-      * complete (to maximise our greedy late load balancing) and this
-      * precludes optimising to use semaphores serialisation of a single
-      * timeline across engines.
-      */
+     GEM_BUG_ON(!is_parallel_rq(rq));
+ 
+     prev = request_to_parent(rq)->parallel.last_rq;
+     if (prev) {
+         if (!__i915_request_is_complete(prev)) {
+             i915_sw_fence_await_sw_fence(&rq->submit,
+                                          &prev->submit,
+                                          &rq->submitq);
+ 
+             if (rq->engine->sched_engine->schedule)
+                 __i915_sched_node_add_dependency(&rq->sched,
+                                                  &prev->sched,
+                                                  &rq->dep,
+                                                  0);
+         }
+         i915_request_put(prev);
+     }
+ 
+     request_to_parent(rq)->parallel.last_rq = i915_request_get(rq);
+ 
+     return to_request(__i915_active_fence_set(&timeline->last_request,
+                                               &rq->fence));
+ }
+ 
+ static struct i915_request *
+ __i915_request_ensure_ordering(struct i915_request *rq,
+                                struct intel_timeline *timeline)
+ {
+     struct i915_request *prev;
+ 
+     GEM_BUG_ON(is_parallel_rq(rq));
+ 
      prev = to_request(__i915_active_fence_set(&timeline->last_request,
                                                &rq->fence));
+ 
      if (prev && !__i915_request_is_complete(prev)) {
          bool uses_guc = intel_engine_uses_guc(rq->engine);
+         bool pow2 = is_power_of_2(READ_ONCE(prev->engine)->mask |
+                                   rq->engine->mask);
+         bool same_context = prev->context == rq->context;
  
          /*
           * The requests are supposed to be kept in order. However,
@@@ -1627,11 -1559,13 +1603,11 @@@
           * is used as a barrier for external modification to this
           * context.
           */
-         GEM_BUG_ON(prev->context == rq->context &&
+         GEM_BUG_ON(same_context &&
                     i915_seqno_passed(prev->fence.seqno,
                                       rq->fence.seqno));
  
-         if ((!uses_guc &&
-              is_power_of_2(READ_ONCE(prev->engine)->mask | rq->engine->mask)) ||
-             (uses_guc && prev->context == rq->context))
+         if ((same_context && uses_guc) || (!uses_guc && pow2))
              i915_sw_fence_await_sw_fence(&rq->submit,
                                           &prev->submit,
                                           &rq->submitq);
@@@ -1646,50 -1580,6 +1622,50 @@@
                                           0);
      }
  
+     return prev;
+ }
+ 
+ static struct i915_request *
+ __i915_request_add_to_timeline(struct i915_request *rq)
+ {
+     struct intel_timeline *timeline = i915_request_timeline(rq);
+     struct i915_request *prev;
+ 
+     /*
+      * Dependency tracking and request ordering along the timeline
+      * is special cased so that we can eliminate redundant ordering
+      * operations while building the request (we know that the timeline
+      * itself is ordered, and here we guarantee it).
+      *
+      * As we know we will need to emit tracking along the timeline,
+      * we embed the hooks into our request struct -- at the cost of
+      * having to have specialised no-allocation interfaces (which will
+      * be beneficial elsewhere).
+      *
+      * A second benefit to open-coding i915_request_await_request is
+      * that we can apply a slight variant of the rules specialised
+      * for timelines that jump between engines (such as virtual engines).
+      * If we consider the case of virtual engine, we must emit a dma-fence
+      * to prevent scheduling of the second request until the first is
+      * complete (to maximise our greedy late load balancing) and this
+      * precludes optimising to use semaphores serialisation of a single
+      * timeline across engines.
+      *
+      * We do not order parallel submission requests on the timeline as each
+      * parallel submission context has its own timeline and the ordering
+      * rules for parallel requests are that they must be submitted in the
+      * order received from the execbuf IOCTL. So rather than using the
+      * timeline we store a pointer to last request submitted in the
+      * relationship in the gem context and insert a submission fence
+      * between that request and request passed into this function or
+      * alternatively we use completion fence if gem context has a single
+      * timeline and this is the first submission of an execbuf IOCTL.
+      */
+     if (likely(!is_parallel_rq(rq)))
+         prev = __i915_request_ensure_ordering(rq, timeline);
+     else
+         prev = __i915_request_ensure_parallel_ordering(rq, timeline);
+ 
      /*
       * Make sure that no request gazumped us - if it was allocated after
       * our i915_request_alloc() and called __i915_request_add() before
@@@ -1945,7 -1835,7 +1921,7 @@@ long i915_request_wait(struct i915_requ
       * completion. That requires having a good predictor for the request
       * duration, which we currently lack.
       */
-     if (IS_ACTIVE(CONFIG_DRM_I915_MAX_REQUEST_BUSYWAIT) &&
+     if (CONFIG_DRM_I915_MAX_REQUEST_BUSYWAIT &&
          __i915_spin_request(rq, state))
          goto out;
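
The ordering rewrite above splits __i915_request_add_to_timeline() into two paths:
ordinary requests are serialised against the previous request on their intel_timeline,
while parallel requests (see the new comment block) are chained off parallel.last_rq in
the parent context instead. Within __i915_request_ensure_ordering(), the refactored
condition "(same_context && uses_guc) || (!uses_guc && pow2)" is just the old expression
with its operands hoisted into locals; the pow2 test is the subtle part, so here is a
worked illustration in plain userspace C (the mask values are invented for the example,
not taken from the driver):

#include <stdbool.h>
#include <stdio.h>

/* Mirrors the kernel's is_power_of_2(). Each physical engine owns one
 * bit of an engine mask; a virtual engine's mask is the union of the
 * engines it may run on.
 */
static bool is_power_of_2(unsigned long n)
{
    return n && !(n & (n - 1));
}

int main(void)
{
    unsigned long rcs0 = 1UL << 0;                /* one physical engine */
    unsigned long vcs0 = 1UL << 1;
    unsigned long virt = (1UL << 1) | (1UL << 2); /* virtual: vcs0 or vcs1 */

    printf("%d\n", is_power_of_2(rcs0 | rcs0)); /* 1: same engine */
    printf("%d\n", is_power_of_2(rcs0 | vcs0)); /* 0: two engines */
    printf("%d\n", is_power_of_2(vcs0 | virt)); /* 0: one side may migrate */
    return 0;
}

The OR of the two masks is a power of two exactly when both requests are pinned to the
same single engine, where submission order alone keeps them in order and a submit fence
suffices; otherwise the code falls back to a completion-ordered dependency, as the
comment about greedy late load balancing explains.
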
diff --combined drivers/gpu/drm/nouveau/nouveau_bo.c
index 12b107acb6ee,17a0a3ece485..fa73fe57f97b
--- a/drivers/gpu/drm/nouveau/nouveau_bo.c
+++ b/drivers/gpu/drm/nouveau/nouveau_bo.c
@@@ -844,7 -844,6 +844,7 @@@ nouveau_bo_move_init(struct nouveau_dr
                  struct ttm_resource *, struct ttm_resource *);
          int (*init)(struct nouveau_channel *, u32 handle);
      } _methods[] = {
+         { "COPY", 4, 0xc7b5, nve0_bo_move_copy, nve0_bo_move_init },
          { "COPY", 4, 0xc5b5, nve0_bo_move_copy, nve0_bo_move_init },
          { "GRCE", 0, 0xc5b5, nve0_bo_move_copy, nvc0_bo_move_init },
          { "COPY", 4, 0xc3b5, nve0_bo_move_copy, nve0_bo_move_init },
@@@ -1249,7 -1248,6 +1249,6 @@@ nouveau_ttm_tt_populate(struct ttm_devi
  {
      struct ttm_tt *ttm_dma = (void *)ttm;
      struct nouveau_drm *drm;
-     struct device *dev;
      bool slave = !!(ttm->page_flags & TTM_TT_FLAG_EXTERNAL);
  
      if (ttm_tt_is_populated(ttm))
@@@ -1262,7 -1260,6 +1261,6 @@@
      }
  
      drm = nouveau_bdev(bdev);
-     dev = drm->dev->dev;
  
      return ttm_pool_alloc(&drm->ttm.bdev.pool, ttm, ctx);
  }
@@@ -1272,7 -1269,6 +1270,6 @@@ nouveau_ttm_tt_unpopulate(struct ttm_de
                            struct ttm_tt *ttm)
  {
      struct nouveau_drm *drm;
-     struct device *dev;
      bool slave = !!(ttm->page_flags & TTM_TT_FLAG_EXTERNAL);
  
      if (slave)
@@@ -1281,7 -1277,6 +1278,6 @@@
      nouveau_ttm_tt_unbind(bdev, ttm);
  
      drm = nouveau_bdev(bdev);
-     dev = drm->dev->dev;
  
      return ttm_pool_free(&drm->ttm.bdev.pool, ttm);
  }
diff --combined drivers/gpu/drm/nouveau/nouveau_gem.c
index 8c2ecc282723,d476940ee97c..b3210e9b2cd8
--- a/drivers/gpu/drm/nouveau/nouveau_gem.c
+++ b/drivers/gpu/drm/nouveau/nouveau_gem.c
@@@ -247,8 -247,10 +247,8 @@@ nouveau_gem_new(struct nouveau_cli *cli
      }
  
      ret = nouveau_bo_init(nvbo, size, align, domain, NULL, NULL);
-     if (ret) {
-         nouveau_bo_ref(NULL, &nvbo);
+     if (ret)
          return ret;
-     }
  
      /* we restrict allowed domains on nv50+ to only the types
       * that were requested at creation time.  not possibly on
@@@ -337,7 -339,7 +337,7 @@@ nouveau_gem_set_domain(struct drm_gem_o
      struct ttm_buffer_object *bo = &nvbo->bo;
      uint32_t domains = valid_domains & nvbo->valid_domains &
          (write_domains ? write_domains : read_domains);
-     uint32_t pref_domains = 0;;
+     uint32_t pref_domains = 0;
  
      if (!domains)
          return -EINVAL;
diff --combined drivers/gpu/drm/panel/Kconfig
index 2cb8eba76af8,eb01549a6ccb..cfc8d644cedf
--- a/drivers/gpu/drm/panel/Kconfig
+++ b/drivers/gpu/drm/panel/Kconfig
@@@ -307,7 -307,6 +307,7 @@@ config DRM_PANEL_OLIMEX_LCD_OLINUXIN
      depends on OF
      depends on I2C
      depends on BACKLIGHT_CLASS_DEVICE
+     select CRC32
      help
        The panel is used with different sizes LCDs, from 480x272
        to 1280x800, and 24 bit per pixel.
@@@ -520,6 -519,16 +520,16 @@@ config DRM_PANEL_SHARP_LS043T1LE0
        Say Y here if you want to enable support for Sharp LS043T1LE01
        qHD (540x960) DSI panel as found on the Qualcomm APQ8074 Dragonboard
  
+ config DRM_PANEL_SHARP_LS060T1SX01
+     tristate "Sharp LS060T1SX01 FullHD video mode panel"
+     depends on OF
+     depends on DRM_MIPI_DSI
+     depends on BACKLIGHT_CLASS_DEVICE
+     help
+       Say Y here if you want to enable support for Sharp LS060T1SX01 6.0"
+       FullHD (1080x1920) DSI panel as found in Dragonboard Display Adapter
+       Bundle.
+ 
  config DRM_PANEL_SITRONIX_ST7701
      tristate "Sitronix ST7701 panel driver"
      depends on OF
diff --combined include/linux/dma-resv.h
index 8b6c20636a79,45f1d4812a37..dbd235ab447f
--- a/include/linux/dma-resv.h
+++ b/include/linux/dma-resv.h
@@@ -170,15 -170,20 +170,20 @@@ struct dma_resv_iter
      /** @index: index into the shared fences */
      unsigned int index;
  
-     /** @fences: the shared fences */
+     /** @fences: the shared fences; private, *MUST* not dereference */
      struct dma_resv_list *fences;
  
+     /** @shared_count: number of shared fences */
+     unsigned int shared_count;
+ 
      /** @is_restarted: true if this is the first returned fence */
      bool is_restarted;
  };
  
  struct dma_fence *dma_resv_iter_first_unlocked(struct dma_resv_iter *cursor);
  struct dma_fence *dma_resv_iter_next_unlocked(struct dma_resv_iter *cursor);
+ struct dma_fence *dma_resv_iter_first(struct dma_resv_iter *cursor);
+ struct dma_fence *dma_resv_iter_next(struct dma_resv_iter *cursor);
  
  /**
   * dma_resv_iter_begin - initialize a dma_resv_iter object
@@@ -244,6 -249,24 +249,24 @@@ static inline bool dma_resv_iter_is_res
      for (fence = dma_resv_iter_first_unlocked(cursor);    \
           fence; fence = dma_resv_iter_next_unlocked(cursor))
  
+ /**
+  * dma_resv_for_each_fence - fence iterator
+  * @cursor: a struct dma_resv_iter pointer
+  * @obj: a dma_resv object pointer
+  * @all_fences: true if all fences should be returned
+  * @fence: the current fence
+  *
+  * Iterate over the fences in a struct dma_resv object while holding the
+  * &dma_resv.lock. @all_fences controls if the shared fences are returned as
+  * well. The cursor initialisation is part of the iterator and the fence stays
+  * valid as long as the lock is held and so no extra reference to the fence is
+  * taken.
+  */
+ #define dma_resv_for_each_fence(cursor, obj, all_fences, fence)    \
+     for (dma_resv_iter_begin(cursor, obj, all_fences),        \
+          fence = dma_resv_iter_first(cursor); fence;        \
+          fence = dma_resv_iter_next(cursor))
+ 
  #define dma_resv_held(obj) lockdep_is_held(&(obj)->lock.base)
  #define dma_resv_assert_held(obj) lockdep_assert_held(&(obj)->lock.base)
  
@@@ -357,7 -380,7 +380,7 @@@ static inline int dma_resv_lock_slow_in
   */
  static inline bool __must_check dma_resv_trylock(struct dma_resv *obj)
  {
-     return ww_mutex_trylock(&obj->lock);
+     return ww_mutex_trylock(&obj->lock, NULL);
  }
  
  /**
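
The dma_resv_for_each_fence() macro added above is the locked counterpart of
dma_resv_for_each_fence_unlocked(): the caller must already hold the reservation object's
ww_mutex, and in exchange the fences stay valid for the whole walk with no extra
references taken. The i915_request_await_object() conversion earlier in this diff is an
in-tree user; as a standalone sketch, here is a hypothetical helper (not part of this
patch) that logs every fence on a reservation object:

#include <linux/dma-fence.h>
#include <linux/dma-resv.h>
#include <linux/printk.h>

/* Hypothetical example: must be called with dma_resv_lock() held; the
 * iterator takes no fence references, so there is nothing to put back.
 */
static void dump_resv_fences(struct dma_resv *resv, bool all_fences)
{
    struct dma_resv_iter cursor;
    struct dma_fence *fence;

    dma_resv_assert_held(resv);

    dma_resv_for_each_fence(&cursor, resv, all_fences, fence)
        pr_info("fence %llu:%llu signaled=%d\n",
                fence->context, fence->seqno,
                dma_fence_is_signaled(fence));
}

The ww_mutex_trylock(&obj->lock, NULL) change in dma_resv_trylock() above is related
plumbing: ww_mutex_trylock() gained a ww_acquire_ctx argument in this cycle, and passing
NULL preserves the old context-less trylock behaviour (the drm_modeset_lock.c hunk makes
the same adjustment).
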