+ #include <linux/stackdepot.h>
+ #include <linux/stacktrace.h>
+
#include <drm/drm_crtc.h>
#include <drm/drm_device.h>
#include <drm/drm_modeset_lock.h>
+ #include <drm/drm_print.h>
/**
* DOC: kms locking
static DEFINE_WW_CLASS(crtc_ww_class);
+ #if IS_ENABLED(CONFIG_DRM_DEBUG_MODESET_LOCK)
+ static noinline depot_stack_handle_t __stack_depot_save(void)
+ {
+ unsigned long entries[8];
+ unsigned int n;
+
+ n = stack_trace_save(entries, ARRAY_SIZE(entries), 1);
+
+ return stack_depot_save(entries, n, GFP_NOWAIT | __GFP_NOWARN);
+ }
+
+ static void __stack_depot_print(depot_stack_handle_t stack_depot)
+ {
+ struct drm_printer p = drm_debug_printer("drm_modeset_lock");
+ unsigned long *entries;
+ unsigned int nr_entries;
+ char *buf;
+
+ buf = kmalloc(PAGE_SIZE, GFP_NOWAIT | __GFP_NOWARN);
+ if (!buf)
+ return;
+
+ nr_entries = stack_depot_fetch(stack_depot, &entries);
+ stack_trace_snprint(buf, PAGE_SIZE, entries, nr_entries, 2);
+
+ drm_printf(&p, "attempting to lock a contended lock without backoff:\n%s", buf);
+
+ kfree(buf);
+ }
+ #else /* CONFIG_DRM_DEBUG_MODESET_LOCK */
+ static depot_stack_handle_t __stack_depot_save(void)
+ {
+ return 0;
+ }
+ static void __stack_depot_print(depot_stack_handle_t stack_depot)
+ {
+ }
+ #endif /* CONFIG_DRM_DEBUG_MODESET_LOCK */
+
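For illustration only (not part of the patch; the helper name and the crtc argument are hypothetical), this is the kind of driver bug the helpers above are meant to diagnose: after drm_modeset_lock() reports -EDEADLK, taking a lock on the same acquire context without first calling drm_modeset_backoff() now trips the WARN added below and prints the backtrace saved at contention time.

	/* Hedged sketch of the buggy pattern the saved backtrace points at. */
	static void example_buggy_relock(struct drm_crtc *crtc,
					 struct drm_modeset_acquire_ctx *ctx)
	{
		int ret;

		ret = drm_modeset_lock(&crtc->mutex, ctx);
		if (ret == -EDEADLK) {
			/*
			 * BUG: no drm_modeset_backoff() here; the next attempt
			 * hits WARN_ON(ctx->contended) and the stack captured
			 * by __stack_depot_save() is printed.
			 */
			ret = drm_modeset_lock(&crtc->mutex, ctx);
		}
	}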
/**
* drm_modeset_lock_all - take all modeset locks
* @dev: DRM device
*/
void drm_modeset_drop_locks(struct drm_modeset_acquire_ctx *ctx)
{
- WARN_ON(ctx->contended);
+ if (WARN_ON(ctx->contended))
+ __stack_depot_print(ctx->stack_depot);
+
while (!list_empty(&ctx->locked)) {
struct drm_modeset_lock *lock;
{
int ret;
- WARN_ON(ctx->contended);
+ if (WARN_ON(ctx->contended))
+ __stack_depot_print(ctx->stack_depot);
if (ctx->trylock_only) {
lockdep_assert_held(&ctx->ww_ctx);
- if (!ww_mutex_trylock(&lock->mutex))
+ if (!ww_mutex_trylock(&lock->mutex, NULL))
return -EBUSY;
else
return 0;
ret = 0;
} else if (ret == -EDEADLK) {
ctx->contended = lock;
+ ctx->stack_depot = __stack_depot_save();
}
return ret;
struct drm_modeset_lock *contended = ctx->contended;
ctx->contended = NULL;
+ ctx->stack_depot = 0;
if (WARN_ON(!contended))
return 0;
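For contrast, a hedged sketch of the documented retry pattern (the crtc and the function name are again hypothetical): -EDEADLK is answered with drm_modeset_backoff(), which waits for the contended lock and resets the context, so ctx->contended and the saved stack handle are cleared before the next attempt.

	/* Hedged sketch of the correct -EDEADLK handling. */
	static int example_lock_with_backoff(struct drm_crtc *crtc)
	{
		struct drm_modeset_acquire_ctx ctx;
		int ret;

		drm_modeset_acquire_init(&ctx, 0);
	retry:
		ret = drm_modeset_lock(&crtc->mutex, &ctx);
		if (ret == -EDEADLK) {
			ret = drm_modeset_backoff(&ctx);
			if (!ret)
				goto retry;
		}

		/* ... modeset work protected by crtc->mutex ... */

		drm_modeset_drop_locks(&ctx);
		drm_modeset_acquire_fini(&ctx);
		return ret;
	}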
i915_sw_fence_init(&rq->submit, submit_notify);
i915_sw_fence_init(&rq->semaphore, semaphore_notify);
- dma_fence_init(&rq->fence, &i915_fence_ops, &rq->lock, 0, 0);
-
rq->capture_list = NULL;
init_llist_head(&rq->execute_cb);
rq->ring = ce->ring;
rq->execution_mask = ce->engine->mask;
- kref_init(&rq->fence.refcount);
- rq->fence.flags = 0;
- rq->fence.error = 0;
- INIT_LIST_HEAD(&rq->fence.cb_list);
-
ret = intel_timeline_get_seqno(tl, rq, &seqno);
if (ret)
goto err_free;
- rq->fence.context = tl->fence_context;
- rq->fence.seqno = seqno;
+ dma_fence_init(&rq->fence, &i915_fence_ops, &rq->lock,
+ tl->fence_context, seqno);
RCU_INIT_POINTER(rq->timeline, tl);
rq->hwsp_seqno = tl->hwsp_seqno;
return 0;
}
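Background for the hunk above (prototype taken from <linux/dma-fence.h>): dma_fence_init() already initialises the refcount, flags, error and callback list, so once the call is deferred until the timeline context and seqno are known, the open-coded field setup removed above becomes redundant.

	void dma_fence_init(struct dma_fence *fence, const struct dma_fence_ops *ops,
			    spinlock_t *lock, u64 context, u64 seqno);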
+static bool
+can_use_semaphore_wait(struct i915_request *to, struct i915_request *from)
+{
+ return to->engine->gt->ggtt == from->engine->gt->ggtt;
+}
+
static int
emit_semaphore_wait(struct i915_request *to,
struct i915_request *from,
const intel_engine_mask_t mask = READ_ONCE(from->engine)->mask;
struct i915_sw_fence *wait = &to->submit;
+ if (!can_use_semaphore_wait(to, from))
+ goto await_fence;
+
if (!intel_context_use_semaphores(to->context))
goto await_fence;
* immediate execution, and so we must wait until it reaches the
* active slot.
*/
- if (intel_engine_has_semaphores(to->engine) &&
+ if (can_use_semaphore_wait(to, from) &&
+ intel_engine_has_semaphores(to->engine) &&
!i915_request_has_initial_breadcrumb(to)) {
err = __emit_semaphore_wait(to, from, from->fence.seqno - 1);
if (err < 0)
return err;
}
+static inline bool is_parallel_rq(struct i915_request *rq)
+{
+ return intel_context_is_parallel(rq->context);
+}
+
+static inline struct intel_context *request_to_parent(struct i915_request *rq)
+{
+ return intel_context_to_parent(rq->context);
+}
+
+static bool is_same_parallel_context(struct i915_request *to,
+ struct i915_request *from)
+{
+ if (is_parallel_rq(to))
+ return request_to_parent(to) == request_to_parent(from);
+
+ return false;
+}
+
int
i915_request_await_execution(struct i915_request *rq,
struct dma_fence *fence)
* want to run our callback in all cases.
*/
- if (dma_fence_is_i915(fence))
+ if (dma_fence_is_i915(fence)) {
+ if (is_same_parallel_context(rq, to_request(fence)))
+ continue;
ret = __i915_request_await_execution(rq,
to_request(fence));
- else
+ } else {
ret = i915_request_await_external(rq, fence);
+ }
if (ret < 0)
return ret;
} while (--nchild);
fence))
continue;
- if (dma_fence_is_i915(fence))
+ if (dma_fence_is_i915(fence)) {
+ if (is_same_parallel_context(rq, to_request(fence)))
+ continue;
ret = i915_request_await_request(rq, to_request(fence));
- else
+ } else {
ret = i915_request_await_external(rq, fence);
+ }
if (ret < 0)
return ret;
struct drm_i915_gem_object *obj,
bool write)
{
- struct dma_fence *excl;
+ struct dma_resv_iter cursor;
+ struct dma_fence *fence;
int ret = 0;
- if (write) {
- struct dma_fence **shared;
- unsigned int count, i;
-
- ret = dma_resv_get_fences(obj->base.resv, &excl, &count,
- &shared);
+ dma_resv_for_each_fence(&cursor, obj->base.resv, write, fence) {
+ ret = i915_request_await_dma_fence(to, fence);
if (ret)
- return ret;
-
- for (i = 0; i < count; i++) {
- ret = i915_request_await_dma_fence(to, shared[i]);
- if (ret)
- break;
-
- dma_fence_put(shared[i]);
- }
-
- for (; i < count; i++)
- dma_fence_put(shared[i]);
- kfree(shared);
- } else {
- excl = dma_resv_get_excl_unlocked(obj->base.resv);
- }
-
- if (excl) {
- if (ret == 0)
- ret = i915_request_await_dma_fence(to, excl);
-
- dma_fence_put(excl);
+ break;
}
return ret;
}
static struct i915_request *
-__i915_request_add_to_timeline(struct i915_request *rq)
+__i915_request_ensure_parallel_ordering(struct i915_request *rq,
+ struct intel_timeline *timeline)
{
- struct intel_timeline *timeline = i915_request_timeline(rq);
struct i915_request *prev;
- /*
- * Dependency tracking and request ordering along the timeline
- * is special cased so that we can eliminate redundant ordering
- * operations while building the request (we know that the timeline
- * itself is ordered, and here we guarantee it).
- *
- * As we know we will need to emit tracking along the timeline,
- * we embed the hooks into our request struct -- at the cost of
- * having to have specialised no-allocation interfaces (which will
- * be beneficial elsewhere).
- *
- * A second benefit to open-coding i915_request_await_request is
- * that we can apply a slight variant of the rules specialised
- * for timelines that jump between engines (such as virtual engines).
- * If we consider the case of virtual engine, we must emit a dma-fence
- * to prevent scheduling of the second request until the first is
- * complete (to maximise our greedy late load balancing) and this
- * precludes optimising to use semaphores serialisation of a single
- * timeline across engines.
- */
+ GEM_BUG_ON(!is_parallel_rq(rq));
+
+ prev = request_to_parent(rq)->parallel.last_rq;
+ if (prev) {
+ if (!__i915_request_is_complete(prev)) {
+ i915_sw_fence_await_sw_fence(&rq->submit,
+ &prev->submit,
+ &rq->submitq);
+
+ if (rq->engine->sched_engine->schedule)
+ __i915_sched_node_add_dependency(&rq->sched,
+ &prev->sched,
+ &rq->dep,
+ 0);
+ }
+ i915_request_put(prev);
+ }
+
+ request_to_parent(rq)->parallel.last_rq = i915_request_get(rq);
+
+ return to_request(__i915_active_fence_set(&timeline->last_request,
+ &rq->fence));
+}
+
+static struct i915_request *
+__i915_request_ensure_ordering(struct i915_request *rq,
+ struct intel_timeline *timeline)
+{
+ struct i915_request *prev;
+
+ GEM_BUG_ON(is_parallel_rq(rq));
+
prev = to_request(__i915_active_fence_set(&timeline->last_request,
&rq->fence));
+
if (prev && !__i915_request_is_complete(prev)) {
bool uses_guc = intel_engine_uses_guc(rq->engine);
+ bool pow2 = is_power_of_2(READ_ONCE(prev->engine)->mask |
+ rq->engine->mask);
+ bool same_context = prev->context == rq->context;
/*
* The requests are supposed to be kept in order. However,
* is used as a barrier for external modification to this
* context.
*/
- GEM_BUG_ON(prev->context == rq->context &&
+ GEM_BUG_ON(same_context &&
i915_seqno_passed(prev->fence.seqno,
rq->fence.seqno));
- if ((!uses_guc &&
- is_power_of_2(READ_ONCE(prev->engine)->mask | rq->engine->mask)) ||
- (uses_guc && prev->context == rq->context))
+ if ((same_context && uses_guc) || (!uses_guc && pow2))
i915_sw_fence_await_sw_fence(&rq->submit,
&prev->submit,
&rq->submitq);
0);
}
+ return prev;
+}
+
+static struct i915_request *
+__i915_request_add_to_timeline(struct i915_request *rq)
+{
+ struct intel_timeline *timeline = i915_request_timeline(rq);
+ struct i915_request *prev;
+
+ /*
+ * Dependency tracking and request ordering along the timeline
+ * is special cased so that we can eliminate redundant ordering
+ * operations while building the request (we know that the timeline
+ * itself is ordered, and here we guarantee it).
+ *
+ * As we know we will need to emit tracking along the timeline,
+ * we embed the hooks into our request struct -- at the cost of
+ * having to have specialised no-allocation interfaces (which will
+ * be beneficial elsewhere).
+ *
+ * A second benefit to open-coding i915_request_await_request is
+ * that we can apply a slight variant of the rules specialised
+ * for timelines that jump between engines (such as virtual engines).
+ * If we consider the case of virtual engine, we must emit a dma-fence
+ * to prevent scheduling of the second request until the first is
+ * complete (to maximise our greedy late load balancing) and this
+ * precludes optimising to use semaphores serialisation of a single
+ * timeline across engines.
+ *
+ * We do not order parallel submission requests on the timeline as each
+ * parallel submission context has its own timeline and the ordering
+ * rule for parallel requests is that they must be submitted in the
+ * order received from the execbuf IOCTL. So, rather than using the
+ * timeline, we store a pointer to the last request submitted for the
+ * parallel relationship in the gem context and insert a submission
+ * fence between that request and the request passed into this
+ * function. Alternatively, we use the completion fence if the gem
+ * context has a single timeline and this is the first submission of
+ * an execbuf IOCTL.
+ */
+ if (likely(!is_parallel_rq(rq)))
+ prev = __i915_request_ensure_ordering(rq, timeline);
+ else
+ prev = __i915_request_ensure_parallel_ordering(rq, timeline);
+
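To make the two paths above concrete, a hedged sketch (rq_prev/rq_next are illustrative names, not from the patch) of what the parallel branch reduces to for consecutive requests of the same parallel parent: ordering comes from chaining submit fences through parallel.last_rq rather than from the shared timeline.

	/* Hedged sketch: how two back-to-back parallel requests end up ordered. */
	static void example_parallel_chain(struct i915_request *rq_prev,
					   struct i915_request *rq_next)
	{
		/* rq_prev is what request_to_parent(rq_next)->parallel.last_rq held */
		if (!__i915_request_is_complete(rq_prev))
			i915_sw_fence_await_sw_fence(&rq_next->submit,
						     &rq_prev->submit,
						     &rq_next->submitq);
		/* rq_next then becomes parallel.last_rq for the next submission */
	}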
/*
* Make sure that no request gazumped us - if it was allocated after
* our i915_request_alloc() and called __i915_request_add() before
* completion. That requires having a good predictor for the request
* duration, which we currently lack.
*/
- if (IS_ACTIVE(CONFIG_DRM_I915_MAX_REQUEST_BUSYWAIT) &&
+ if (CONFIG_DRM_I915_MAX_REQUEST_BUSYWAIT &&
__i915_spin_request(rq, state))
goto out;
/** @index: index into the shared fences */
unsigned int index;
- /** @fences: the shared fences */
+ /** @fences: the shared fences; private, *MUST* not dereference */
struct dma_resv_list *fences;
+ /** @shared_count: number of shared fences */
+ unsigned int shared_count;
+
/** @is_restarted: true if this is the first returned fence */
bool is_restarted;
};
struct dma_fence *dma_resv_iter_first_unlocked(struct dma_resv_iter *cursor);
struct dma_fence *dma_resv_iter_next_unlocked(struct dma_resv_iter *cursor);
+ struct dma_fence *dma_resv_iter_first(struct dma_resv_iter *cursor);
+ struct dma_fence *dma_resv_iter_next(struct dma_resv_iter *cursor);
/**
* dma_resv_iter_begin - initialize a dma_resv_iter object
for (fence = dma_resv_iter_first_unlocked(cursor); \
fence; fence = dma_resv_iter_next_unlocked(cursor))
+ /**
+ * dma_resv_for_each_fence - fence iterator
+ * @cursor: a struct dma_resv_iter pointer
+ * @obj: a dma_resv object pointer
+ * @all_fences: true if all fences should be returned
+ * @fence: the current fence
+ *
+ * Iterate over the fences in a struct dma_resv object while holding the
+ * &dma_resv.lock. @all_fences controls whether the shared fences are returned as
+ * well. The cursor initialisation is part of the iterator and the fence stays
+ * valid as long as the lock is held and so no extra reference to the fence is
+ * taken.
+ */
+ #define dma_resv_for_each_fence(cursor, obj, all_fences, fence) \
+ for (dma_resv_iter_begin(cursor, obj, all_fences), \
+ fence = dma_resv_iter_first(cursor); fence; \
+ fence = dma_resv_iter_next(cursor))
+
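A hedged usage sketch of the new locked iterator (the helper is hypothetical; dma_resv_assert_held() and the dma_fence fields used are existing API): callers already holding the reservation lock can walk all fences without taking extra references, which is the pattern the i915_request_await_object() hunk earlier in this series relies on.

	/* Hedged sketch: dump every fence of an already-locked reservation object. */
	static void example_dump_fences(struct dma_resv *resv)
	{
		struct dma_resv_iter cursor;
		struct dma_fence *fence;

		dma_resv_assert_held(resv);

		/* all_fences == true: exclusive and shared fences alike */
		dma_resv_for_each_fence(&cursor, resv, true, fence)
			pr_info("fence %llu:%llu signaled=%d\n",
				fence->context, fence->seqno,
				dma_fence_is_signaled(fence));
	}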
#define dma_resv_held(obj) lockdep_is_held(&(obj)->lock.base)
#define dma_resv_assert_held(obj) lockdep_assert_held(&(obj)->lock.base)
*/
static inline bool __must_check dma_resv_trylock(struct dma_resv *obj)
{
- return ww_mutex_trylock(&obj->lock);
+ return ww_mutex_trylock(&obj->lock, NULL);
}
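A hedged usage sketch for the updated trylock (helper name hypothetical; dma_resv_test_signaled() is existing API): non-blocking paths such as shrinkers typically skip contended objects, and with the new ww_mutex_trylock() signature the NULL context above keeps the previous, uncontexted behaviour.

	/* Hedged sketch: peek at an object without blocking on its reservation lock. */
	static bool example_resv_idle(struct dma_resv *resv)
	{
		bool idle;

		if (!dma_resv_trylock(resv))
			return false;	/* contended, let the caller skip this object */

		idle = dma_resv_test_signaled(resv, true);
		dma_resv_unlock(resv);

		return idle;
	}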
/**