drivers/gpu/drm/xe/xe_gt_tlb_invalidation.c
// SPDX-License-Identifier: MIT
/*
 * Copyright © 2023 Intel Corporation
 */

#include "xe_gt_tlb_invalidation.h"

#include "abi/guc_actions_abi.h"
#include "xe_device.h"
#include "xe_gt.h"
#include "xe_guc.h"
#include "xe_guc_ct.h"
#include "xe_trace.h"

#define TLB_TIMEOUT     (HZ / 4)

static void xe_gt_tlb_fence_timeout(struct work_struct *work)
{
        struct xe_gt *gt = container_of(work, struct xe_gt,
                                        tlb_invalidation.fence_tdr.work);
        struct xe_gt_tlb_invalidation_fence *fence, *next;

        spin_lock_irq(&gt->tlb_invalidation.pending_lock);
        list_for_each_entry_safe(fence, next,
                                 &gt->tlb_invalidation.pending_fences, link) {
                s64 since_inval_ms = ktime_ms_delta(ktime_get(),
                                                    fence->invalidation_time);

                if (msecs_to_jiffies(since_inval_ms) < TLB_TIMEOUT)
                        break;

                trace_xe_gt_tlb_invalidation_fence_timeout(fence);
                drm_err(&gt_to_xe(gt)->drm, "gt%d: TLB invalidation fence timeout, seqno=%d recv=%d",
                        gt->info.id, fence->seqno, gt->tlb_invalidation.seqno_recv);

                list_del(&fence->link);
                fence->base.error = -ETIME;
                dma_fence_signal(&fence->base);
                dma_fence_put(&fence->base);
        }
        if (!list_empty(&gt->tlb_invalidation.pending_fences))
                queue_delayed_work(system_wq,
                                   &gt->tlb_invalidation.fence_tdr,
                                   TLB_TIMEOUT);
        spin_unlock_irq(&gt->tlb_invalidation.pending_lock);
}

/**
 * xe_gt_tlb_invalidation_init - Initialize GT TLB invalidation state
 * @gt: graphics tile
 *
 * Initialize GT TLB invalidation state. This is purely software initialization
 * and should be called once during driver load.
 *
 * Return: 0 on success, negative error code on error.
 */
int xe_gt_tlb_invalidation_init(struct xe_gt *gt)
{
        gt->tlb_invalidation.seqno = 1;
        INIT_LIST_HEAD(&gt->tlb_invalidation.pending_fences);
        spin_lock_init(&gt->tlb_invalidation.pending_lock);
        spin_lock_init(&gt->tlb_invalidation.lock);
        gt->tlb_invalidation.fence_context = dma_fence_context_alloc(1);
        INIT_DELAYED_WORK(&gt->tlb_invalidation.fence_tdr,
                          xe_gt_tlb_fence_timeout);

        return 0;
}

static void
__invalidation_fence_signal(struct xe_gt_tlb_invalidation_fence *fence)
{
        trace_xe_gt_tlb_invalidation_fence_signal(fence);
        dma_fence_signal(&fence->base);
        dma_fence_put(&fence->base);
}

static void
invalidation_fence_signal(struct xe_gt_tlb_invalidation_fence *fence)
{
        list_del(&fence->link);
        __invalidation_fence_signal(fence);
}

/**
 * xe_gt_tlb_invalidation_reset - Handle TLB invalidation across a GT reset
 * @gt: graphics tile
 *
 * Signal any pending invalidation fences; should be called during a GT reset.
 */
void xe_gt_tlb_invalidation_reset(struct xe_gt *gt)
{
        struct xe_gt_tlb_invalidation_fence *fence, *next;
        struct xe_guc *guc = &gt->uc.guc;
        int pending_seqno;

        /*
         * CT channel is already disabled at this point. No new TLB requests can
         * appear.
         */

        mutex_lock(&gt->uc.guc.ct.lock);
        spin_lock_irq(&gt->tlb_invalidation.pending_lock);
        cancel_delayed_work(&gt->tlb_invalidation.fence_tdr);
        /*
         * We might have various kworkers waiting for TLB flushes to complete
         * which are not tracked with an explicit TLB fence, however at this
         * stage that will never happen since the CT is already disabled, so
         * make sure we signal them here under the assumption that we have
         * completed a full GT reset.
         */
        if (gt->tlb_invalidation.seqno == 1)
                pending_seqno = TLB_INVALIDATION_SEQNO_MAX - 1;
        else
                pending_seqno = gt->tlb_invalidation.seqno - 1;
        WRITE_ONCE(gt->tlb_invalidation.seqno_recv, pending_seqno);
        wake_up_all(&guc->ct.wq);

        list_for_each_entry_safe(fence, next,
                                 &gt->tlb_invalidation.pending_fences, link)
                invalidation_fence_signal(fence);
        spin_unlock_irq(&gt->tlb_invalidation.pending_lock);
        mutex_unlock(&gt->uc.guc.ct.lock);
}

static bool tlb_invalidation_seqno_past(struct xe_gt *gt, int seqno)
{
        int seqno_recv = READ_ONCE(gt->tlb_invalidation.seqno_recv);

        if (seqno - seqno_recv < -(TLB_INVALIDATION_SEQNO_MAX / 2))
                return false;

        if (seqno - seqno_recv > (TLB_INVALIDATION_SEQNO_MAX / 2))
                return true;

        return seqno_recv >= seqno;
}
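
/*
 * Editor's note, not part of the original source: seqnos handed out by
 * send_tlb_invalidation() live in [1, TLB_INVALIDATION_SEQNO_MAX) and wrap
 * back to 1, so the two range checks above make the comparison wrap-safe.
 * For example, once seqno_recv has wrapped to a small value while the queried
 * seqno is still near the top of the range, seqno - seqno_recv exceeds half
 * the seqno space and the seqno is reported as already past; in the mirror
 * case (queried seqno just wrapped, seqno_recv still near the top) the first
 * check reports it as not yet past.
 */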

static int send_tlb_invalidation(struct xe_guc *guc,
                                 struct xe_gt_tlb_invalidation_fence *fence,
                                 u32 *action, int len)
{
        struct xe_gt *gt = guc_to_gt(guc);
        int seqno;
        int ret;

        /*
         * XXX: The seqno algorithm relies on TLB invalidations being processed
         * in order, which they currently are; if that changes, the algorithm
         * will need to be updated.
         */

        mutex_lock(&guc->ct.lock);
        seqno = gt->tlb_invalidation.seqno;
        if (fence) {
                fence->seqno = seqno;
                trace_xe_gt_tlb_invalidation_fence_send(fence);
        }
        action[1] = seqno;
        ret = xe_guc_ct_send_locked(&guc->ct, action, len,
                                    G2H_LEN_DW_TLB_INVALIDATE, 1);
        if (!ret && fence) {
                spin_lock_irq(&gt->tlb_invalidation.pending_lock);
                /*
                 * We haven't actually published the TLB fence as per
                 * pending_fences, but in theory our seqno could have already
                 * been written as we acquired the pending_lock. In such a case
                 * we can just go ahead and signal the fence here.
                 */
                if (tlb_invalidation_seqno_past(gt, seqno)) {
                        __invalidation_fence_signal(fence);
                } else {
                        fence->invalidation_time = ktime_get();
                        list_add_tail(&fence->link,
                                      &gt->tlb_invalidation.pending_fences);

                        if (list_is_singular(&gt->tlb_invalidation.pending_fences))
                                queue_delayed_work(system_wq,
                                                   &gt->tlb_invalidation.fence_tdr,
                                                   TLB_TIMEOUT);
                }
                spin_unlock_irq(&gt->tlb_invalidation.pending_lock);
        } else if (ret < 0 && fence) {
                __invalidation_fence_signal(fence);
        }
        if (!ret) {
                gt->tlb_invalidation.seqno = (gt->tlb_invalidation.seqno + 1) %
                        TLB_INVALIDATION_SEQNO_MAX;
                if (!gt->tlb_invalidation.seqno)
                        gt->tlb_invalidation.seqno = 1;
                ret = seqno;
        }
        mutex_unlock(&guc->ct.lock);

        return ret;
}

#define MAKE_INVAL_OP(type)     ((type << XE_GUC_TLB_INVAL_TYPE_SHIFT) | \
                XE_GUC_TLB_INVAL_MODE_HEAVY << XE_GUC_TLB_INVAL_MODE_SHIFT | \
                XE_GUC_TLB_INVAL_FLUSH_CACHE)

/**
 * xe_gt_tlb_invalidation_guc - Issue a TLB invalidation on this GT for the GuC
 * @gt: graphics tile
 *
 * Issue a TLB invalidation for the GuC. Completion of the TLB invalidation is
 * asynchronous; the caller can use the returned seqno +
 * xe_gt_tlb_invalidation_wait() to wait for completion.
 *
 * Return: Seqno which can be passed to xe_gt_tlb_invalidation_wait on success,
 * negative error code on error.
 */
int xe_gt_tlb_invalidation_guc(struct xe_gt *gt)
{
        u32 action[] = {
                XE_GUC_ACTION_TLB_INVALIDATION,
                0,  /* seqno, replaced in send_tlb_invalidation */
                MAKE_INVAL_OP(XE_GUC_TLB_INVAL_GUC),
        };

        return send_tlb_invalidation(&gt->uc.guc, NULL, action,
                                     ARRAY_SIZE(action));
}
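
/*
 * Example added by the editor (a minimal sketch, not taken from in-tree
 * callers; "seqno" and "err" are local names used only for illustration):
 *
 *      int seqno, err = 0;
 *
 *      seqno = xe_gt_tlb_invalidation_guc(gt);
 *      if (seqno > 0)
 *              err = xe_gt_tlb_invalidation_wait(gt, seqno);
 *      else
 *              err = seqno;
 *
 * A successful send returns a positive seqno (seqnos start at 1 and never
 * reach 0), so a positive return value is distinguishable from a negative
 * error code.
 */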

/**
 * xe_gt_tlb_invalidation_vma - Issue a TLB invalidation on this GT for a VMA
 * @gt: graphics tile
 * @fence: invalidation fence which will be signaled on TLB invalidation
 * completion, can be NULL
 * @vma: VMA to invalidate
 *
 * Issue a range based TLB invalidation if supported, otherwise fall back to a
 * full TLB invalidation. Completion of the TLB invalidation is asynchronous;
 * the caller can either use the invalidation fence or the returned seqno +
 * xe_gt_tlb_invalidation_wait() to wait for completion.
 *
 * Return: Seqno which can be passed to xe_gt_tlb_invalidation_wait on success,
 * negative error code on error.
 */
int xe_gt_tlb_invalidation_vma(struct xe_gt *gt,
                               struct xe_gt_tlb_invalidation_fence *fence,
                               struct xe_vma *vma)
{
        struct xe_device *xe = gt_to_xe(gt);
#define MAX_TLB_INVALIDATION_LEN        7
        u32 action[MAX_TLB_INVALIDATION_LEN];
        int len = 0;

        xe_gt_assert(gt, vma);

        action[len++] = XE_GUC_ACTION_TLB_INVALIDATION;
        action[len++] = 0; /* seqno, replaced in send_tlb_invalidation */
        if (!xe->info.has_range_tlb_invalidation) {
                action[len++] = MAKE_INVAL_OP(XE_GUC_TLB_INVAL_FULL);
        } else {
                u64 start = xe_vma_start(vma);
                u64 length = xe_vma_size(vma);
                u64 align, end;

                if (length < SZ_4K)
                        length = SZ_4K;

                /*
                 * We need to invalidate at a coarser granularity if the start
                 * address is not aligned to the length: grow the length (and
                 * align the start down to it) until the resulting power-of-two
                 * range covers the required range.
                 */
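                /*
                 * Worked example added by the editor (values made up for
                 * illustration): for a VMA at 0x7000 of size 0x2000, length
                 * starts at 0x2000, so align = 0x2000, start = 0x6000 and
                 * end = 0xa000. The loop below then grows length to 0x4000
                 * (start 0x4000), 0x8000 (start 0x0) and finally 0x10000, at
                 * which point start + length >= end. The invalidation thus
                 * covers [0x0, 0x10000) and the size field sent to the GuC is
                 * ilog2(0x10000) - ilog2(SZ_4K) = 4.
                 */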
                align = roundup_pow_of_two(length);
                start = ALIGN_DOWN(xe_vma_start(vma), align);
                end = ALIGN(xe_vma_end(vma), align);
                length = align;
                while (start + length < end) {
                        length <<= 1;
                        start = ALIGN_DOWN(xe_vma_start(vma), length);
                }

                /*
                 * The minimum invalidation size the hardware expects for a
                 * 2MB page is 16MB.
                 */
                if (length >= SZ_2M) {
                        length = max_t(u64, SZ_16M, length);
                        start = ALIGN_DOWN(xe_vma_start(vma), length);
                }

                xe_gt_assert(gt, length >= SZ_4K);
                xe_gt_assert(gt, is_power_of_2(length));
                xe_gt_assert(gt, !(length & GENMASK(ilog2(SZ_16M) - 1, ilog2(SZ_2M) + 1)));
                xe_gt_assert(gt, IS_ALIGNED(start, length));

                action[len++] = MAKE_INVAL_OP(XE_GUC_TLB_INVAL_PAGE_SELECTIVE);
                action[len++] = xe_vma_vm(vma)->usm.asid;
                action[len++] = lower_32_bits(start);
                action[len++] = upper_32_bits(start);
                action[len++] = ilog2(length) - ilog2(SZ_4K);
        }

        xe_gt_assert(gt, len <= MAX_TLB_INVALIDATION_LEN);

        return send_tlb_invalidation(&gt->uc.guc, fence, action, len);
}
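
/*
 * Note added by the editor (a sketch of the fence-based path, inferred from
 * the code above rather than lifted from the callers): when a non-NULL @fence
 * is passed, its embedded dma_fence (fence->base) must already have been
 * initialized by the caller, e.g. with dma_fence_init() against
 * gt->tlb_invalidation.lock and gt->tlb_invalidation.fence_context set up in
 * xe_gt_tlb_invalidation_init(). One reference is dropped with
 * dma_fence_put() when the fence is signalled here, so a caller that waits on
 * fence->base must hold its own reference across the wait.
 */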

/**
 * xe_gt_tlb_invalidation_wait - Wait for a TLB invalidation to complete
 * @gt: graphics tile
 * @seqno: seqno to wait on, as returned by xe_gt_tlb_invalidation_guc() or
 * xe_gt_tlb_invalidation_vma()
 *
 * Wait up to TLB_TIMEOUT (a quarter of a second) for a TLB invalidation to
 * complete; in practice the invalidation should always be received well within
 * that.
 *
 * Return: 0 on success, -ETIME on TLB invalidation timeout
 */
int xe_gt_tlb_invalidation_wait(struct xe_gt *gt, int seqno)
{
        struct xe_device *xe = gt_to_xe(gt);
        struct xe_guc *guc = &gt->uc.guc;
        struct drm_printer p = drm_err_printer(__func__);
        int ret;

        /*
         * XXX: See above, this algorithm only works if seqnos are always
         * received in order.
         */
        ret = wait_event_timeout(guc->ct.wq,
                                 tlb_invalidation_seqno_past(gt, seqno),
                                 TLB_TIMEOUT);
        if (!ret) {
                drm_err(&xe->drm, "gt%d: TLB invalidation timed out, seqno=%d, recv=%d\n",
                        gt->info.id, seqno, gt->tlb_invalidation.seqno_recv);
                xe_guc_ct_print(&guc->ct, &p, true);
                return -ETIME;
        }

        return 0;
}

/**
 * xe_guc_tlb_invalidation_done_handler - TLB invalidation done handler
 * @guc: guc
 * @msg: message indicating TLB invalidation done
 * @len: length of message
 *
 * Parse the seqno of the completed TLB invalidation, wake any waiters on that
 * seqno, and signal any invalidation fences up to and including it. The
 * algorithm relies on seqnos being received in order.
 *
 * Return: 0 on success, -EPROTO for malformed messages.
 */
int xe_guc_tlb_invalidation_done_handler(struct xe_guc *guc, u32 *msg, u32 len)
{
        struct xe_gt *gt = guc_to_gt(guc);
        struct xe_gt_tlb_invalidation_fence *fence, *next;
        unsigned long flags;

        if (unlikely(len != 1))
                return -EPROTO;

        /*
         * This can be run both directly from the IRQ handler and from
         * process_g2h_msg(). Only one of them processes any individual CT
         * message, but the order in which they are processed here could result
         * in skipping a seqno. To handle that we simply process all the seqnos
         * from the last seqno_recv up to and including the one in msg[0]. The
         * delta should be very small, so there shouldn't be many pending_fences
         * to iterate over here.
         *
         * From the GuC's POV we expect the seqnos to always appear in order,
         * so if we see something later in the timeline we can be sure that
         * anything appearing earlier has already signalled; we just have yet to
         * officially process the CT message, as when racing against
         * process_g2h_msg().
         */
        spin_lock_irqsave(&gt->tlb_invalidation.pending_lock, flags);
        if (tlb_invalidation_seqno_past(gt, msg[0])) {
                spin_unlock_irqrestore(&gt->tlb_invalidation.pending_lock, flags);
                return 0;
        }

        /*
         * wake_up_all() and wait_event_timeout() already have the correct
         * barriers.
         */
        WRITE_ONCE(gt->tlb_invalidation.seqno_recv, msg[0]);
        wake_up_all(&guc->ct.wq);

        list_for_each_entry_safe(fence, next,
                                 &gt->tlb_invalidation.pending_fences, link) {
                trace_xe_gt_tlb_invalidation_fence_recv(fence);

                if (!tlb_invalidation_seqno_past(gt, fence->seqno))
                        break;

                invalidation_fence_signal(fence);
        }

        if (!list_empty(&gt->tlb_invalidation.pending_fences))
                mod_delayed_work(system_wq,
                                 &gt->tlb_invalidation.fence_tdr,
                                 TLB_TIMEOUT);
        else
                cancel_delayed_work(&gt->tlb_invalidation.fence_tdr);

        spin_unlock_irqrestore(&gt->tlb_invalidation.pending_lock, flags);

        return 0;
}