/*
 * Copyright 2014 Advanced Micro Devices, Inc.
 * Copyright 2008 Red Hat Inc.
 * Copyright 2009 Jerome Glisse.
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the "Software"),
 * to deal in the Software without restriction, including without limitation
 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
 * and/or sell copies of the Software, and to permit persons to whom the
 * Software is furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice shall be included in
 * all copies or substantial portions of the Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
 * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
 * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
 * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
 * OTHER DEALINGS IN THE SOFTWARE.
 *
 */

#include <linux/firmware.h>
#include <linux/pm_runtime.h>

#include "amdgpu.h"
#include "amdgpu_gfx.h"
#include "amdgpu_rlc.h"
#include "amdgpu_ras.h"
#include "amdgpu_reset.h"
#include "amdgpu_xcp.h"
#include "amdgpu_xgmi.h"

/* delay 0.1 second to enable gfx off feature */
#define GFX_OFF_DELAY_ENABLE		msecs_to_jiffies(100)

#define GFX_OFF_NO_DELAY 0

/*
 * GPU GFX IP block helper functions.
 */

int amdgpu_gfx_mec_queue_to_bit(struct amdgpu_device *adev, int mec,
				int pipe, int queue)
{
	int bit = 0;

	bit += mec * adev->gfx.mec.num_pipe_per_mec
		* adev->gfx.mec.num_queue_per_pipe;
	bit += pipe * adev->gfx.mec.num_queue_per_pipe;
	bit += queue;

	return bit;
}
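
/*
 * Worked example for the encode/decode helpers here (assuming a hypothetical
 * part with 4 pipes per MEC and 8 queues per pipe): (mec = 1, pipe = 2,
 * queue = 3) encodes to bit = 1 * 4 * 8 + 2 * 8 + 3 = 51, and decoding 51
 * gives queue = 51 % 8 = 3, pipe = (51 / 8) % 4 = 2, mec = (51 / 8) / 4 = 1.
 */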

void amdgpu_queue_mask_bit_to_mec_queue(struct amdgpu_device *adev, int bit,
					int *mec, int *pipe, int *queue)
{
	*queue = bit % adev->gfx.mec.num_queue_per_pipe;
	*pipe = (bit / adev->gfx.mec.num_queue_per_pipe)
		% adev->gfx.mec.num_pipe_per_mec;
	*mec = (bit / adev->gfx.mec.num_queue_per_pipe)
		/ adev->gfx.mec.num_pipe_per_mec;

}

bool amdgpu_gfx_is_mec_queue_enabled(struct amdgpu_device *adev,
				     int xcc_id, int mec, int pipe, int queue)
{
	return test_bit(amdgpu_gfx_mec_queue_to_bit(adev, mec, pipe, queue),
			adev->gfx.mec_bitmap[xcc_id].queue_bitmap);
}

int amdgpu_gfx_me_queue_to_bit(struct amdgpu_device *adev,
			       int me, int pipe, int queue)
{
	int bit = 0;

	bit += me * adev->gfx.me.num_pipe_per_me
		* adev->gfx.me.num_queue_per_pipe;
	bit += pipe * adev->gfx.me.num_queue_per_pipe;
	bit += queue;

	return bit;
}

bool amdgpu_gfx_is_me_queue_enabled(struct amdgpu_device *adev,
				    int me, int pipe, int queue)
{
	return test_bit(amdgpu_gfx_me_queue_to_bit(adev, me, pipe, queue),
			adev->gfx.me.queue_bitmap);
}

/**
 * amdgpu_gfx_parse_disable_cu - Parse the disable_cu module parameter
 *
 * @mask: array in which the per-shader array disable masks will be stored
 * @max_se: number of SEs
 * @max_sh: number of SHs
 *
 * The bitmask of CUs to be disabled in the shader array determined by se and
 * sh is stored in mask[se * max_sh + sh].
 */
void amdgpu_gfx_parse_disable_cu(unsigned int *mask, unsigned int max_se, unsigned int max_sh)
{
	unsigned int se, sh, cu;
	const char *p;

	memset(mask, 0, sizeof(*mask) * max_se * max_sh);

	if (!amdgpu_disable_cu || !*amdgpu_disable_cu)
		return;

	p = amdgpu_disable_cu;
	for (;;) {
		char *next;
		int ret = sscanf(p, "%u.%u.%u", &se, &sh, &cu);

		if (ret < 3) {
			DRM_ERROR("amdgpu: could not parse disable_cu\n");
			return;
		}

		if (se < max_se && sh < max_sh && cu < 16) {
			DRM_INFO("amdgpu: disabling CU %u.%u.%u\n", se, sh, cu);
			mask[se * max_sh + sh] |= 1u << cu;
		} else {
			DRM_ERROR("amdgpu: disable_cu %u.%u.%u is out of range\n",
				  se, sh, cu);
		}

		next = strchr(p, ',');
		if (!next)
			break;
		p = next + 1;
	}
}
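
/*
 * Example (hypothetical values): a command line of amdgpu.disable_cu=2.0.3,1.1.14
 * parses as "se.sh.cu" triples and disables CU 3 of SE2/SH0 and CU 14 of
 * SE1/SH1, i.e. mask[2 * max_sh + 0] |= BIT(3) and mask[1 * max_sh + 1] |= BIT(14).
 */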

static bool amdgpu_gfx_is_graphics_multipipe_capable(struct amdgpu_device *adev)
{
	return amdgpu_async_gfx_ring && adev->gfx.me.num_pipe_per_me > 1;
}

static bool amdgpu_gfx_is_compute_multipipe_capable(struct amdgpu_device *adev)
{
	if (amdgpu_compute_multipipe != -1) {
		DRM_INFO("amdgpu: forcing compute pipe policy %d\n",
			 amdgpu_compute_multipipe);
		return amdgpu_compute_multipipe == 1;
	}

	if (amdgpu_ip_version(adev, GC_HWIP, 0) > IP_VERSION(9, 0, 0))
		return true;

	/* FIXME: spreading the queues across pipes causes perf regressions
	 * on POLARIS11 compute workloads */
	if (adev->asic_type == CHIP_POLARIS11)
		return false;

	return adev->gfx.mec.num_mec > 1;
}

bool amdgpu_gfx_is_high_priority_graphics_queue(struct amdgpu_device *adev,
						struct amdgpu_ring *ring)
{
	int queue = ring->queue;
	int pipe = ring->pipe;

	/* Policy: use pipe1 queue0 as high priority graphics queue if we
	 * have more than one gfx pipe.
	 */
	if (amdgpu_gfx_is_graphics_multipipe_capable(adev) &&
	    adev->gfx.num_gfx_rings > 1 && pipe == 1 && queue == 0) {
		int me = ring->me;
		int bit;

		bit = amdgpu_gfx_me_queue_to_bit(adev, me, pipe, queue);
		if (ring == &adev->gfx.gfx_ring[bit])
			return true;
	}

	return false;
}

bool amdgpu_gfx_is_high_priority_compute_queue(struct amdgpu_device *adev,
					       struct amdgpu_ring *ring)
{
	/* Policy: use 1st queue as high priority compute queue if we
	 * have more than one compute queue.
	 */
	if (adev->gfx.num_compute_rings > 1 &&
	    ring == &adev->gfx.compute_ring[0])
		return true;

	return false;
}

void amdgpu_gfx_compute_queue_acquire(struct amdgpu_device *adev)
{
	int i, j, queue, pipe;
	bool multipipe_policy = amdgpu_gfx_is_compute_multipipe_capable(adev);
	int max_queues_per_mec = min(adev->gfx.mec.num_pipe_per_mec *
				     adev->gfx.mec.num_queue_per_pipe,
				     adev->gfx.num_compute_rings);
	int num_xcc = adev->gfx.xcc_mask ? NUM_XCC(adev->gfx.xcc_mask) : 1;

	if (multipipe_policy) {
		/* policy: make queues evenly cross all pipes on MEC1 only;
		 * for multiple XCCs, just use the original policy for simplicity */
		for (j = 0; j < num_xcc; j++) {
			for (i = 0; i < max_queues_per_mec; i++) {
				pipe = i % adev->gfx.mec.num_pipe_per_mec;
				queue = (i / adev->gfx.mec.num_pipe_per_mec) %
					adev->gfx.mec.num_queue_per_pipe;

				set_bit(pipe * adev->gfx.mec.num_queue_per_pipe + queue,
					adev->gfx.mec_bitmap[j].queue_bitmap);
			}
		}
	} else {
		/* policy: amdgpu owns all queues in the given pipe */
		for (j = 0; j < num_xcc; j++) {
			for (i = 0; i < max_queues_per_mec; ++i)
				set_bit(i, adev->gfx.mec_bitmap[j].queue_bitmap);
		}
	}

	for (j = 0; j < num_xcc; j++) {
		dev_dbg(adev->dev, "mec queue bitmap weight=%d\n",
			bitmap_weight(adev->gfx.mec_bitmap[j].queue_bitmap, AMDGPU_MAX_COMPUTE_QUEUES));
	}
}
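
/*
 * Example of the multipipe policy above (assuming 4 pipes per MEC, 8 queues
 * per pipe and 8 compute rings): queue bits are taken round-robin across
 * pipes, i.e. pipe0/queue0, pipe1/queue0, pipe2/queue0, pipe3/queue0,
 * pipe0/queue1, and so on, so the rings spread evenly over MEC1 instead of
 * filling one pipe first.
 */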

void amdgpu_gfx_graphics_queue_acquire(struct amdgpu_device *adev)
{
	int i, queue, pipe;
	bool multipipe_policy = amdgpu_gfx_is_graphics_multipipe_capable(adev);
	int max_queues_per_me = adev->gfx.me.num_pipe_per_me *
				adev->gfx.me.num_queue_per_pipe;

	if (multipipe_policy) {
		/* policy: amdgpu owns the first queue per pipe at this stage;
		 * will extend to multiple queues per pipe later */
		for (i = 0; i < max_queues_per_me; i++) {
			pipe = i % adev->gfx.me.num_pipe_per_me;
			queue = (i / adev->gfx.me.num_pipe_per_me) %
				adev->gfx.me.num_queue_per_pipe;

			set_bit(pipe * adev->gfx.me.num_queue_per_pipe + queue,
				adev->gfx.me.queue_bitmap);
		}
	} else {
		for (i = 0; i < max_queues_per_me; ++i)
			set_bit(i, adev->gfx.me.queue_bitmap);
	}

	/* update the number of active graphics rings */
	adev->gfx.num_gfx_rings =
		bitmap_weight(adev->gfx.me.queue_bitmap, AMDGPU_MAX_GFX_QUEUES);
}

static int amdgpu_gfx_kiq_acquire(struct amdgpu_device *adev,
				  struct amdgpu_ring *ring, int xcc_id)
{
	int queue_bit;
	int mec, pipe, queue;

	queue_bit = adev->gfx.mec.num_mec
		    * adev->gfx.mec.num_pipe_per_mec
		    * adev->gfx.mec.num_queue_per_pipe;

	while (--queue_bit >= 0) {
		if (test_bit(queue_bit, adev->gfx.mec_bitmap[xcc_id].queue_bitmap))
			continue;

		amdgpu_queue_mask_bit_to_mec_queue(adev, queue_bit, &mec, &pipe, &queue);

		/*
		 * 1. Using pipes 2/3 from MEC 2 seems to cause problems.
		 * 2. It must use queue id 0, because CGPG_IDLE/SAVE/LOAD/RUN
		 *    can only be issued on queue 0.
		 */
		if ((mec == 1 && pipe > 1) || queue != 0)
			continue;

		ring->me = mec + 1;
		ring->pipe = pipe;
		ring->queue = queue;

		return 0;
	}

	dev_err(adev->dev, "Failed to find a queue for KIQ\n");
	return -EINVAL;
}

int amdgpu_gfx_kiq_init_ring(struct amdgpu_device *adev, int xcc_id)
{
	struct amdgpu_kiq *kiq = &adev->gfx.kiq[xcc_id];
	struct amdgpu_irq_src *irq = &kiq->irq;
	struct amdgpu_ring *ring = &kiq->ring;
	int r = 0;

	spin_lock_init(&kiq->ring_lock);

	ring->adev = NULL;
	ring->ring_obj = NULL;
	ring->use_doorbell = true;
	ring->xcc_id = xcc_id;
	ring->vm_hub = AMDGPU_GFXHUB(xcc_id);
	ring->doorbell_index =
		(adev->doorbell_index.kiq +
		 xcc_id * adev->doorbell_index.xcc_doorbell_range)
		<< 1;

	r = amdgpu_gfx_kiq_acquire(adev, ring, xcc_id);
	if (r)
		return r;

	ring->eop_gpu_addr = kiq->eop_gpu_addr;
	ring->no_scheduler = true;
	snprintf(ring->name, sizeof(ring->name), "kiq_%hhu.%hhu.%hhu.%hhu",
		 (unsigned char)xcc_id, (unsigned char)ring->me,
		 (unsigned char)ring->pipe, (unsigned char)ring->queue);
	r = amdgpu_ring_init(adev, ring, 1024, irq, AMDGPU_CP_KIQ_IRQ_DRIVER0,
			     AMDGPU_RING_PRIO_DEFAULT, NULL);
	if (r)
		dev_warn(adev->dev, "(%d) failed to init kiq ring\n", r);

	return r;
}
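
/*
 * Note on the doorbell index computed above: each XCC partition gets its own
 * doorbell range (adev->doorbell_index.xcc_doorbell_range), and the final
 * shift by 1 converts the 64-bit doorbell slot into the dword-based index
 * the ring expects (assuming the usual 64-bit doorbell layout on these parts).
 */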

void amdgpu_gfx_kiq_free_ring(struct amdgpu_ring *ring)
{
	amdgpu_ring_fini(ring);
}

void amdgpu_gfx_kiq_fini(struct amdgpu_device *adev, int xcc_id)
{
	struct amdgpu_kiq *kiq = &adev->gfx.kiq[xcc_id];

	amdgpu_bo_free_kernel(&kiq->eop_obj, &kiq->eop_gpu_addr, NULL);
}

int amdgpu_gfx_kiq_init(struct amdgpu_device *adev,
			unsigned int hpd_size, int xcc_id)
{
	int r;
	u32 *hpd;
	struct amdgpu_kiq *kiq = &adev->gfx.kiq[xcc_id];

	r = amdgpu_bo_create_kernel(adev, hpd_size, PAGE_SIZE,
				    AMDGPU_GEM_DOMAIN_GTT, &kiq->eop_obj,
				    &kiq->eop_gpu_addr, (void **)&hpd);
	if (r) {
		dev_warn(adev->dev, "failed to create KIQ bo (%d).\n", r);
		return r;
	}

	memset(hpd, 0, hpd_size);

	r = amdgpu_bo_reserve(kiq->eop_obj, true);
	if (unlikely(r != 0))
		dev_warn(adev->dev, "(%d) reserve kiq eop bo failed\n", r);
	amdgpu_bo_kunmap(kiq->eop_obj);
	amdgpu_bo_unreserve(kiq->eop_obj);

	return 0;
}

/* create MQD for each compute/gfx queue */
int amdgpu_gfx_mqd_sw_init(struct amdgpu_device *adev,
			   unsigned int mqd_size, int xcc_id)
{
	int r, i, j;
	struct amdgpu_kiq *kiq = &adev->gfx.kiq[xcc_id];
	struct amdgpu_ring *ring = &kiq->ring;
	u32 domain = AMDGPU_GEM_DOMAIN_GTT;

#if !defined(CONFIG_ARM) && !defined(CONFIG_ARM64)
	/* Only enable on gfx10 and 11 for now to avoid changing behavior on older chips */
	if (amdgpu_ip_version(adev, GC_HWIP, 0) >= IP_VERSION(10, 0, 0))
		domain |= AMDGPU_GEM_DOMAIN_VRAM;
#endif

	/* create MQD for KIQ */
	if (!adev->enable_mes_kiq && !ring->mqd_obj) {
		/* originally the KIQ MQD was put in the GTT domain, but for SRIOV
		 * the VRAM domain is a must, otherwise the hypervisor triggers a
		 * SAVE_VF failure after the driver is unloaded, which means the MQD
		 * was deallocated and gart_unbind was called. To avoid divergence,
		 * use the VRAM domain for the KIQ MQD on both SRIOV and bare-metal.
		 */
		r = amdgpu_bo_create_kernel(adev, mqd_size, PAGE_SIZE,
					    AMDGPU_GEM_DOMAIN_VRAM |
					    AMDGPU_GEM_DOMAIN_GTT,
					    &ring->mqd_obj,
					    &ring->mqd_gpu_addr,
					    &ring->mqd_ptr);
		if (r) {
			dev_warn(adev->dev, "failed to create ring mqd ob (%d)", r);
			return r;
		}

		/* prepare MQD backup */
		kiq->mqd_backup = kzalloc(mqd_size, GFP_KERNEL);
		if (!kiq->mqd_backup) {
			dev_warn(adev->dev,
				 "no memory to create MQD backup for ring %s\n", ring->name);
			return -ENOMEM;
		}
	}

	if (adev->asic_type >= CHIP_NAVI10 && amdgpu_async_gfx_ring) {
		/* create MQD for each KGQ */
		for (i = 0; i < adev->gfx.num_gfx_rings; i++) {
			ring = &adev->gfx.gfx_ring[i];
			if (!ring->mqd_obj) {
				r = amdgpu_bo_create_kernel(adev, mqd_size, PAGE_SIZE,
							    domain, &ring->mqd_obj,
							    &ring->mqd_gpu_addr, &ring->mqd_ptr);
				if (r) {
					dev_warn(adev->dev, "failed to create ring mqd bo (%d)", r);
					return r;
				}

				ring->mqd_size = mqd_size;
				/* prepare MQD backup */
				adev->gfx.me.mqd_backup[i] = kzalloc(mqd_size, GFP_KERNEL);
				if (!adev->gfx.me.mqd_backup[i]) {
					dev_warn(adev->dev, "no memory to create MQD backup for ring %s\n", ring->name);
					return -ENOMEM;
				}
			}
		}
	}

	/* create MQD for each KCQ */
	for (i = 0; i < adev->gfx.num_compute_rings; i++) {
		j = i + xcc_id * adev->gfx.num_compute_rings;
		ring = &adev->gfx.compute_ring[j];
		if (!ring->mqd_obj) {
			r = amdgpu_bo_create_kernel(adev, mqd_size, PAGE_SIZE,
						    domain, &ring->mqd_obj,
						    &ring->mqd_gpu_addr, &ring->mqd_ptr);
			if (r) {
				dev_warn(adev->dev, "failed to create ring mqd bo (%d)", r);
				return r;
			}

			ring->mqd_size = mqd_size;
			/* prepare MQD backup */
			adev->gfx.mec.mqd_backup[j] = kzalloc(mqd_size, GFP_KERNEL);
			if (!adev->gfx.mec.mqd_backup[j]) {
				dev_warn(adev->dev, "no memory to create MQD backup for ring %s\n", ring->name);
				return -ENOMEM;
			}
		}
	}

	return 0;
}

void amdgpu_gfx_mqd_sw_fini(struct amdgpu_device *adev, int xcc_id)
{
	struct amdgpu_ring *ring = NULL;
	int i, j;
	struct amdgpu_kiq *kiq = &adev->gfx.kiq[xcc_id];

	if (adev->asic_type >= CHIP_NAVI10 && amdgpu_async_gfx_ring) {
		for (i = 0; i < adev->gfx.num_gfx_rings; i++) {
			ring = &adev->gfx.gfx_ring[i];
			kfree(adev->gfx.me.mqd_backup[i]);
			amdgpu_bo_free_kernel(&ring->mqd_obj,
					      &ring->mqd_gpu_addr,
					      &ring->mqd_ptr);
		}
	}

	for (i = 0; i < adev->gfx.num_compute_rings; i++) {
		j = i + xcc_id * adev->gfx.num_compute_rings;
		ring = &adev->gfx.compute_ring[j];
		kfree(adev->gfx.mec.mqd_backup[j]);
		amdgpu_bo_free_kernel(&ring->mqd_obj,
				      &ring->mqd_gpu_addr,
				      &ring->mqd_ptr);
	}

	ring = &kiq->ring;
	kfree(kiq->mqd_backup);
	amdgpu_bo_free_kernel(&ring->mqd_obj,
			      &ring->mqd_gpu_addr,
			      &ring->mqd_ptr);
}

int amdgpu_gfx_disable_kcq(struct amdgpu_device *adev, int xcc_id)
{
	struct amdgpu_kiq *kiq = &adev->gfx.kiq[xcc_id];
	struct amdgpu_ring *kiq_ring = &kiq->ring;
	int i, r = 0;
	int j;

	if (adev->enable_mes) {
		for (i = 0; i < adev->gfx.num_compute_rings; i++) {
			j = i + xcc_id * adev->gfx.num_compute_rings;
			amdgpu_mes_unmap_legacy_queue(adev,
						      &adev->gfx.compute_ring[j],
						      RESET_QUEUES, 0, 0);
		}
		return 0;
	}

	if (!kiq->pmf || !kiq->pmf->kiq_unmap_queues)
		return -EINVAL;

	spin_lock(&kiq->ring_lock);
	if (amdgpu_ring_alloc(kiq_ring, kiq->pmf->unmap_queues_size *
			      adev->gfx.num_compute_rings)) {
		spin_unlock(&kiq->ring_lock);
		return -ENOMEM;
	}

	for (i = 0; i < adev->gfx.num_compute_rings; i++) {
		j = i + xcc_id * adev->gfx.num_compute_rings;
		kiq->pmf->kiq_unmap_queues(kiq_ring,
					   &adev->gfx.compute_ring[j],
					   RESET_QUEUES, 0, 0);
	}

	/**
	 * This is a workaround: only skip the kiq_ring test
	 * during RAS recovery in the suspend stage for gfx9.4.3
	 */
	if ((amdgpu_ip_version(adev, GC_HWIP, 0) == IP_VERSION(9, 4, 3) ||
	     amdgpu_ip_version(adev, GC_HWIP, 0) == IP_VERSION(9, 4, 4)) &&
	    amdgpu_ras_in_recovery(adev)) {
		spin_unlock(&kiq->ring_lock);
		return 0;
	}

	if (kiq_ring->sched.ready && !adev->job_hang)
		r = amdgpu_ring_test_helper(kiq_ring);
	spin_unlock(&kiq->ring_lock);

	return r;
}

int amdgpu_gfx_disable_kgq(struct amdgpu_device *adev, int xcc_id)
{
	struct amdgpu_kiq *kiq = &adev->gfx.kiq[xcc_id];
	struct amdgpu_ring *kiq_ring = &kiq->ring;
	int i, r = 0;
	int j;

	if (adev->enable_mes) {
		if (amdgpu_gfx_is_master_xcc(adev, xcc_id)) {
			for (i = 0; i < adev->gfx.num_gfx_rings; i++) {
				j = i + xcc_id * adev->gfx.num_gfx_rings;
				amdgpu_mes_unmap_legacy_queue(adev,
							      &adev->gfx.gfx_ring[j],
							      PREEMPT_QUEUES, 0, 0);
			}
		}
		return 0;
	}

	if (!kiq->pmf || !kiq->pmf->kiq_unmap_queues)
		return -EINVAL;

	spin_lock(&kiq->ring_lock);
	if (amdgpu_gfx_is_master_xcc(adev, xcc_id)) {
		if (amdgpu_ring_alloc(kiq_ring, kiq->pmf->unmap_queues_size *
				      adev->gfx.num_gfx_rings)) {
			spin_unlock(&kiq->ring_lock);
			return -ENOMEM;
		}

		for (i = 0; i < adev->gfx.num_gfx_rings; i++) {
			j = i + xcc_id * adev->gfx.num_gfx_rings;
			kiq->pmf->kiq_unmap_queues(kiq_ring,
						   &adev->gfx.gfx_ring[j],
						   PREEMPT_QUEUES, 0, 0);
		}
	}

	if (adev->gfx.kiq[0].ring.sched.ready && !adev->job_hang)
		r = amdgpu_ring_test_helper(kiq_ring);
	spin_unlock(&kiq->ring_lock);

	return r;
}

int amdgpu_queue_mask_bit_to_set_resource_bit(struct amdgpu_device *adev,
					      int queue_bit)
{
	int mec, pipe, queue;
	int set_resource_bit = 0;

	amdgpu_queue_mask_bit_to_mec_queue(adev, queue_bit, &mec, &pipe, &queue);

	set_resource_bit = mec * 4 * 8 + pipe * 8 + queue;

	return set_resource_bit;
}
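
/*
 * The SET_RESOURCES queue mask is laid out as 4 pipes per MEC with 8 queues
 * per pipe, hence the hard-coded "mec * 4 * 8 + pipe * 8 + queue" above.
 * For example, a driver queue bit that decodes to mec = 0, pipe = 1,
 * queue = 2 maps to bit 10 in the mask handed to the firmware.
 */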

static int amdgpu_gfx_mes_enable_kcq(struct amdgpu_device *adev, int xcc_id)
{
	struct amdgpu_kiq *kiq = &adev->gfx.kiq[xcc_id];
	struct amdgpu_ring *kiq_ring = &kiq->ring;
	uint64_t queue_mask = ~0ULL;
	int r, i, j;

	amdgpu_device_flush_hdp(adev, NULL);

	if (!adev->enable_uni_mes) {
		spin_lock(&kiq->ring_lock);
		r = amdgpu_ring_alloc(kiq_ring, kiq->pmf->set_resources_size);
		if (r) {
			dev_err(adev->dev, "Failed to lock KIQ (%d).\n", r);
			spin_unlock(&kiq->ring_lock);
			return r;
		}

		kiq->pmf->kiq_set_resources(kiq_ring, queue_mask);
		r = amdgpu_ring_test_helper(kiq_ring);
		spin_unlock(&kiq->ring_lock);
		if (r)
			dev_err(adev->dev, "KIQ failed to set resources\n");
	}

	for (i = 0; i < adev->gfx.num_compute_rings; i++) {
		j = i + xcc_id * adev->gfx.num_compute_rings;
		r = amdgpu_mes_map_legacy_queue(adev,
						&adev->gfx.compute_ring[j]);
		if (r) {
			dev_err(adev->dev, "failed to map compute queue\n");
			return r;
		}
	}

	return 0;
}

int amdgpu_gfx_enable_kcq(struct amdgpu_device *adev, int xcc_id)
{
	struct amdgpu_kiq *kiq = &adev->gfx.kiq[xcc_id];
	struct amdgpu_ring *kiq_ring = &kiq->ring;
	uint64_t queue_mask = 0;
	int r, i, j;

	if (adev->mes.enable_legacy_queue_map)
		return amdgpu_gfx_mes_enable_kcq(adev, xcc_id);

	if (!kiq->pmf || !kiq->pmf->kiq_map_queues || !kiq->pmf->kiq_set_resources)
		return -EINVAL;

	for (i = 0; i < AMDGPU_MAX_COMPUTE_QUEUES; ++i) {
		if (!test_bit(i, adev->gfx.mec_bitmap[xcc_id].queue_bitmap))
			continue;

		/* This situation may be hit in the future if a new HW
		 * generation exposes more than 64 queues. If so, the
		 * definition of queue_mask needs updating */
		if (WARN_ON(i > (sizeof(queue_mask)*8))) {
			DRM_ERROR("Invalid KCQ enabled: %d\n", i);
			break;
		}

		queue_mask |= (1ull << amdgpu_queue_mask_bit_to_set_resource_bit(adev, i));
	}

	amdgpu_device_flush_hdp(adev, NULL);

	DRM_INFO("kiq ring mec %d pipe %d q %d\n", kiq_ring->me, kiq_ring->pipe,
		 kiq_ring->queue);

	spin_lock(&kiq->ring_lock);
	r = amdgpu_ring_alloc(kiq_ring, kiq->pmf->map_queues_size *
			      adev->gfx.num_compute_rings +
			      kiq->pmf->set_resources_size);
	if (r) {
		DRM_ERROR("Failed to lock KIQ (%d).\n", r);
		spin_unlock(&kiq->ring_lock);
		return r;
	}

	kiq->pmf->kiq_set_resources(kiq_ring, queue_mask);
	for (i = 0; i < adev->gfx.num_compute_rings; i++) {
		j = i + xcc_id * adev->gfx.num_compute_rings;
		kiq->pmf->kiq_map_queues(kiq_ring,
					 &adev->gfx.compute_ring[j]);
	}

	r = amdgpu_ring_test_helper(kiq_ring);
	spin_unlock(&kiq->ring_lock);
	if (r)
		DRM_ERROR("KCQ enable failed\n");

	return r;
}

int amdgpu_gfx_enable_kgq(struct amdgpu_device *adev, int xcc_id)
{
	struct amdgpu_kiq *kiq = &adev->gfx.kiq[xcc_id];
	struct amdgpu_ring *kiq_ring = &kiq->ring;
	int r, i, j;

	if (!kiq->pmf || !kiq->pmf->kiq_map_queues)
		return -EINVAL;

	amdgpu_device_flush_hdp(adev, NULL);

	if (adev->mes.enable_legacy_queue_map) {
		for (i = 0; i < adev->gfx.num_gfx_rings; i++) {
			j = i + xcc_id * adev->gfx.num_gfx_rings;
			r = amdgpu_mes_map_legacy_queue(adev,
							&adev->gfx.gfx_ring[j]);
			if (r) {
				DRM_ERROR("failed to map gfx queue\n");
				return r;
			}
		}

		return 0;
	}

	spin_lock(&kiq->ring_lock);
	/* No need to map kcq on the slave */
	if (amdgpu_gfx_is_master_xcc(adev, xcc_id)) {
		r = amdgpu_ring_alloc(kiq_ring, kiq->pmf->map_queues_size *
				      adev->gfx.num_gfx_rings);
		if (r) {
			DRM_ERROR("Failed to lock KIQ (%d).\n", r);
			spin_unlock(&kiq->ring_lock);
			return r;
		}

		for (i = 0; i < adev->gfx.num_gfx_rings; i++) {
			j = i + xcc_id * adev->gfx.num_gfx_rings;
			kiq->pmf->kiq_map_queues(kiq_ring,
						 &adev->gfx.gfx_ring[j]);
		}
	}

	r = amdgpu_ring_test_helper(kiq_ring);
	spin_unlock(&kiq->ring_lock);
	if (r)
		DRM_ERROR("KGQ enable failed\n");

	return r;
}

/* amdgpu_gfx_off_ctrl - Handle gfx off feature enable/disable
 *
 * @adev: amdgpu_device pointer
 * @enable: true: enable gfx off feature, false: disable gfx off feature
 *
 * 1. The gfx off feature will be enabled by the gfx IP after gfx cg/pg is enabled.
 * 2. Other clients can send a request to disable the gfx off feature; the request should be honored.
 * 3. Other clients can cancel their request to disable the gfx off feature.
 * 4. Other clients should not send a request to enable the gfx off feature before disabling it first.
 */

void amdgpu_gfx_off_ctrl(struct amdgpu_device *adev, bool enable)
{
	unsigned long delay = GFX_OFF_DELAY_ENABLE;

	if (!(adev->pm.pp_feature & PP_GFXOFF_MASK))
		return;

	mutex_lock(&adev->gfx.gfx_off_mutex);

	if (enable) {
		/* If the count is already 0, it means there's an imbalance bug somewhere.
		 * Note that the bug may be in a different caller than the one which triggers the
		 * WARN_ON_ONCE.
		 */
		if (WARN_ON_ONCE(adev->gfx.gfx_off_req_count == 0))
			goto unlock;

		adev->gfx.gfx_off_req_count--;

		if (adev->gfx.gfx_off_req_count == 0 &&
		    !adev->gfx.gfx_off_state) {
			/* If going to s2idle, no need to wait */
			if (adev->in_s0ix) {
				if (!amdgpu_dpm_set_powergating_by_smu(adev,
						AMD_IP_BLOCK_TYPE_GFX, true))
					adev->gfx.gfx_off_state = true;
			} else {
				schedule_delayed_work(&adev->gfx.gfx_off_delay_work,
						      delay);
			}
		}
	} else {
		if (adev->gfx.gfx_off_req_count == 0) {
			cancel_delayed_work_sync(&adev->gfx.gfx_off_delay_work);

			if (adev->gfx.gfx_off_state &&
			    !amdgpu_dpm_set_powergating_by_smu(adev, AMD_IP_BLOCK_TYPE_GFX, false)) {
				adev->gfx.gfx_off_state = false;

				if (adev->gfx.funcs->init_spm_golden) {
					dev_dbg(adev->dev,
						"GFXOFF is disabled, re-init SPM golden settings\n");
					amdgpu_gfx_init_spm_golden(adev);
				}
			}
		}

		adev->gfx.gfx_off_req_count++;
	}

unlock:
	mutex_unlock(&adev->gfx.gfx_off_mutex);
}
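
/*
 * Typical (illustrative) usage of the refcount managed above: a client that
 * needs the GFX block to stay powered brackets its access with
 *
 *	amdgpu_gfx_off_ctrl(adev, false);	// take a "disable GFXOFF" reference
 *	... access GFX registers ...
 *	amdgpu_gfx_off_ctrl(adev, true);	// drop the reference; GFXOFF re-arms
 *						// after GFX_OFF_DELAY_ENABLE
 *
 * Calls must stay balanced, otherwise the WARN_ON_ONCE above will trigger.
 */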

int amdgpu_set_gfx_off_residency(struct amdgpu_device *adev, bool value)
{
	int r = 0;

	mutex_lock(&adev->gfx.gfx_off_mutex);

	r = amdgpu_dpm_set_residency_gfxoff(adev, value);

	mutex_unlock(&adev->gfx.gfx_off_mutex);

	return r;
}

int amdgpu_get_gfx_off_residency(struct amdgpu_device *adev, u32 *value)
{
	int r = 0;

	mutex_lock(&adev->gfx.gfx_off_mutex);

	r = amdgpu_dpm_get_residency_gfxoff(adev, value);

	mutex_unlock(&adev->gfx.gfx_off_mutex);

	return r;
}

int amdgpu_get_gfx_off_entrycount(struct amdgpu_device *adev, u64 *value)
{
	int r = 0;

	mutex_lock(&adev->gfx.gfx_off_mutex);

	r = amdgpu_dpm_get_entrycount_gfxoff(adev, value);

	mutex_unlock(&adev->gfx.gfx_off_mutex);

	return r;
}

int amdgpu_get_gfx_off_status(struct amdgpu_device *adev, uint32_t *value)
{

	int r = 0;

	mutex_lock(&adev->gfx.gfx_off_mutex);

	r = amdgpu_dpm_get_status_gfxoff(adev, value);

	mutex_unlock(&adev->gfx.gfx_off_mutex);

	return r;
}

int amdgpu_gfx_ras_late_init(struct amdgpu_device *adev, struct ras_common_if *ras_block)
{
	int r;

	if (amdgpu_ras_is_supported(adev, ras_block->block)) {
		if (!amdgpu_persistent_edc_harvesting_supported(adev)) {
			r = amdgpu_ras_reset_error_status(adev, AMDGPU_RAS_BLOCK__GFX);
			if (r)
				return r;
		}

		r = amdgpu_ras_block_late_init(adev, ras_block);
		if (r)
			return r;

		if (adev->gfx.cp_ecc_error_irq.funcs) {
			r = amdgpu_irq_get(adev, &adev->gfx.cp_ecc_error_irq, 0);
			if (r)
				goto late_fini;
		}
	} else {
		amdgpu_ras_feature_enable_on_boot(adev, ras_block, 0);
	}

	return 0;
late_fini:
	amdgpu_ras_block_late_fini(adev, ras_block);
	return r;
}

int amdgpu_gfx_ras_sw_init(struct amdgpu_device *adev)
{
	int err = 0;
	struct amdgpu_gfx_ras *ras = NULL;

	/* If adev->gfx.ras is NULL, gfx does not support the RAS
	 * function; do nothing here.
	 */
	if (!adev->gfx.ras)
		return 0;

	ras = adev->gfx.ras;

	err = amdgpu_ras_register_ras_block(adev, &ras->ras_block);
	if (err) {
		dev_err(adev->dev, "Failed to register gfx ras block!\n");
		return err;
	}

	strcpy(ras->ras_block.ras_comm.name, "gfx");
	ras->ras_block.ras_comm.block = AMDGPU_RAS_BLOCK__GFX;
	ras->ras_block.ras_comm.type = AMDGPU_RAS_ERROR__MULTI_UNCORRECTABLE;
	adev->gfx.ras_if = &ras->ras_block.ras_comm;

	/* If no special ras_late_init function is defined, use the default gfx ras_late_init */
	if (!ras->ras_block.ras_late_init)
		ras->ras_block.ras_late_init = amdgpu_gfx_ras_late_init;

	/* If no special ras_cb function is defined, use the default ras_cb */
	if (!ras->ras_block.ras_cb)
		ras->ras_block.ras_cb = amdgpu_gfx_process_ras_data_cb;

	return 0;
}

int amdgpu_gfx_poison_consumption_handler(struct amdgpu_device *adev,
					  struct amdgpu_iv_entry *entry)
{
	if (adev->gfx.ras && adev->gfx.ras->poison_consumption_handler)
		return adev->gfx.ras->poison_consumption_handler(adev, entry);

	return 0;
}

int amdgpu_gfx_process_ras_data_cb(struct amdgpu_device *adev,
				   void *err_data,
				   struct amdgpu_iv_entry *entry)
{
	/* TODO ue will trigger an interrupt.
	 *
	 * When "Full RAS" is enabled, the per-IP interrupt sources should
	 * be disabled and the driver should only look for the aggregated
	 * interrupt via sync flood
	 */
	if (!amdgpu_ras_is_supported(adev, AMDGPU_RAS_BLOCK__GFX)) {
		kgd2kfd_set_sram_ecc_flag(adev->kfd.dev);
		if (adev->gfx.ras && adev->gfx.ras->ras_block.hw_ops &&
		    adev->gfx.ras->ras_block.hw_ops->query_ras_error_count)
			adev->gfx.ras->ras_block.hw_ops->query_ras_error_count(adev, err_data);
		amdgpu_ras_reset_gpu(adev);
	}
	return AMDGPU_RAS_SUCCESS;
}

int amdgpu_gfx_cp_ecc_error_irq(struct amdgpu_device *adev,
				struct amdgpu_irq_src *source,
				struct amdgpu_iv_entry *entry)
{
	struct ras_common_if *ras_if = adev->gfx.ras_if;
	struct ras_dispatch_if ih_data = {
		.entry = entry,
	};

	if (!ras_if)
		return 0;

	ih_data.head = *ras_if;

	DRM_ERROR("CP ECC ERROR IRQ\n");
	amdgpu_ras_interrupt_dispatch(adev, &ih_data);
	return 0;
}

void amdgpu_gfx_ras_error_func(struct amdgpu_device *adev,
			       void *ras_error_status,
			       void (*func)(struct amdgpu_device *adev, void *ras_error_status,
					    int xcc_id))
{
	int i;
	int num_xcc = adev->gfx.xcc_mask ? NUM_XCC(adev->gfx.xcc_mask) : 1;
	uint32_t xcc_mask = GENMASK(num_xcc - 1, 0);
	struct ras_err_data *err_data = (struct ras_err_data *)ras_error_status;

	if (err_data) {
		err_data->ue_count = 0;
		err_data->ce_count = 0;
	}

	for_each_inst(i, xcc_mask)
		func(adev, ras_error_status, i);
}

uint32_t amdgpu_kiq_rreg(struct amdgpu_device *adev, uint32_t reg, uint32_t xcc_id)
{
	signed long r, cnt = 0;
	unsigned long flags;
	uint32_t seq, reg_val_offs = 0, value = 0;
	struct amdgpu_kiq *kiq = &adev->gfx.kiq[xcc_id];
	struct amdgpu_ring *ring = &kiq->ring;

	if (amdgpu_device_skip_hw_access(adev))
		return 0;

	if (adev->mes.ring[0].sched.ready)
		return amdgpu_mes_rreg(adev, reg);

	BUG_ON(!ring->funcs->emit_rreg);

	spin_lock_irqsave(&kiq->ring_lock, flags);
	if (amdgpu_device_wb_get(adev, &reg_val_offs)) {
		pr_err("critical bug! too many kiq readers\n");
		goto failed_unlock;
	}
	r = amdgpu_ring_alloc(ring, 32);
	if (r)
		goto failed_unlock;

	amdgpu_ring_emit_rreg(ring, reg, reg_val_offs);
	r = amdgpu_fence_emit_polling(ring, &seq, MAX_KIQ_REG_WAIT);
	if (r)
		goto failed_undo;

	amdgpu_ring_commit(ring);
	spin_unlock_irqrestore(&kiq->ring_lock, flags);

	r = amdgpu_fence_wait_polling(ring, seq, MAX_KIQ_REG_WAIT);

	/* don't wait anymore for the gpu reset case because this way may
	 * block the gpu_recover() routine forever, e.g. this virt_kiq_rreg
	 * is triggered in TTM and ttm_bo_lock_delayed_workqueue() will
	 * never return if we keep waiting in virt_kiq_rreg, which causes
	 * gpu_recover() to hang there.
	 *
	 * also don't wait anymore for IRQ context
	 */
	if (r < 1 && (amdgpu_in_reset(adev) || in_interrupt()))
		goto failed_kiq_read;

	might_sleep();
	while (r < 1 && cnt++ < MAX_KIQ_REG_TRY) {
		msleep(MAX_KIQ_REG_BAILOUT_INTERVAL);
		r = amdgpu_fence_wait_polling(ring, seq, MAX_KIQ_REG_WAIT);
	}

	if (cnt > MAX_KIQ_REG_TRY)
		goto failed_kiq_read;

	mb();
	value = adev->wb.wb[reg_val_offs];
	amdgpu_device_wb_free(adev, reg_val_offs);
	return value;

failed_undo:
	amdgpu_ring_undo(ring);
failed_unlock:
	spin_unlock_irqrestore(&kiq->ring_lock, flags);
failed_kiq_read:
	if (reg_val_offs)
		amdgpu_device_wb_free(adev, reg_val_offs);
	dev_err(adev->dev, "failed to read reg:%x\n", reg);
	return ~0;
}

void amdgpu_kiq_wreg(struct amdgpu_device *adev, uint32_t reg, uint32_t v, uint32_t xcc_id)
{
	signed long r, cnt = 0;
	unsigned long flags;
	uint32_t seq;
	struct amdgpu_kiq *kiq = &adev->gfx.kiq[xcc_id];
	struct amdgpu_ring *ring = &kiq->ring;

	BUG_ON(!ring->funcs->emit_wreg);

	if (amdgpu_device_skip_hw_access(adev))
		return;

	if (adev->mes.ring[0].sched.ready) {
		amdgpu_mes_wreg(adev, reg, v);
		return;
	}

	spin_lock_irqsave(&kiq->ring_lock, flags);
	r = amdgpu_ring_alloc(ring, 32);
	if (r)
		goto failed_unlock;

	amdgpu_ring_emit_wreg(ring, reg, v);
	r = amdgpu_fence_emit_polling(ring, &seq, MAX_KIQ_REG_WAIT);
	if (r)
		goto failed_undo;

	amdgpu_ring_commit(ring);
	spin_unlock_irqrestore(&kiq->ring_lock, flags);

	r = amdgpu_fence_wait_polling(ring, seq, MAX_KIQ_REG_WAIT);

	/* don't wait anymore for the gpu reset case because this way may
	 * block the gpu_recover() routine forever, e.g. this virt_kiq_rreg
	 * is triggered in TTM and ttm_bo_lock_delayed_workqueue() will
	 * never return if we keep waiting in virt_kiq_rreg, which causes
	 * gpu_recover() to hang there.
	 *
	 * also don't wait anymore for IRQ context
	 */
	if (r < 1 && (amdgpu_in_reset(adev) || in_interrupt()))
		goto failed_kiq_write;

	might_sleep();
	while (r < 1 && cnt++ < MAX_KIQ_REG_TRY) {

		msleep(MAX_KIQ_REG_BAILOUT_INTERVAL);
		r = amdgpu_fence_wait_polling(ring, seq, MAX_KIQ_REG_WAIT);
	}

	if (cnt > MAX_KIQ_REG_TRY)
		goto failed_kiq_write;

	return;

failed_undo:
	amdgpu_ring_undo(ring);
failed_unlock:
	spin_unlock_irqrestore(&kiq->ring_lock, flags);
failed_kiq_write:
	dev_err(adev->dev, "failed to write reg:%x\n", reg);
}

int amdgpu_gfx_get_num_kcq(struct amdgpu_device *adev)
{
	if (amdgpu_num_kcq == -1) {
		return 8;
	} else if (amdgpu_num_kcq > 8 || amdgpu_num_kcq < 0) {
		dev_warn(adev->dev, "set kernel compute queue number to 8 due to invalid parameter provided by user\n");
		return 8;
	}
	return amdgpu_num_kcq;
}

void amdgpu_gfx_cp_init_microcode(struct amdgpu_device *adev,
				  uint32_t ucode_id)
{
	const struct gfx_firmware_header_v1_0 *cp_hdr;
	const struct gfx_firmware_header_v2_0 *cp_hdr_v2_0;
	struct amdgpu_firmware_info *info = NULL;
	const struct firmware *ucode_fw;
	unsigned int fw_size;

	switch (ucode_id) {
	case AMDGPU_UCODE_ID_CP_PFP:
		cp_hdr = (const struct gfx_firmware_header_v1_0 *)
			adev->gfx.pfp_fw->data;
		adev->gfx.pfp_fw_version =
			le32_to_cpu(cp_hdr->header.ucode_version);
		adev->gfx.pfp_feature_version =
			le32_to_cpu(cp_hdr->ucode_feature_version);
		ucode_fw = adev->gfx.pfp_fw;
		fw_size = le32_to_cpu(cp_hdr->header.ucode_size_bytes);
		break;
	case AMDGPU_UCODE_ID_CP_RS64_PFP:
		cp_hdr_v2_0 = (const struct gfx_firmware_header_v2_0 *)
			adev->gfx.pfp_fw->data;
		adev->gfx.pfp_fw_version =
			le32_to_cpu(cp_hdr_v2_0->header.ucode_version);
		adev->gfx.pfp_feature_version =
			le32_to_cpu(cp_hdr_v2_0->ucode_feature_version);
		ucode_fw = adev->gfx.pfp_fw;
		fw_size = le32_to_cpu(cp_hdr_v2_0->ucode_size_bytes);
		break;
	case AMDGPU_UCODE_ID_CP_RS64_PFP_P0_STACK:
	case AMDGPU_UCODE_ID_CP_RS64_PFP_P1_STACK:
		cp_hdr_v2_0 = (const struct gfx_firmware_header_v2_0 *)
			adev->gfx.pfp_fw->data;
		ucode_fw = adev->gfx.pfp_fw;
		fw_size = le32_to_cpu(cp_hdr_v2_0->data_size_bytes);
		break;
	case AMDGPU_UCODE_ID_CP_ME:
		cp_hdr = (const struct gfx_firmware_header_v1_0 *)
			adev->gfx.me_fw->data;
		adev->gfx.me_fw_version =
			le32_to_cpu(cp_hdr->header.ucode_version);
		adev->gfx.me_feature_version =
			le32_to_cpu(cp_hdr->ucode_feature_version);
		ucode_fw = adev->gfx.me_fw;
		fw_size = le32_to_cpu(cp_hdr->header.ucode_size_bytes);
		break;
	case AMDGPU_UCODE_ID_CP_RS64_ME:
		cp_hdr_v2_0 = (const struct gfx_firmware_header_v2_0 *)
			adev->gfx.me_fw->data;
		adev->gfx.me_fw_version =
			le32_to_cpu(cp_hdr_v2_0->header.ucode_version);
		adev->gfx.me_feature_version =
			le32_to_cpu(cp_hdr_v2_0->ucode_feature_version);
		ucode_fw = adev->gfx.me_fw;
		fw_size = le32_to_cpu(cp_hdr_v2_0->ucode_size_bytes);
		break;
	case AMDGPU_UCODE_ID_CP_RS64_ME_P0_STACK:
	case AMDGPU_UCODE_ID_CP_RS64_ME_P1_STACK:
		cp_hdr_v2_0 = (const struct gfx_firmware_header_v2_0 *)
			adev->gfx.me_fw->data;
		ucode_fw = adev->gfx.me_fw;
		fw_size = le32_to_cpu(cp_hdr_v2_0->data_size_bytes);
		break;
	case AMDGPU_UCODE_ID_CP_CE:
		cp_hdr = (const struct gfx_firmware_header_v1_0 *)
			adev->gfx.ce_fw->data;
		adev->gfx.ce_fw_version =
			le32_to_cpu(cp_hdr->header.ucode_version);
		adev->gfx.ce_feature_version =
			le32_to_cpu(cp_hdr->ucode_feature_version);
		ucode_fw = adev->gfx.ce_fw;
		fw_size = le32_to_cpu(cp_hdr->header.ucode_size_bytes);
		break;
	case AMDGPU_UCODE_ID_CP_MEC1:
		cp_hdr = (const struct gfx_firmware_header_v1_0 *)
			adev->gfx.mec_fw->data;
		adev->gfx.mec_fw_version =
			le32_to_cpu(cp_hdr->header.ucode_version);
		adev->gfx.mec_feature_version =
			le32_to_cpu(cp_hdr->ucode_feature_version);
		ucode_fw = adev->gfx.mec_fw;
		fw_size = le32_to_cpu(cp_hdr->header.ucode_size_bytes) -
			  le32_to_cpu(cp_hdr->jt_size) * 4;
		break;
	case AMDGPU_UCODE_ID_CP_MEC1_JT:
		cp_hdr = (const struct gfx_firmware_header_v1_0 *)
			adev->gfx.mec_fw->data;
		ucode_fw = adev->gfx.mec_fw;
		fw_size = le32_to_cpu(cp_hdr->jt_size) * 4;
		break;
	case AMDGPU_UCODE_ID_CP_MEC2:
		cp_hdr = (const struct gfx_firmware_header_v1_0 *)
			adev->gfx.mec2_fw->data;
		adev->gfx.mec2_fw_version =
			le32_to_cpu(cp_hdr->header.ucode_version);
		adev->gfx.mec2_feature_version =
			le32_to_cpu(cp_hdr->ucode_feature_version);
		ucode_fw = adev->gfx.mec2_fw;
		fw_size = le32_to_cpu(cp_hdr->header.ucode_size_bytes) -
			  le32_to_cpu(cp_hdr->jt_size) * 4;
		break;
	case AMDGPU_UCODE_ID_CP_MEC2_JT:
		cp_hdr = (const struct gfx_firmware_header_v1_0 *)
			adev->gfx.mec2_fw->data;
		ucode_fw = adev->gfx.mec2_fw;
		fw_size = le32_to_cpu(cp_hdr->jt_size) * 4;
		break;
	case AMDGPU_UCODE_ID_CP_RS64_MEC:
		cp_hdr_v2_0 = (const struct gfx_firmware_header_v2_0 *)
			adev->gfx.mec_fw->data;
		adev->gfx.mec_fw_version =
			le32_to_cpu(cp_hdr_v2_0->header.ucode_version);
		adev->gfx.mec_feature_version =
			le32_to_cpu(cp_hdr_v2_0->ucode_feature_version);
		ucode_fw = adev->gfx.mec_fw;
		fw_size = le32_to_cpu(cp_hdr_v2_0->ucode_size_bytes);
		break;
	case AMDGPU_UCODE_ID_CP_RS64_MEC_P0_STACK:
	case AMDGPU_UCODE_ID_CP_RS64_MEC_P1_STACK:
	case AMDGPU_UCODE_ID_CP_RS64_MEC_P2_STACK:
	case AMDGPU_UCODE_ID_CP_RS64_MEC_P3_STACK:
		cp_hdr_v2_0 = (const struct gfx_firmware_header_v2_0 *)
			adev->gfx.mec_fw->data;
		ucode_fw = adev->gfx.mec_fw;
		fw_size = le32_to_cpu(cp_hdr_v2_0->data_size_bytes);
		break;
	default:
		dev_err(adev->dev, "Invalid ucode id %u\n", ucode_id);
		return;
	}

	if (adev->firmware.load_type == AMDGPU_FW_LOAD_PSP) {
		info = &adev->firmware.ucode[ucode_id];
		info->ucode_id = ucode_id;
		info->fw = ucode_fw;
		adev->firmware.fw_size += ALIGN(fw_size, PAGE_SIZE);
	}
}

bool amdgpu_gfx_is_master_xcc(struct amdgpu_device *adev, int xcc_id)
{
	return !(xcc_id % (adev->gfx.num_xcc_per_xcp ?
			adev->gfx.num_xcc_per_xcp : 1));
}
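
/*
 * Example: with num_xcc_per_xcp = 2, XCCs 0, 2, 4, ... are treated as the
 * "master" XCC of their partition (xcc_id % 2 == 0), while the odd XCCs are
 * slaves and skip per-partition work such as the KGQ map/unmap done in the
 * helpers earlier in this file.
 */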
98a54e88 LM |
1296 | |
1297 | static ssize_t amdgpu_gfx_get_current_compute_partition(struct device *dev, | |
1298 | struct device_attribute *addr, | |
1299 | char *buf) | |
1300 | { | |
1301 | struct drm_device *ddev = dev_get_drvdata(dev); | |
1302 | struct amdgpu_device *adev = drm_to_adev(ddev); | |
8e7fd193 | 1303 | int mode; |
98a54e88 | 1304 | |
ded7d99e LL |
1305 | mode = amdgpu_xcp_query_partition_mode(adev->xcp_mgr, |
1306 | AMDGPU_XCP_FL_NONE); | |
98a54e88 | 1307 | |
f9632096 | 1308 | return sysfs_emit(buf, "%s\n", amdgpu_gfx_compute_mode_desc(mode)); |
98a54e88 LM |
1309 | } |
1310 | ||
1311 | static ssize_t amdgpu_gfx_set_compute_partition(struct device *dev, | |
1312 | struct device_attribute *addr, | |
1313 | const char *buf, size_t count) | |
1314 | { | |
1315 | struct drm_device *ddev = dev_get_drvdata(dev); | |
1316 | struct amdgpu_device *adev = drm_to_adev(ddev); | |
1317 | enum amdgpu_gfx_partition mode; | |
8078f1c6 | 1318 | int ret = 0, num_xcc; |
98a54e88 | 1319 | |
8078f1c6 LL |
1320 | num_xcc = NUM_XCC(adev->gfx.xcc_mask); |
1321 | if (num_xcc % 2 != 0) | |
98a54e88 LM |
1322 | return -EINVAL; |
1323 | ||
1324 | if (!strncasecmp("SPX", buf, strlen("SPX"))) { | |
1325 | mode = AMDGPU_SPX_PARTITION_MODE; | |
1326 | } else if (!strncasecmp("DPX", buf, strlen("DPX"))) { | |
cb30544e MJ |
1327 | /* |
1328 | * DPX mode needs AIDs to be in multiple of 2. | |
1329 | * Each AID connects 2 XCCs. | |
1330 | */ | |
1331 | if (num_xcc%4) | |
98a54e88 LM |
1332 | return -EINVAL; |
1333 | mode = AMDGPU_DPX_PARTITION_MODE; | |
1334 | } else if (!strncasecmp("TPX", buf, strlen("TPX"))) { | |
8078f1c6 | 1335 | if (num_xcc != 6) |
98a54e88 LM |
1336 | return -EINVAL; |
1337 | mode = AMDGPU_TPX_PARTITION_MODE; | |
1338 | } else if (!strncasecmp("QPX", buf, strlen("QPX"))) { | |
8078f1c6 | 1339 | if (num_xcc != 8) |
98a54e88 LM |
1340 | return -EINVAL; |
1341 | mode = AMDGPU_QPX_PARTITION_MODE; | |
1342 | } else if (!strncasecmp("CPX", buf, strlen("CPX"))) { | |
1343 | mode = AMDGPU_CPX_PARTITION_MODE; | |
1344 | } else { | |
1345 | return -EINVAL; | |
1346 | } | |
1347 | ||
8e7fd193 | 1348 | ret = amdgpu_xcp_switch_partition_mode(adev->xcp_mgr, mode); |
98a54e88 LM |
1349 | |
1350 | if (ret) | |
1351 | return ret; | |
1352 | ||
1353 | return count; | |
1354 | } | |
1355 | ||
1bc0b339 LL |
1356 | static const char *xcp_desc[] = { |
1357 | [AMDGPU_SPX_PARTITION_MODE] = "SPX", | |
1358 | [AMDGPU_DPX_PARTITION_MODE] = "DPX", | |
1359 | [AMDGPU_TPX_PARTITION_MODE] = "TPX", | |
1360 | [AMDGPU_QPX_PARTITION_MODE] = "QPX", | |
1361 | [AMDGPU_CPX_PARTITION_MODE] = "CPX", | |
1362 | }; | |
1363 | ||
98a54e88 LM |
1364 | static ssize_t amdgpu_gfx_get_available_compute_partition(struct device *dev, |
1365 | struct device_attribute *addr, | |
1366 | char *buf) | |
1367 | { | |
1368 | struct drm_device *ddev = dev_get_drvdata(dev); | |
1369 | struct amdgpu_device *adev = drm_to_adev(ddev); | |
1bc0b339 LL |
1370 | struct amdgpu_xcp_mgr *xcp_mgr = adev->xcp_mgr; |
1371 | int size = 0, mode; | |
1372 | char *sep = ""; | |
98a54e88 | 1373 | |
1bc0b339 LL |
1374 | if (!xcp_mgr || !xcp_mgr->avail_xcp_modes) |
1375 | return sysfs_emit(buf, "Not supported\n"); | |
1376 | ||
1377 | for_each_inst(mode, xcp_mgr->avail_xcp_modes) { | |
1378 | size += sysfs_emit_at(buf, size, "%s%s", sep, xcp_desc[mode]); | |
1379 | sep = ", "; | |
98a54e88 LM |
1380 | } |
1381 | ||
1bc0b339 LL |
1382 | size += sysfs_emit_at(buf, size, "\n"); |
1383 | ||
1384 | return size; | |
98a54e88 LM |
1385 | } |
1386 | ||
d361ad5d SS |
1387 | static int amdgpu_gfx_run_cleaner_shader_job(struct amdgpu_ring *ring) |
1388 | { | |
1389 | struct amdgpu_device *adev = ring->adev; | |
559a2858 SS |
1390 | struct drm_gpu_scheduler *sched = &ring->sched; |
1391 | struct drm_sched_entity entity; | |
1392 | struct dma_fence *f; | |
d361ad5d SS |
1393 | struct amdgpu_job *job; |
1394 | struct amdgpu_ib *ib; | |
1395 | int i, r; | |
1396 | ||
559a2858 SS |
1397 | /* Initialize the scheduler entity */ |
1398 | r = drm_sched_entity_init(&entity, DRM_SCHED_PRIORITY_NORMAL, | |
1399 | &sched, 1, NULL); | |
1400 | if (r) { | |
1401 | dev_err(adev->dev, "Failed setting up GFX kernel entity.\n"); | |
1402 | goto err; | |
1403 | } | |
1404 | ||
1405 | r = amdgpu_job_alloc_with_ib(ring->adev, &entity, NULL, | |
1406 | 64, 0, | |
d361ad5d SS |
1407 | &job); |
1408 | if (r) | |
1409 | goto err; | |
1410 | ||
1411 | job->enforce_isolation = true; | |
1412 | ||
1413 | ib = &job->ibs[0]; | |
1414 | for (i = 0; i <= ring->funcs->align_mask; ++i) | |
1415 | ib->ptr[i] = ring->funcs->nop; | |
1416 | ib->length_dw = ring->funcs->align_mask + 1; | |
1417 | ||
559a2858 | 1418 | f = amdgpu_job_submit(job); |
d361ad5d | 1419 | |
559a2858 SS |
1420 | r = dma_fence_wait(f, false); |
1421 | if (r) | |
1422 | goto err; | |
d361ad5d | 1423 | |
d361ad5d SS |
1424 | dma_fence_put(f); |
1425 | ||
559a2858 SS |
1426 | /* Clean up the scheduler entity */ |
1427 | drm_sched_entity_destroy(&entity); | |
d361ad5d SS |
1428 | return 0; |
1429 | ||
d361ad5d SS |
1430 | err: |
1431 | return r; | |
1432 | } | |
1433 | ||
1434 | static int amdgpu_gfx_run_cleaner_shader(struct amdgpu_device *adev, int xcp_id) | |
1435 | { | |
1436 | int num_xcc = NUM_XCC(adev->gfx.xcc_mask); | |
1437 | struct amdgpu_ring *ring; | |
1438 | int num_xcc_to_clear; | |
1439 | int i, r, xcc_id; | |
1440 | ||
1441 | if (adev->gfx.num_xcc_per_xcp) | |
1442 | num_xcc_to_clear = adev->gfx.num_xcc_per_xcp; | |
1443 | else | |
1444 | num_xcc_to_clear = 1; | |
1445 | ||
1446 | for (xcc_id = 0; xcc_id < num_xcc; xcc_id++) { | |
1447 | for (i = 0; i < adev->gfx.num_compute_rings; i++) { | |
1448 | ring = &adev->gfx.compute_ring[i + xcc_id * adev->gfx.num_compute_rings]; | |
1449 | if ((ring->xcp_id == xcp_id) && ring->sched.ready) { | |
1450 | r = amdgpu_gfx_run_cleaner_shader_job(ring); | |
1451 | if (r) | |
1452 | return r; | |
1453 | num_xcc_to_clear--; | |
1454 | break; | |
1455 | } | |
1456 | } | |
1457 | } | |
1458 | ||
1459 | if (num_xcc_to_clear) | |
1460 | return -ENOENT; | |
1461 | ||
1462 | return 0; | |
1463 | } | |
1464 | ||
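/**
 * amdgpu_gfx_set_run_cleaner_shader - sysfs trigger for the cleaner shader
 * @dev: device pointer
 * @attr: device attribute
 * @buf: user input, the compute partition index to clean
 * @count: size of the input
 *
 * Store handler for the write-only "run_cleaner_shader" sysfs file. The value
 * written selects the compute partition to clean (only 0 is valid on
 * unpartitioned devices). Writes are rejected while the device is in reset or
 * suspended.
 *
 * Example (sysfs path may vary by system):
 *   # echo 0 > /sys/class/drm/card0/device/run_cleaner_shader
 *
 * Returns: @count on success, negative error code on failure.
 */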
1465 | static ssize_t amdgpu_gfx_set_run_cleaner_shader(struct device *dev, | |
1466 | struct device_attribute *attr, | |
1467 | const char *buf, | |
1468 | size_t count) | |
1469 | { | |
1470 | struct drm_device *ddev = dev_get_drvdata(dev); | |
1471 | struct amdgpu_device *adev = drm_to_adev(ddev); | |
1472 | int ret; | |
1473 | long value; | |
1474 | ||
1475 | if (amdgpu_in_reset(adev)) | |
1476 | return -EPERM; | |
1477 | if (adev->in_suspend && !adev->in_runpm) | |
1478 | return -EPERM; | |
1479 | ||
1480 | ret = kstrtol(buf, 0, &value); | |
1481 | ||
1482 | if (ret) | |
1483 | return -EINVAL; | |
1484 | ||
1485 | if (value < 0) | |
1486 | return -EINVAL; | |
1487 | ||
1488 | if (adev->xcp_mgr) { | |
1489 | if (value >= adev->xcp_mgr->num_xcps) | |
1490 | return -EINVAL; | |
1491 | } else { | |
1492 | if (value > 1) | |
1493 | return -EINVAL; | |
1494 | } | |
1495 | ||
1496 | ret = pm_runtime_get_sync(ddev->dev); | |
1497 | if (ret < 0) { | |
1498 | pm_runtime_put_autosuspend(ddev->dev); | |
1499 | return ret; | |
1500 | } | |
1501 | ||
1502 | ret = amdgpu_gfx_run_cleaner_shader(adev, value); | |
1503 | ||
1504 | pm_runtime_mark_last_busy(ddev->dev); | |
1505 | pm_runtime_put_autosuspend(ddev->dev); | |
1506 | ||
1507 | if (ret) | |
1508 | return ret; | |
1509 | ||
1510 | return count; | |
1511 | } | |
1512 | ||
e189be9b SS |
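/**
 * amdgpu_gfx_get_enforce_isolation - show the per-partition isolation setting
 * @dev: device pointer
 * @attr: device attribute
 * @buf: sysfs output buffer
 *
 * Show handler for the "enforce_isolation" sysfs file. Emits one 0/1 value
 * per compute partition, separated by spaces, or a single value when the
 * device is not partitioned.
 *
 * Returns: number of bytes written to @buf.
 */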
1513 | static ssize_t amdgpu_gfx_get_enforce_isolation(struct device *dev, |
1514 | struct device_attribute *attr, | |
1515 | char *buf) | |
1516 | { | |
1517 | struct drm_device *ddev = dev_get_drvdata(dev); | |
1518 | struct amdgpu_device *adev = drm_to_adev(ddev); | |
1519 | int i; | |
1520 | ssize_t size = 0; | |
1521 | ||
1522 | if (adev->xcp_mgr) { | |
1523 | for (i = 0; i < adev->xcp_mgr->num_xcps; i++) { | |
1524 | size += sysfs_emit_at(buf, size, "%u", adev->enforce_isolation[i]); | |
1525 | if (i < (adev->xcp_mgr->num_xcps - 1)) | |
1526 | size += sysfs_emit_at(buf, size, " "); | |
1527 | } | |
1528 | buf[size++] = '\n'; | |
1529 | } else { | |
1530 | size = sysfs_emit_at(buf, 0, "%u\n", adev->enforce_isolation[0]); | |
1531 | } | |
1532 | ||
1533 | return size; | |
1534 | } | |
1535 | ||
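/**
 * amdgpu_gfx_set_enforce_isolation - update the per-partition isolation setting
 * @dev: device pointer
 * @attr: device attribute
 * @buf: user input, one 0/1 value per compute partition
 * @count: size of the input
 *
 * Store handler for the "enforce_isolation" sysfs file. Exactly one value per
 * partition must be supplied. Enabling isolation reserves a VMID on the
 * partition's GFX hub; disabling it frees the reservation again.
 *
 * Example for a device with four partitions (sysfs path may vary by system):
 *   # echo "1 0 1 0" > /sys/class/drm/card0/device/enforce_isolation
 *
 * Returns: @count on success, negative error code on failure.
 */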
1536 | static ssize_t amdgpu_gfx_set_enforce_isolation(struct device *dev, | |
1537 | struct device_attribute *attr, | |
1538 | const char *buf, size_t count) | |
1539 | { | |
1540 | struct drm_device *ddev = dev_get_drvdata(dev); | |
1541 | struct amdgpu_device *adev = drm_to_adev(ddev); | |
1542 | long partition_values[MAX_XCP] = {0}; | |
1543 | int ret, i, num_partitions; | |
1544 | const char *input_buf = buf; | |
1545 | ||
1546 | for (i = 0; i < (adev->xcp_mgr ? adev->xcp_mgr->num_xcps : 1); i++) { | |
1547 | ret = sscanf(input_buf, "%ld", &partition_values[i]); | |
1548 | if (ret <= 0) | |
1549 | break; | |
1550 | ||
1551 | /* Move the pointer to the next value in the string */ | |
1552 | input_buf = strchr(input_buf, ' '); | |
1553 | if (input_buf) { | |
1554 | input_buf++; | |
1555 | } else { | |
1556 | i++; | |
1557 | break; | |
1558 | } | |
1559 | } | |
1560 | num_partitions = i; | |
1561 | ||
1562 | if (adev->xcp_mgr && num_partitions != adev->xcp_mgr->num_xcps) | |
1563 | return -EINVAL; | |
1564 | ||
1565 | if (!adev->xcp_mgr && num_partitions != 1) | |
1566 | return -EINVAL; | |
1567 | ||
1568 | for (i = 0; i < num_partitions; i++) { | |
1569 | if (partition_values[i] != 0 && partition_values[i] != 1) | |
1570 | return -EINVAL; | |
1571 | } | |
1572 | ||
1573 | mutex_lock(&adev->enforce_isolation_mutex); | |
1574 | ||
1575 | for (i = 0; i < num_partitions; i++) { | |
1576 | if (adev->enforce_isolation[i] && !partition_values[i]) { | |
1577 | /* Going from enabled to disabled */ | |
1578 | amdgpu_vmid_free_reserved(adev, AMDGPU_GFXHUB(i)); | |
1579 | } else if (!adev->enforce_isolation[i] && partition_values[i]) { | |
1580 | /* Going from disabled to enabled */ | |
1581 | amdgpu_vmid_alloc_reserved(adev, AMDGPU_GFXHUB(i)); | |
1582 | } | |
1583 | adev->enforce_isolation[i] = partition_values[i]; | |
1584 | } | |
1585 | ||
1586 | mutex_unlock(&adev->enforce_isolation_mutex); | |
1587 | ||
1588 | return count; | |
1589 | } | |
1590 | ||
d361ad5d SS |
1591 | static DEVICE_ATTR(run_cleaner_shader, 0200, |
1592 | NULL, amdgpu_gfx_set_run_cleaner_shader); | |
1593 | ||
e189be9b SS |
1594 | static DEVICE_ATTR(enforce_isolation, 0644, |
1595 | amdgpu_gfx_get_enforce_isolation, | |
1596 | amdgpu_gfx_set_enforce_isolation); | |
1597 | ||
50fbe0cc | 1598 | static DEVICE_ATTR(current_compute_partition, 0644, |
98a54e88 LM |
1599 | amdgpu_gfx_get_current_compute_partition, |
1600 | amdgpu_gfx_set_compute_partition); | |
1601 | ||
50fbe0cc | 1602 | static DEVICE_ATTR(available_compute_partition, 0444, |
98a54e88 LM |
1603 | amdgpu_gfx_get_available_compute_partition, NULL); |
1604 | ||
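/**
 * amdgpu_gfx_sysfs_init - create the compute partition sysfs files
 * @adev: amdgpu_device pointer
 *
 * Creates "current_compute_partition" and, when partition switching is
 * supported, "available_compute_partition". When switching is not supported
 * the current-partition file is made read-only.
 *
 * Returns: 0 on success (or when there is no XCP manager), negative error
 * code on failure.
 */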
1605 | int amdgpu_gfx_sysfs_init(struct amdgpu_device *adev) | |
1606 | { | |
f8588f05 LL |
1607 | struct amdgpu_xcp_mgr *xcp_mgr = adev->xcp_mgr; |
1608 | bool xcp_switch_supported; | |
98a54e88 LM |
1609 | int r; |
1610 | ||
f8588f05 LL |
1611 | if (!xcp_mgr) |
1612 | return 0; | |
1613 | ||
1614 | xcp_switch_supported = | |
1615 | (xcp_mgr->funcs && xcp_mgr->funcs->switch_partition_mode); | |
1616 | ||
1617 | if (!xcp_switch_supported) | |
1618 | dev_attr_current_compute_partition.attr.mode &= | |
1619 | ~(S_IWUSR | S_IWGRP | S_IWOTH); | |
1620 | ||
98a54e88 LM |
1621 | r = device_create_file(adev->dev, &dev_attr_current_compute_partition); |
1622 | if (r) | |
1623 | return r; | |
1624 | ||
f8588f05 LL |
1625 | if (xcp_switch_supported) |
1626 | r = device_create_file(adev->dev, | |
1627 | &dev_attr_available_compute_partition); | |
98a54e88 | 1628 | |
b6f90baa | 1629 | return r; |
98a54e88 | 1630 | } |
993d218f SZ |
1631 | |
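/**
 * amdgpu_gfx_sysfs_fini - remove the compute partition sysfs files
 * @adev: amdgpu_device pointer
 */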
1632 | void amdgpu_gfx_sysfs_fini(struct amdgpu_device *adev) | |
1633 | { | |
f8588f05 LL |
1634 | struct amdgpu_xcp_mgr *xcp_mgr = adev->xcp_mgr; |
1635 | bool xcp_switch_supported; | |
1636 | ||
1637 | if (!xcp_mgr) | |
1638 | return; | |
1639 | ||
1640 | xcp_switch_supported = | |
1641 | (xcp_mgr->funcs && xcp_mgr->funcs->switch_partition_mode); | |
993d218f | 1642 | device_remove_file(adev->dev, &dev_attr_current_compute_partition); |
f8588f05 LL |
1643 | |
1644 | if (xcp_switch_supported) | |
1645 | device_remove_file(adev->dev, | |
1646 | &dev_attr_available_compute_partition); | |
993d218f | 1647 | } |
aec773a1 | 1648 | |
e189be9b SS |
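/**
 * amdgpu_gfx_sysfs_isolation_shader_init - create the isolation sysfs files
 * @adev: amdgpu_device pointer
 *
 * Creates the "enforce_isolation" and "run_cleaner_shader" sysfs files.
 *
 * Returns: 0 on success, negative error code on failure.
 */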
1649 | int amdgpu_gfx_sysfs_isolation_shader_init(struct amdgpu_device *adev) |
1650 | { | |
1651 | int r; | |
1652 | ||
dc8847b0 AD |
1653 | r = device_create_file(adev->dev, &dev_attr_enforce_isolation); |
1654 | if (r) | |
1655 | return r; | |
e189be9b | 1656 | |
d361ad5d SS |
1657 | r = device_create_file(adev->dev, &dev_attr_run_cleaner_shader); |
1658 | if (r) | |
1659 | return r; | |
1660 | ||
e189be9b SS |
1661 | return 0; |
1662 | } | |
1663 | ||
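/**
 * amdgpu_gfx_sysfs_isolation_shader_fini - remove the isolation sysfs files
 * @adev: amdgpu_device pointer
 */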
1664 | void amdgpu_gfx_sysfs_isolation_shader_fini(struct amdgpu_device *adev) | |
1665 | { | |
dc8847b0 | 1666 | device_remove_file(adev->dev, &dev_attr_enforce_isolation); |
d361ad5d | 1667 | device_remove_file(adev->dev, &dev_attr_run_cleaner_shader); |
e189be9b SS |
1668 | } |
1669 | ||
aec773a1 SS |
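/**
 * amdgpu_gfx_cleaner_shader_sw_init - allocate the cleaner shader buffer
 * @adev: amdgpu_device pointer
 * @cleaner_shader_size: size of the cleaner shader binary in bytes
 *
 * Allocates a kernel BO in VRAM (or GTT) for the cleaner shader and maps it
 * for CPU access.
 *
 * Returns: 0 on success, -EOPNOTSUPP if the cleaner shader is not enabled,
 * or the error returned by the BO allocation.
 */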
1670 | int amdgpu_gfx_cleaner_shader_sw_init(struct amdgpu_device *adev, |
1671 | unsigned int cleaner_shader_size) | |
1672 | { | |
1673 | if (!adev->gfx.enable_cleaner_shader) | |
1674 | return -EOPNOTSUPP; | |
1675 | ||
1676 | return amdgpu_bo_create_kernel(adev, cleaner_shader_size, PAGE_SIZE, | |
1677 | AMDGPU_GEM_DOMAIN_VRAM | AMDGPU_GEM_DOMAIN_GTT, | |
1678 | &adev->gfx.cleaner_shader_obj, | |
1679 | &adev->gfx.cleaner_shader_gpu_addr, | |
1680 | (void **)&adev->gfx.cleaner_shader_cpu_ptr); | |
1681 | } | |
1682 | ||
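/**
 * amdgpu_gfx_cleaner_shader_sw_fini - free the cleaner shader buffer
 * @adev: amdgpu_device pointer
 */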
1683 | void amdgpu_gfx_cleaner_shader_sw_fini(struct amdgpu_device *adev) | |
1684 | { | |
1685 | if (!adev->gfx.enable_cleaner_shader) | |
1686 | return; | |
1687 | ||
1688 | amdgpu_bo_free_kernel(&adev->gfx.cleaner_shader_obj, | |
1689 | &adev->gfx.cleaner_shader_gpu_addr, | |
1690 | (void **)&adev->gfx.cleaner_shader_cpu_ptr); | |
1691 | } | |
1692 | ||
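/**
 * amdgpu_gfx_cleaner_shader_init - upload the cleaner shader binary
 * @adev: amdgpu_device pointer
 * @cleaner_shader_size: size of the cleaner shader binary in bytes
 * @cleaner_shader_ptr: pointer to the cleaner shader binary
 *
 * Copies the cleaner shader into the buffer allocated by
 * amdgpu_gfx_cleaner_shader_sw_init().
 */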
1693 | void amdgpu_gfx_cleaner_shader_init(struct amdgpu_device *adev, | |
1694 | unsigned int cleaner_shader_size, | |
1695 | const void *cleaner_shader_ptr) | |
1696 | { | |
1697 | if (!adev->gfx.enable_cleaner_shader) | |
1698 | return; | |
1699 | ||
1700 | if (adev->gfx.cleaner_shader_cpu_ptr && cleaner_shader_ptr) | |
1701 | memcpy_toio(adev->gfx.cleaner_shader_cpu_ptr, cleaner_shader_ptr, | |
1702 | cleaner_shader_size); | |
1703 | } | |
afefd6f2 SS |
1704 | |
1705 | /** | |
1706 | * amdgpu_gfx_kfd_sch_ctrl - Control the KFD scheduler from the KGD (Kernel Graphics Driver) | |
1707 | * @adev: amdgpu_device pointer | |
1708 | * @idx: Index of the scheduler to control | |
1709 | * @enable: Whether to enable or disable the KFD scheduler | |
1710 | * | |
1711 | * This function is used to control the KFD (Kernel Fusion Driver) scheduler | |
1712 | * from the KGD. It is part of the cleaner shader feature. This function plays | |
1713 | * a key role in enforcing process isolation on the GPU. | |
1714 | * | |
1715 | * The function uses a reference count mechanism (kfd_sch_req_count) to keep | |
1716 | * track of the number of requests to enable the KFD scheduler. When a request | |
1717 | * to enable the KFD scheduler is made, the reference count is decremented. | |
1718 | * When the reference count reaches zero, a delayed work is scheduled to | |
1719 | * enforce isolation once the remaining enforce-isolation time slice expires. | |
1720 | * | |
1721 | * When a request to disable the KFD scheduler is made, the function first | |
1722 | * checks if the reference count is zero. If it is, it cancels the delayed work | |
1723 | * for enforcing isolation and checks if the KFD scheduler is active. If the | |
1724 | * KFD scheduler is active, it sends a request to stop the KFD scheduler and | |
1725 | * sets the KFD scheduler state to inactive. Then, it increments the reference | |
1726 | * count. | |
1727 | * | |
1728 | * The function is synchronized using the kfd_sch_mutex to ensure that the KFD | |
1729 | * scheduler state and reference count are updated atomically. | |
1730 | * | |
1731 | * Note: If the reference count is already zero when a request to enable the | |
1732 | * KFD scheduler is made, it means there's an imbalance bug somewhere. The | |
1733 | * function triggers a warning in this case. | |
1734 | */ | |
1735 | static void amdgpu_gfx_kfd_sch_ctrl(struct amdgpu_device *adev, u32 idx, | |
1736 | bool enable) | |
1737 | { | |
1738 | mutex_lock(&adev->gfx.kfd_sch_mutex); | |
1739 | ||
1740 | if (enable) { | |
1741 | /* If the count is already 0, it means there's an imbalance bug somewhere. | |
1742 | * Note that the bug may be in a different caller than the one which triggers the | |
1743 | * WARN_ON_ONCE. | |
1744 | */ | |
1745 | if (WARN_ON_ONCE(adev->gfx.kfd_sch_req_count[idx] == 0)) { | |
1746 | dev_err(adev->dev, "Attempted to enable KFD scheduler when reference count is already zero\n"); | |
1747 | goto unlock; | |
1748 | } | |
1749 | ||
1750 | adev->gfx.kfd_sch_req_count[idx]--; | |
1751 | ||
1752 | if (adev->gfx.kfd_sch_req_count[idx] == 0 && | |
1753 | adev->gfx.kfd_sch_inactive[idx]) { | |
1754 | schedule_delayed_work(&adev->gfx.enforce_isolation[idx].work, | |
efe6a877 | 1755 | msecs_to_jiffies(adev->gfx.enforce_isolation_time[idx])); |
afefd6f2 SS |
1756 | } |
1757 | } else { | |
1758 | if (adev->gfx.kfd_sch_req_count[idx] == 0) { | |
1759 | cancel_delayed_work_sync(&adev->gfx.enforce_isolation[idx].work); | |
1760 | if (!adev->gfx.kfd_sch_inactive[idx]) { | |
1761 | amdgpu_amdkfd_stop_sched(adev, idx); | |
1762 | adev->gfx.kfd_sch_inactive[idx] = true; | |
1763 | } | |
1764 | } | |
1765 | ||
1766 | adev->gfx.kfd_sch_req_count[idx]++; | |
1767 | } | |
1768 | ||
1769 | unlock: | |
1770 | mutex_unlock(&adev->gfx.kfd_sch_mutex); | |
1771 | } | |
1772 | ||
1773 | /** | |
1774 | * amdgpu_gfx_enforce_isolation_handler - work handler for enforcing shader isolation | |
1775 | * | |
1776 | * @work: work_struct embedded (via a delayed_work) in struct amdgpu_isolation_work. | |
1777 | * | |
1778 | * This function is the work handler for enforcing shader isolation on AMD GPUs. | |
1780 | * It counts the fences still outstanding on the GFX and compute rings of the | |
1781 | * partition. If any work is still pending, it reschedules itself after a short | |
1782 | * (1 ms) delay so the remaining KGD work can drain. If no fences are pending, | |
1783 | * it signals the Kernel Fusion Driver (KFD) to resume its runqueue. The | |
1784 | * function is synchronized using the enforce_isolation_mutex. | |
1784 | */ | |
1785 | void amdgpu_gfx_enforce_isolation_handler(struct work_struct *work) | |
1786 | { | |
1787 | struct amdgpu_isolation_work *isolation_work = | |
1788 | container_of(work, struct amdgpu_isolation_work, work.work); | |
1789 | struct amdgpu_device *adev = isolation_work->adev; | |
1790 | u32 i, idx, fences = 0; | |
1791 | ||
1792 | if (isolation_work->xcp_id == AMDGPU_XCP_NO_PARTITION) | |
1793 | idx = 0; | |
1794 | else | |
1795 | idx = isolation_work->xcp_id; | |
1796 | ||
1797 | if (idx >= MAX_XCP) | |
1798 | return; | |
1799 | ||
1800 | mutex_lock(&adev->enforce_isolation_mutex); | |
1801 | for (i = 0; i < AMDGPU_MAX_GFX_RINGS; ++i) { | |
1802 | if (isolation_work->xcp_id == adev->gfx.gfx_ring[i].xcp_id) | |
1803 | fences += amdgpu_fence_count_emitted(&adev->gfx.gfx_ring[i]); | |
1804 | } | |
1805 | for (i = 0; i < (AMDGPU_MAX_COMPUTE_RINGS * AMDGPU_MAX_GC_INSTANCES); ++i) { | |
1806 | if (isolation_work->xcp_id == adev->gfx.compute_ring[i].xcp_id) | |
1807 | fences += amdgpu_fence_count_emitted(&adev->gfx.compute_ring[i]); | |
1808 | } | |
1809 | if (fences) { | |
efe6a877 | 1810 | /* we've already had our timeslice, so let's wrap this up */ |
afefd6f2 | 1811 | schedule_delayed_work(&adev->gfx.enforce_isolation[idx].work, |
efe6a877 | 1812 | msecs_to_jiffies(1)); |
afefd6f2 SS |
1813 | } else { |
1814 | /* Tell KFD to resume the runqueue */ | |
1815 | if (adev->kfd.init_complete) { | |
1816 | WARN_ON_ONCE(!adev->gfx.kfd_sch_inactive[idx]); | |
1817 | WARN_ON_ONCE(adev->gfx.kfd_sch_req_count[idx]); | |
1818 | amdgpu_amdkfd_start_sched(adev, idx); | |
1819 | adev->gfx.kfd_sch_inactive[idx] = false; | |
1820 | } | |
1821 | } | |
1822 | mutex_unlock(&adev->enforce_isolation_mutex); | |
1823 | } | |
1824 | ||
efe6a877 AD |
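/**
 * amdgpu_gfx_enforce_isolation_wait_for_kfd - honour the KFD time slice
 * @adev: amdgpu_device pointer
 * @idx: partition index
 *
 * Tracks how much of the current isolation time slice the KGD has already
 * used. If KFD work is active and the slice has expired, sleeps for one
 * slice period so pending work can drain and KFD gets a chance to run before
 * more KGD work is submitted.
 */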
1825 | static void |
1826 | amdgpu_gfx_enforce_isolation_wait_for_kfd(struct amdgpu_device *adev, | |
1827 | u32 idx) | |
1828 | { | |
1829 | unsigned long cjiffies; | |
1830 | bool wait = false; | |
1831 | ||
1832 | mutex_lock(&adev->enforce_isolation_mutex); | |
1833 | if (adev->enforce_isolation[idx]) { | |
1834 | /* set the initial values if nothing is set */ | |
1835 | if (!adev->gfx.enforce_isolation_jiffies[idx]) { | |
1836 | adev->gfx.enforce_isolation_jiffies[idx] = jiffies; | |
1837 | adev->gfx.enforce_isolation_time[idx] = GFX_SLICE_PERIOD_MS; | |
1838 | } | |
1839 | /* Make sure KFD gets a chance to run */ | |
1840 | if (amdgpu_amdkfd_compute_active(adev, idx)) { | |
1841 | cjiffies = jiffies; | |
1842 | if (time_after(cjiffies, adev->gfx.enforce_isolation_jiffies[idx])) { | |
1843 | cjiffies -= adev->gfx.enforce_isolation_jiffies[idx]; | |
1844 | if ((jiffies_to_msecs(cjiffies) >= GFX_SLICE_PERIOD_MS)) { | |
1845 | /* if our time is up, let KGD work drain before scheduling more */ | |
1846 | wait = true; | |
1847 | /* reset the timer period */ | |
1848 | adev->gfx.enforce_isolation_time[idx] = GFX_SLICE_PERIOD_MS; | |
1849 | } else { | |
1850 | /* set the timer period to what's left in our time slice */ | |
1851 | adev->gfx.enforce_isolation_time[idx] = | |
1852 | GFX_SLICE_PERIOD_MS - jiffies_to_msecs(cjiffies); | |
1853 | } | |
1854 | } else { | |
1855 | /* if jiffies wrap around we will just wait a little longer */ | |
1856 | adev->gfx.enforce_isolation_jiffies[idx] = jiffies; | |
1857 | } | |
1858 | } else { | |
1859 | /* if there is no KFD work, then set the full slice period */ | |
1860 | adev->gfx.enforce_isolation_jiffies[idx] = jiffies; | |
1861 | adev->gfx.enforce_isolation_time[idx] = GFX_SLICE_PERIOD_MS; | |
1862 | } | |
1863 | } | |
1864 | mutex_unlock(&adev->enforce_isolation_mutex); | |
1865 | ||
1866 | if (wait) | |
1867 | msleep(GFX_SLICE_PERIOD_MS); | |
1868 | } | |
1869 | ||
afefd6f2 SS |
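/**
 * amdgpu_gfx_enforce_isolation_ring_begin_use - ring begin_use isolation hook
 * @ring: ring about to be used for a KGD submission
 *
 * When isolation is enforced for the ring's partition, first waits until the
 * KFD time slice has been honoured and then requests that the KFD scheduler
 * be stopped for that partition while KGD work is outstanding.
 */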
1870 | void amdgpu_gfx_enforce_isolation_ring_begin_use(struct amdgpu_ring *ring) |
1871 | { | |
1872 | struct amdgpu_device *adev = ring->adev; | |
1873 | u32 idx; | |
1874 | ||
1875 | if (!adev->gfx.enable_cleaner_shader) | |
1876 | return; | |
1877 | ||
1878 | if (ring->xcp_id == AMDGPU_XCP_NO_PARTITION) | |
1879 | idx = 0; | |
1880 | else | |
1881 | idx = ring->xcp_id; | |
1882 | ||
1883 | if (idx >= MAX_XCP) | |
1884 | return; | |
1885 | ||
efe6a877 AD |
1886 | /* Don't submit more work until KFD has had some time */ |
1887 | amdgpu_gfx_enforce_isolation_wait_for_kfd(adev, idx); | |
1888 | ||
afefd6f2 SS |
1889 | mutex_lock(&adev->enforce_isolation_mutex); |
1890 | if (adev->enforce_isolation[idx]) { | |
1891 | if (adev->kfd.init_complete) | |
1892 | amdgpu_gfx_kfd_sch_ctrl(adev, idx, false); | |
1893 | } | |
1894 | mutex_unlock(&adev->enforce_isolation_mutex); | |
1895 | } | |
1896 | ||
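/**
 * amdgpu_gfx_enforce_isolation_ring_end_use - ring end_use isolation hook
 * @ring: ring that finished a KGD submission
 *
 * Counterpart of amdgpu_gfx_enforce_isolation_ring_begin_use(); drops the
 * request to keep the KFD scheduler stopped for the ring's partition.
 */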
1897 | void amdgpu_gfx_enforce_isolation_ring_end_use(struct amdgpu_ring *ring) | |
1898 | { | |
1899 | struct amdgpu_device *adev = ring->adev; | |
1900 | u32 idx; | |
1901 | ||
1902 | if (!adev->gfx.enable_cleaner_shader) | |
1903 | return; | |
1904 | ||
1905 | if (ring->xcp_id == AMDGPU_XCP_NO_PARTITION) | |
1906 | idx = 0; | |
1907 | else | |
1908 | idx = ring->xcp_id; | |
1909 | ||
1910 | if (idx >= MAX_XCP) | |
1911 | return; | |
1912 | ||
1913 | mutex_lock(&adev->enforce_isolation_mutex); | |
1914 | if (adev->enforce_isolation[idx]) { | |
1915 | if (adev->kfd.init_complete) | |
1916 | amdgpu_gfx_kfd_sch_ctrl(adev, idx, true); | |
1917 | } | |
1918 | mutex_unlock(&adev->enforce_isolation_mutex); | |
1919 | } | |
c5c63d9c JZ |
1920 | |
1921 | /* | |
1922 | * debugfs interface to enable/disable gfx job submission to specific rings. | |
1923 | */ | |
1924 | #if defined(CONFIG_DEBUG_FS) | |
1925 | static int amdgpu_debugfs_gfx_sched_mask_set(void *data, u64 val) | |
1926 | { | |
1927 | struct amdgpu_device *adev = (struct amdgpu_device *)data; | |
1928 | u32 i; | |
1929 | u64 mask = 0; | |
1930 | struct amdgpu_ring *ring; | |
1931 | ||
1932 | if (!adev) | |
1933 | return -ENODEV; | |
1934 | ||
1935 | mask = (1 << adev->gfx.num_gfx_rings) - 1; | |
1936 | if ((val & mask) == 0) | |
1937 | return -EINVAL; | |
1938 | ||
1939 | for (i = 0; i < adev->gfx.num_gfx_rings; ++i) { | |
1940 | ring = &adev->gfx.gfx_ring[i]; | |
1941 | if (val & (1 << i)) | |
1942 | ring->sched.ready = true; | |
1943 | else | |
1944 | ring->sched.ready = false; | |
1945 | } | |
1946 | /* publish sched.ready flag update effective immediately across smp */ | |
1947 | smp_rmb(); | |
1948 | return 0; | |
1949 | } | |
1950 | ||
1951 | static int amdgpu_debugfs_gfx_sched_mask_get(void *data, u64 *val) | |
1952 | { | |
1953 | struct amdgpu_device *adev = (struct amdgpu_device *)data; | |
1954 | u32 i; | |
1955 | u64 mask = 0; | |
1956 | struct amdgpu_ring *ring; | |
1957 | ||
1958 | if (!adev) | |
1959 | return -ENODEV; | |
1960 | for (i = 0; i < adev->gfx.num_gfx_rings; ++i) { | |
1961 | ring = &adev->gfx.gfx_ring[i]; | |
1962 | if (ring->sched.ready) | |
1963 | mask |= 1 << i; | |
1964 | } | |
1965 | ||
1966 | *val = mask; | |
1967 | return 0; | |
1968 | } | |
1969 | ||
1970 | DEFINE_DEBUGFS_ATTRIBUTE(amdgpu_debugfs_gfx_sched_mask_fops, | |
1971 | amdgpu_debugfs_gfx_sched_mask_get, | |
1972 | amdgpu_debugfs_gfx_sched_mask_set, "%llx\n"); | |
1973 | ||
1974 | #endif | |
1975 | ||
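/**
 * amdgpu_debugfs_gfx_sched_mask_init - register the gfx scheduler mask file
 * @adev: amdgpu_device pointer
 *
 * Creates "amdgpu_gfx_sched_mask" in the device's DRM debugfs directory when
 * more than one GFX ring is present. Each bit in the mask enables (1) or
 * disables (0) job submission to the corresponding GFX ring.
 *
 * Example (debugfs path and minor number may vary by system):
 *   # echo 0x1 > /sys/kernel/debug/dri/0/amdgpu_gfx_sched_mask
 */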
1976 | void amdgpu_debugfs_gfx_sched_mask_init(struct amdgpu_device *adev) | |
1977 | { | |
1978 | #if defined(CONFIG_DEBUG_FS) | |
1979 | struct drm_minor *minor = adev_to_drm(adev)->primary; | |
1980 | struct dentry *root = minor->debugfs_root; | |
1981 | char name[32]; | |
1982 | ||
1983 | if (adev->gfx.num_gfx_rings <= 1) | |
1984 | return; | |
1985 | sprintf(name, "amdgpu_gfx_sched_mask"); | |
1986 | debugfs_create_file(name, 0600, root, adev, | |
1987 | &amdgpu_debugfs_gfx_sched_mask_fops); | |
1988 | #endif | |
1989 | } | |
1990 | ||
1991 | /* | |
1992 | * debugfs interface to enable/disable compute job submission to specific rings. | |
1993 | */ | |
1994 | #if defined(CONFIG_DEBUG_FS) | |
1995 | static int amdgpu_debugfs_compute_sched_mask_set(void *data, u64 val) | |
1996 | { | |
1997 | struct amdgpu_device *adev = (struct amdgpu_device *)data; | |
1998 | u32 i; | |
1999 | u64 mask = 0; | |
2000 | struct amdgpu_ring *ring; | |
2001 | ||
2002 | if (!adev) | |
2003 | return -ENODEV; | |
2004 | ||
2005 | mask = (1 << adev->gfx.num_compute_rings) - 1; | |
2006 | if ((val & mask) == 0) | |
2007 | return -EINVAL; | |
2008 | ||
2009 | for (i = 0; i < adev->gfx.num_compute_rings; ++i) { | |
2010 | ring = &adev->gfx.compute_ring[i]; | |
2011 | if (val & (1 << i)) | |
2012 | ring->sched.ready = true; | |
2013 | else | |
2014 | ring->sched.ready = false; | |
2015 | } | |
2016 | ||
2017 | /* publish sched.ready flag update effective immediately across smp */ | |
2018 | smp_rmb(); | |
2019 | return 0; | |
2020 | } | |
2021 | ||
2022 | static int amdgpu_debugfs_compute_sched_mask_get(void *data, u64 *val) | |
2023 | { | |
2024 | struct amdgpu_device *adev = (struct amdgpu_device *)data; | |
2025 | u32 i; | |
2026 | u64 mask = 0; | |
2027 | struct amdgpu_ring *ring; | |
2028 | ||
2029 | if (!adev) | |
2030 | return -ENODEV; | |
2031 | for (i = 0; i < adev->gfx.num_compute_rings; ++i) { | |
2032 | ring = &adev->gfx.compute_ring[i]; | |
2033 | if (ring->sched.ready) | |
2034 | mask |= 1 << i; | |
2035 | } | |
2036 | ||
2037 | *val = mask; | |
2038 | return 0; | |
2039 | } | |
2040 | ||
2041 | DEFINE_DEBUGFS_ATTRIBUTE(amdgpu_debugfs_compute_sched_mask_fops, | |
2042 | amdgpu_debugfs_compute_sched_mask_get, | |
2043 | amdgpu_debugfs_compute_sched_mask_set, "%llx\n"); | |
2044 | ||
2045 | #endif | |
2046 | ||
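/**
 * amdgpu_debugfs_compute_sched_mask_init - register the compute scheduler mask file
 * @adev: amdgpu_device pointer
 *
 * Creates "amdgpu_compute_sched_mask" in the device's DRM debugfs directory
 * when more than one compute ring is present. Each bit in the mask enables
 * (1) or disables (0) job submission to the corresponding compute ring.
 */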
2047 | void amdgpu_debugfs_compute_sched_mask_init(struct amdgpu_device *adev) | |
2048 | { | |
2049 | #if defined(CONFIG_DEBUG_FS) | |
2050 | struct drm_minor *minor = adev_to_drm(adev)->primary; | |
2051 | struct dentry *root = minor->debugfs_root; | |
2052 | char name[32]; | |
2053 | ||
2054 | if (adev->gfx.num_compute_rings <= 1) | |
2055 | return; | |
2056 | sprintf(name, "amdgpu_compute_sched_mask"); | |
2057 | debugfs_create_file(name, 0600, root, adev, | |
2058 | &amdgpu_debugfs_compute_sched_mask_fops); | |
2059 | #endif | |
2060 | } |