1 // SPDX-License-Identifier: MIT
3 * Copyright © 2023 Intel Corporation
8 #include <drm/drm_managed.h>
10 #include "abi/gsc_mkhi_commands_abi.h"
11 #include "generated/xe_wa_oob.h"
14 #include "xe_device.h"
15 #include "xe_exec_queue.h"
16 #include "xe_gsc_submit.h"
18 #include "xe_gt_printk.h"
22 #include "xe_sched_job.h"
25 #include "instructions/xe_gsc_commands.h"
26 #include "regs/xe_gsc_regs.h"
29 gsc_to_gt(struct xe_gsc *gsc)
31 return container_of(gsc, struct xe_gt, uc.gsc);
34 static int memcpy_fw(struct xe_gsc *gsc)
36 struct xe_gt *gt = gsc_to_gt(gsc);
37 struct xe_device *xe = gt_to_xe(gt);
38 u32 fw_size = gsc->fw.size;
42 * FIXME: xe_migrate_copy does not work with stolen mem yet, so we use
45 storage = kmalloc(fw_size, GFP_KERNEL);
49 xe_map_memcpy_from(xe, storage, &gsc->fw.bo->vmap, 0, fw_size);
50 xe_map_memcpy_to(xe, &gsc->private->vmap, 0, storage, fw_size);
51 xe_map_memset(xe, &gsc->private->vmap, fw_size, 0, gsc->private->size - fw_size);
/*
 * emit_gsc_upload - submit the GSC_FW_LOAD command on the GSC exec queue
 *
 * Builds a four-dword batch (GSC_FW_LOAD, the low and high halves of the
 * GGTT address of gsc->private, and the buffer size in 4K units ORed with
 * GSC_FW_LOAD_LIMIT_VALID), turns it into a job on gsc->q, pushes the job
 * and waits up to HZ jiffies on the job's "finished" fence.
 *
 * NOTE(review): several lines of this function are not visible in this
 * view: the declarations of bb and timeout, any checks on the results of
 * xe_bb_new()/xe_bb_create_job(), the release of the fence reference and
 * of the batch buffer, and the translation of the wait result into the
 * int return value. Confirm all of these against the complete file.
 */
58 static int emit_gsc_upload(struct xe_gsc *gsc)
60 struct xe_gt *gt = gsc_to_gt(gsc);
/* GGTT address of the private buffer, passed to the FW load command */
61 u64 offset = xe_bo_ggtt_addr(gsc->private);
63 struct xe_sched_job *job;
64 struct dma_fence *fence;
/* room for the four dwords written below */
67 bb = xe_bb_new(gt, 4, false);
71 bb->cs[bb->len++] = GSC_FW_LOAD;
72 bb->cs[bb->len++] = lower_32_bits(offset);
73 bb->cs[bb->len++] = upper_32_bits(offset);
/* buffer size expressed in 4K units, flagged as valid */
74 bb->cs[bb->len++] = (gsc->private->size / SZ_4K) | GSC_FW_LOAD_LIMIT_VALID;
76 job = xe_bb_create_job(gsc->q, bb);
82 xe_sched_job_arm(job);
/* take our own reference on the finished fence before handing the job over */
83 fence = dma_fence_get(&job->drm.s_fence->finished);
84 xe_sched_job_push(job);
/* non-interruptible wait, bounded to HZ jiffies (one second) */
86 timeout = dma_fence_wait_timeout(fence, false, HZ);
97 #define version_query_wr(xe_, map_, offset_, field_, val_) \
98 xe_map_wr_field(xe_, map_, offset_, struct gsc_get_compatibility_version_in, field_, val_)
99 #define version_query_rd(xe_, map_, offset_, field_) \
100 xe_map_rd_field(xe_, map_, offset_, struct gsc_get_compatibility_version_out, field_)
102 static u32 emit_version_query_msg(struct xe_device *xe, struct iosys_map *map, u32 wr_offset)
104 xe_map_memset(xe, map, wr_offset, 0, sizeof(struct gsc_get_compatibility_version_in));
106 version_query_wr(xe, map, wr_offset, header.group_id, MKHI_GROUP_ID_GFX_SRV);
107 version_query_wr(xe, map, wr_offset, header.command,
108 MKHI_GFX_SRV_GET_HOST_COMPATIBILITY_VERSION);
110 return wr_offset + sizeof(struct gsc_get_compatibility_version_in);
/* Size of each of the two packets (request and reply) in the query buffer */
113 #define GSC_VER_PKT_SZ SZ_4K /* 4K each for input and output */
/*
 * query_compatibility_version - ask the GSC FW for its compatibility version
 *
 * Allocates a BO of twice GSC_VER_PKT_SZ, writes a GSC header followed by the
 * version query at offset 0, submits it with the reply placed at
 * ggtt_offset + GSC_VER_PKT_SZ, validates the reply header and stores the
 * reported major/minor in the XE_UC_FW_VER_COMPATIBILITY slot of
 * gsc->fw.versions.found. The BO is unpinned at the end.
 *
 * NOTE(review): this view is missing the declarations of bo, wr_offset,
 * rd_offset, ggtt_offset and err, some call arguments, the error checks
 * after each step and the return statements. Confirm against the complete
 * file, in particular that every failure path still reaches the unpin.
 */
114 static int query_compatibility_version(struct xe_gsc *gsc)
/* destination for the version reported by the firmware */
116 struct xe_uc_fw_version *compat = &gsc->fw.versions.found[XE_UC_FW_VER_COMPATIBILITY];
117 struct xe_gt *gt = gsc_to_gt(gsc);
118 struct xe_tile *tile = gt_to_tile(gt);
119 struct xe_device *xe = gt_to_xe(gt);
/* one buffer holds both packets: request in the first half, reply in the second */
126 bo = xe_bo_create_pin_map(xe, tile, NULL, GSC_VER_PKT_SZ * 2,
128 XE_BO_CREATE_SYSTEM_BIT |
129 XE_BO_CREATE_GGTT_BIT);
131 xe_gt_err(gt, "failed to allocate bo for GSC version query\n");
135 ggtt_offset = xe_bo_ggtt_addr(bo);
/* GSC header first, then the MKHI request right after it */
137 wr_offset = xe_gsc_emit_header(xe, &bo->vmap, 0, HECI_MEADDRESS_MKHI, 0,
138 sizeof(struct gsc_get_compatibility_version_in));
139 wr_offset = emit_version_query_msg(xe, &bo->vmap, wr_offset);
/* wr_offset now equals the total size of the request packet */
141 err = xe_gsc_pkt_submit_kernel(gsc, ggtt_offset, wr_offset,
142 ggtt_offset + GSC_VER_PKT_SZ,
146 "failed to submit GSC request for compatibility version: %d\n",
/* the reply starts GSC_VER_PKT_SZ bytes into the buffer */
151 err = xe_gsc_read_out_header(xe, &bo->vmap, GSC_VER_PKT_SZ,
152 sizeof(struct gsc_get_compatibility_version_out),
/*
 * NOTE(review): the "HuC:" prefix below looks like a copy/paste leftover in
 * a GSC message; it is a runtime string, so it is left untouched here.
 */
155 xe_gt_err(gt, "HuC: invalid GSC reply for version query (err=%d)\n", err);
159 compat->major = version_query_rd(xe, &bo->vmap, rd_offset, compat_major);
160 compat->minor = version_query_rd(xe, &bo->vmap, rd_offset, compat_minor);
162 xe_gt_info(gt, "found GSC cv%u.%u\n", compat->major, compat->minor);
165 xe_bo_unpin_map_no_vm(bo);
169 static int gsc_fw_is_loaded(struct xe_gt *gt)
171 return xe_mmio_read32(gt, HECI_FWSTS1(MTL_GSC_HECI1_BASE)) &
172 HECI1_FWSTS1_INIT_COMPLETE;
175 static int gsc_fw_wait(struct xe_gt *gt)
178 * GSC load can take up to 250ms from the moment the instruction is
179 * executed by the GSCCS. To account for possible submission delays or
180 * other issues, we use a 500ms timeout in the wait here.
182 return xe_mmio_wait32(gt, HECI_FWSTS1(MTL_GSC_HECI1_BASE),
183 HECI1_FWSTS1_INIT_COMPLETE,
184 HECI1_FWSTS1_INIT_COMPLETE,
185 500 * USEC_PER_MSEC, NULL, false);
/*
 * gsc_upload - run the full GSC firmware load sequence
 *
 * In order: asserts that the firmware is loadable and the queue exists,
 * complains if the firmware already reports itself as loaded, copies the
 * image into the private buffer, flags the device as needing an FLR on fini,
 * emits the upload command, waits for init complete, queries the
 * compatibility version and checks it against the version requirements.
 *
 * NOTE(review): the declaration of err, the "if (err)" tests and every
 * return statement are missing from this view. gsc_work() treats -EEXIST
 * as a non-failure, so presumably that is what the "already loaded" branch
 * returns; confirm against the complete file.
 */
188 static int gsc_upload(struct xe_gsc *gsc)
190 struct xe_gt *gt = gsc_to_gt(gsc);
191 struct xe_device *xe = gt_to_xe(gt);
194 /* we should only be here if the init step was successful */
195 xe_assert(xe, xe_uc_fw_is_loadable(&gsc->fw) && gsc->q);
/* a firmware that is already running must not be uploaded again */
197 if (gsc_fw_is_loaded(gt)) {
198 xe_gt_err(gt, "GSC already loaded at upload time\n");
/* stage the image in the buffer that is handed to the GSC */
202 err = memcpy_fw(gsc);
204 xe_gt_err(gt, "Failed to memcpy GSC FW\n");
209 * GSC is only killed by an FLR, so we need to trigger one on unload to
210 * make sure we stop it. This is because we assign a chunk of memory to
211 * the GSC as part of the FW load, so we need to make sure it stops
212 * using it when we release it to the system on driver unload. Note that
213 * this is not a problem of the unload per-se, because the GSC will not
214 * touch that memory unless there are requests for it coming from the
215 * driver; therefore, no accesses will happen while Xe is not loaded,
216 * but if we re-load the driver then the GSC might wake up and try to
217 * access that old memory location again.
218 * Given that an FLR is a very disruptive action (see the FLR function
219 * for details), we want to do it as the last action before releasing
220 * the access to the MMIO bar, which means we need to do it as part of
/* set before the upload is emitted, so it holds even if a later step fails */
223 xe->needs_flr_on_fini = true;
225 err = emit_gsc_upload(gsc);
227 xe_gt_err(gt, "Failed to emit GSC FW upload (%pe)\n", ERR_PTR(err));
231 err = gsc_fw_wait(gt);
233 xe_gt_err(gt, "Failed to wait for GSC load (%pe)\n", ERR_PTR(err));
/* the firmware is up; ask it which interface version it implements */
237 err = query_compatibility_version(gsc);
241 err = xe_uc_fw_check_version_requirements(&gsc->fw);
245 xe_gt_dbg(gt, "GSC FW async load completed\n");
250 static void gsc_work(struct work_struct *work)
252 struct xe_gsc *gsc = container_of(work, typeof(*gsc), work);
253 struct xe_gt *gt = gsc_to_gt(gsc);
254 struct xe_device *xe = gt_to_xe(gt);
257 xe_device_mem_access_get(xe);
258 xe_force_wake_get(gt_to_fw(gt), XE_FW_GSC);
260 ret = gsc_upload(gsc);
261 if (ret && ret != -EEXIST) {
262 xe_uc_fw_change_status(&gsc->fw, XE_UC_FIRMWARE_LOAD_FAIL);
266 xe_uc_fw_change_status(&gsc->fw, XE_UC_FIRMWARE_TRANSFERRED);
268 /* HuC auth failure is not fatal */
269 if (xe_huc_is_authenticated(>->uc.huc, XE_HUC_AUTH_VIA_GUC))
270 xe_huc_auth(>->uc.huc, XE_HUC_AUTH_VIA_GSC);
273 xe_force_wake_put(gt_to_fw(gt), XE_FW_GSC);
274 xe_device_mem_access_put(xe);
/*
 * xe_gsc_init - early initialisation of the GSC uC state
 *
 * Sets the firmware type, initialises the load worker, marks the firmware
 * as not supported on a GT that is not the tile's media GT, and otherwise
 * calls xe_uc_fw_init() for the GSC firmware.
 *
 * NOTE(review): the declaration of ret, the return statements and whatever
 * sits between the xe_uc_fw_is_enabled() test and the error print are not
 * visible in this view. Per the comment below, a disabled GSC is presumably
 * reported as success rather than as a failure; confirm against the
 * complete file.
 */
277 int xe_gsc_init(struct xe_gsc *gsc)
279 struct xe_gt *gt = gsc_to_gt(gsc);
280 struct xe_tile *tile = gt_to_tile(gt);
283 gsc->fw.type = XE_UC_FW_TYPE_GSC;
/* the actual load runs later, from gsc_work() */
284 INIT_WORK(&gsc->work, gsc_work);
286 /* The GSC uC is only available on the media GT */
287 if (tile->media_gt && (gt != tile->media_gt)) {
288 xe_uc_fw_change_status(&gsc->fw, XE_UC_FIRMWARE_NOT_SUPPORTED);
293 * Some platforms can have GuC but not GSC. That would cause
294 * xe_uc_fw_init(gsc) to return a "not supported" failure code and abort
295 * all firmware loading. So check for GSC being enabled before
296 * propagating the failure back up. That way the higher level will keep
297 * going and load GuC as appropriate.
299 ret = xe_uc_fw_init(&gsc->fw);
300 if (!xe_uc_fw_is_enabled(&gsc->fw))
/*
 * NOTE(review): unlike the other messages in this file, the format string
 * below has no trailing "\n"; it is a runtime string, so it is left
 * untouched here.
 */
308 xe_gt_err(gt, "GSC init failed with %d", ret);
312 static void free_resources(struct drm_device *drm, void *arg)
314 struct xe_gsc *gsc = arg;
317 destroy_workqueue(gsc->wq);
322 xe_exec_queue_put(gsc->q);
327 xe_bo_unpin_map_no_vm(gsc->private);
/*
 * xe_gsc_init_post_hwconfig - allocate the resources needed to load the GSC
 *
 * When the firmware is available: creates a pinned and mapped SZ_4M buffer
 * with the STOLEN and GGTT flags, creates a kernel, permanent exec queue on
 * the XE_ENGINE_CLASS_OTHER instance 0 engine, allocates an ordered
 * workqueue, registers free_resources() as a managed release action and
 * marks the firmware XE_UC_FIRMWARE_LOADABLE.
 *
 * NOTE(review): this view is missing the declarations of bo and err, the
 * error checks after each allocation, the stores into gsc->private, gsc->q
 * and gsc->wq, the labels of the unwind path and the return statements.
 * hwe is dereferenced when the queue is created; whether it is checked for
 * NULL beforehand is not visible. Confirm against the complete file.
 */
332 int xe_gsc_init_post_hwconfig(struct xe_gsc *gsc)
334 struct xe_gt *gt = gsc_to_gt(gsc);
335 struct xe_tile *tile = gt_to_tile(gt);
336 struct xe_device *xe = gt_to_xe(gt);
337 struct xe_hw_engine *hwe = xe_gt_hw_engine(gt, XE_ENGINE_CLASS_OTHER, 0, true);
338 struct xe_exec_queue *q;
339 struct workqueue_struct *wq;
/* nothing to set up when there is no firmware image to load */
343 if (!xe_uc_fw_is_available(&gsc->fw))
/* buffer that memcpy_fw() fills and emit_gsc_upload() hands to the GSC */
349 bo = xe_bo_create_pin_map(xe, tile, NULL, SZ_4M,
351 XE_BO_CREATE_STOLEN_BIT |
352 XE_BO_CREATE_GGTT_BIT);
/* single-engine queue restricted to this engine's logical instance */
356 q = xe_exec_queue_create(xe, NULL,
357 BIT(hwe->logical_instance), 1, hwe,
358 EXEC_QUEUE_FLAG_KERNEL |
359 EXEC_QUEUE_FLAG_PERMANENT);
361 xe_gt_err(gt, "Failed to create queue for GSC submission\n");
366 wq = alloc_ordered_workqueue("gsc-ordered-wq", 0);
/* from here on, teardown is handled by the managed action */
376 err = drmm_add_action_or_reset(&xe->drm, free_resources, gsc);
380 xe_uc_fw_change_status(&gsc->fw, XE_UC_FIRMWARE_LOADABLE);
/* presumably the error unwind: release the queue, then the buffer */
385 xe_exec_queue_put(q);
387 xe_bo_unpin_map_no_vm(bo);
391 void xe_gsc_load_start(struct xe_gsc *gsc)
393 struct xe_gt *gt = gsc_to_gt(gsc);
395 if (!xe_uc_fw_is_loadable(&gsc->fw) || !gsc->q)
398 /* GSC FW survives GT reset and D3Hot */
399 if (gsc_fw_is_loaded(gt)) {
400 xe_uc_fw_change_status(&gsc->fw, XE_UC_FIRMWARE_TRANSFERRED);
404 queue_work(gsc->wq, &gsc->work);
407 void xe_gsc_wait_for_worker_completion(struct xe_gsc *gsc)
409 if (xe_uc_fw_is_loadable(&gsc->fw) && gsc->wq)
410 flush_work(&gsc->work);
414 * wa_14015076503: if the GSC FW is loaded, we need to alert it before doing a
415 * GSC engine reset by writing a notification bit in the GS1 register and then
416 * triggering an interrupt to GSC; from the interrupt it will take up to 200ms
417 * for the FW to prepare for the reset, so we need to wait for that amount
419 * After the reset is complete we need to then clear the GS1 register.
421 void xe_gsc_wa_14015076503(struct xe_gt *gt, bool prep)
/*
 * prep selects the direction: when true, HECI_H_GS1_ER_PREP is the value to
 * set and nothing is cleared; when false, the same bit is the value to
 * clear and nothing is set.
 *
 * NOTE(review): the end of this function is not visible in this view, nor
 * is whatever guards the CSR write below (the description of this
 * workaround mentions an interrupt and a wait only for the prepare step).
 * Confirm against the complete file.
 */
423 u32 gs1_set = prep ? HECI_H_GS1_ER_PREP : 0;
424 u32 gs1_clr = prep ? 0 : HECI_H_GS1_ER_PREP;
426 /* WA only applies if the GSC is loaded */
427 if (!XE_WA(gt, 14015076503) || !gsc_fw_is_loaded(gt))
/* update the notification bit in GS1 (HECI2 register block) */
430 xe_mmio_rmw32(gt, HECI_H_GS1(MTL_GSC_HECI2_BASE), gs1_clr, gs1_set);
433 /* make sure the reset bit is clear when writing the CSR reg */
434 xe_mmio_rmw32(gt, HECI_H_CSR(MTL_GSC_HECI2_BASE),
435 HECI_H_CSR_RST, HECI_H_CSR_IG);