Git Repo - linux.git/blame_incremental - drivers/gpu/drm/amd/amdgpu/amdgpu

0 / 2060 ( 0%)

Commit	Line	Data
	1	/*
	2	* Copyright 2014 Advanced Micro Devices, Inc.
	3	* Copyright 2008 Red Hat Inc.
	4	* Copyright 2009 Jerome Glisse.
	5	*
	6	* Permission is hereby granted, free of charge, to any person obtaining a
	7	* copy of this software and associated documentation files (the "Software"),
	8	* to deal in the Software without restriction, including without limitation
	9	* the rights to use, copy, modify, merge, publish, distribute, sublicense,
	10	* and/or sell copies of the Software, and to permit persons to whom the
	11	* Software is furnished to do so, subject to the following conditions:
	12	*
	13	* The above copyright notice and this permission notice shall be included in
	14	* all copies or substantial portions of the Software.
	15	*
	16	* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
	17	* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
	18	* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
	19	* THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
	20	* OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
	21	* ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
	22	* OTHER DEALINGS IN THE SOFTWARE.
	23	*
	24	*/
	25
	26	#include <linux/firmware.h>
	27	#include <linux/pm_runtime.h>
	28
	29	#include "amdgpu.h"
	30	#include "amdgpu_gfx.h"
	31	#include "amdgpu_rlc.h"
	32	#include "amdgpu_ras.h"
	33	#include "amdgpu_reset.h"
	34	#include "amdgpu_xcp.h"
	35	#include "amdgpu_xgmi.h"
	36
	37	/* delay 0.1 second to enable gfx off feature */
	38	#define GFX_OFF_DELAY_ENABLE msecs_to_jiffies(100)
	39
	40	#define GFX_OFF_NO_DELAY 0
	41
	42	/*
	43	* GPU GFX IP block helper functions.
	44	*/
	45
	46	int amdgpu_gfx_mec_queue_to_bit(struct amdgpu_device *adev, int mec,
	47	int pipe, int queue)
	48	{
	49	int bit = 0;
	50
	51	bit += mec * adev->gfx.mec.num_pipe_per_mec
	52	* adev->gfx.mec.num_queue_per_pipe;
	53	bit += pipe * adev->gfx.mec.num_queue_per_pipe;
	54	bit += queue;
	55
	56	return bit;
	57	}
	58
	59	void amdgpu_queue_mask_bit_to_mec_queue(struct amdgpu_device *adev, int bit,
	60	int mec, int pipe, int *queue)
	61	{
	62	*queue = bit % adev->gfx.mec.num_queue_per_pipe;
	63	*pipe = (bit / adev->gfx.mec.num_queue_per_pipe)
	64	% adev->gfx.mec.num_pipe_per_mec;
	65	*mec = (bit / adev->gfx.mec.num_queue_per_pipe)
	66	/ adev->gfx.mec.num_pipe_per_mec;
	67
	68	}
	69
	70	bool amdgpu_gfx_is_mec_queue_enabled(struct amdgpu_device *adev,
	71	int xcc_id, int mec, int pipe, int queue)
	72	{
	73	return test_bit(amdgpu_gfx_mec_queue_to_bit(adev, mec, pipe, queue),
	74	adev->gfx.mec_bitmap[xcc_id].queue_bitmap);
	75	}
	76
	77	int amdgpu_gfx_me_queue_to_bit(struct amdgpu_device *adev,
	78	int me, int pipe, int queue)
	79	{
	80	int bit = 0;
	81
	82	bit += me * adev->gfx.me.num_pipe_per_me
	83	* adev->gfx.me.num_queue_per_pipe;
	84	bit += pipe * adev->gfx.me.num_queue_per_pipe;
	85	bit += queue;
	86
	87	return bit;
	88	}
	89
	90	bool amdgpu_gfx_is_me_queue_enabled(struct amdgpu_device *adev,
	91	int me, int pipe, int queue)
	92	{
	93	return test_bit(amdgpu_gfx_me_queue_to_bit(adev, me, pipe, queue),
	94	adev->gfx.me.queue_bitmap);
	95	}
	96
	97	/**
	98	* amdgpu_gfx_parse_disable_cu - Parse the disable_cu module parameter
	99	*
	100	* @mask: array in which the per-shader array disable masks will be stored
	101	* @max_se: number of SEs
	102	* @max_sh: number of SHs
	103	*
	104	* The bitmask of CUs to be disabled in the shader array determined by se and
	105	* sh is stored in mask[se * max_sh + sh].
	106	*/
	107	void amdgpu_gfx_parse_disable_cu(unsigned int *mask, unsigned int max_se, unsigned int max_sh)
	108	{
	109	unsigned int se, sh, cu;
	110	const char *p;
	111
	112	memset(mask, 0, sizeof(mask) max_se * max_sh);
	113
	114	if (!amdgpu_disable_cu \|\| !*amdgpu_disable_cu)
	115	return;
	116
	117	p = amdgpu_disable_cu;
	118	for (;;) {
	119	char *next;
	120	int ret = sscanf(p, "%u.%u.%u", &se, &sh, &cu);
	121
	122	if (ret < 3) {
	123	DRM_ERROR("amdgpu: could not parse disable_cu\n");
	124	return;
	125	}
	126
	127	if (se < max_se && sh < max_sh && cu < 16) {
	128	DRM_INFO("amdgpu: disabling CU %u.%u.%u\n", se, sh, cu);
	129	mask[se * max_sh + sh] \|= 1u << cu;
	130	} else {
	131	DRM_ERROR("amdgpu: disable_cu %u.%u.%u is out of range\n",
	132	se, sh, cu);
	133	}
	134
	135	next = strchr(p, ',');
	136	if (!next)
	137	break;
	138	p = next + 1;
	139	}
	140	}
	141
	142	static bool amdgpu_gfx_is_graphics_multipipe_capable(struct amdgpu_device *adev)
	143	{
	144	return amdgpu_async_gfx_ring && adev->gfx.me.num_pipe_per_me > 1;
	145	}
	146
	147	static bool amdgpu_gfx_is_compute_multipipe_capable(struct amdgpu_device *adev)
	148	{
	149	if (amdgpu_compute_multipipe != -1) {
	150	DRM_INFO("amdgpu: forcing compute pipe policy %d\n",
	151	amdgpu_compute_multipipe);
	152	return amdgpu_compute_multipipe == 1;
	153	}
	154
	155	if (amdgpu_ip_version(adev, GC_HWIP, 0) > IP_VERSION(9, 0, 0))
	156	return true;
	157
	158	/* FIXME: spreading the queues across pipes causes perf regressions
	159	* on POLARIS11 compute workloads */
	160	if (adev->asic_type == CHIP_POLARIS11)
	161	return false;
	162
	163	return adev->gfx.mec.num_mec > 1;
	164	}
	165
	166	bool amdgpu_gfx_is_high_priority_graphics_queue(struct amdgpu_device *adev,
	167	struct amdgpu_ring *ring)
	168	{
	169	int queue = ring->queue;
	170	int pipe = ring->pipe;
	171
	172	/* Policy: use pipe1 queue0 as high priority graphics queue if we
	173	* have more than one gfx pipe.
	174	*/
	175	if (amdgpu_gfx_is_graphics_multipipe_capable(adev) &&
	176	adev->gfx.num_gfx_rings > 1 && pipe == 1 && queue == 0) {
	177	int me = ring->me;
	178	int bit;
	179
	180	bit = amdgpu_gfx_me_queue_to_bit(adev, me, pipe, queue);
	181	if (ring == &adev->gfx.gfx_ring[bit])
	182	return true;
	183	}
	184
	185	return false;
	186	}
	187
	188	bool amdgpu_gfx_is_high_priority_compute_queue(struct amdgpu_device *adev,
	189	struct amdgpu_ring *ring)
	190	{
	191	/* Policy: use 1st queue as high priority compute queue if we
	192	* have more than one compute queue.
	193	*/
	194	if (adev->gfx.num_compute_rings > 1 &&
	195	ring == &adev->gfx.compute_ring[0])
	196	return true;
	197
	198	return false;
	199	}
	200
	201	void amdgpu_gfx_compute_queue_acquire(struct amdgpu_device *adev)
	202	{
	203	int i, j, queue, pipe;
	204	bool multipipe_policy = amdgpu_gfx_is_compute_multipipe_capable(adev);
	205	int max_queues_per_mec = min(adev->gfx.mec.num_pipe_per_mec *
	206	adev->gfx.mec.num_queue_per_pipe,
	207	adev->gfx.num_compute_rings);
	208	int num_xcc = adev->gfx.xcc_mask ? NUM_XCC(adev->gfx.xcc_mask) : 1;
	209
	210	if (multipipe_policy) {
	211	/* policy: make queues evenly cross all pipes on MEC1 only
	212	* for multiple xcc, just use the original policy for simplicity */
	213	for (j = 0; j < num_xcc; j++) {
	214	for (i = 0; i < max_queues_per_mec; i++) {
	215	pipe = i % adev->gfx.mec.num_pipe_per_mec;
	216	queue = (i / adev->gfx.mec.num_pipe_per_mec) %
	217	adev->gfx.mec.num_queue_per_pipe;
	218
	219	set_bit(pipe * adev->gfx.mec.num_queue_per_pipe + queue,
	220	adev->gfx.mec_bitmap[j].queue_bitmap);
	221	}
	222	}
	223	} else {
	224	/* policy: amdgpu owns all queues in the given pipe */
	225	for (j = 0; j < num_xcc; j++) {
	226	for (i = 0; i < max_queues_per_mec; ++i)
	227	set_bit(i, adev->gfx.mec_bitmap[j].queue_bitmap);
	228	}
	229	}
	230
	231	for (j = 0; j < num_xcc; j++) {
	232	dev_dbg(adev->dev, "mec queue bitmap weight=%d\n",
	233	bitmap_weight(adev->gfx.mec_bitmap[j].queue_bitmap, AMDGPU_MAX_COMPUTE_QUEUES));
	234	}
	235	}
	236
	237	void amdgpu_gfx_graphics_queue_acquire(struct amdgpu_device *adev)
	238	{
	239	int i, queue, pipe;
	240	bool multipipe_policy = amdgpu_gfx_is_graphics_multipipe_capable(adev);
	241	int max_queues_per_me = adev->gfx.me.num_pipe_per_me *
	242	adev->gfx.me.num_queue_per_pipe;
	243
	244	if (multipipe_policy) {
	245	/* policy: amdgpu owns the first queue per pipe at this stage
	246	* will extend to mulitple queues per pipe later */
	247	for (i = 0; i < max_queues_per_me; i++) {
	248	pipe = i % adev->gfx.me.num_pipe_per_me;
	249	queue = (i / adev->gfx.me.num_pipe_per_me) %
	250	adev->gfx.me.num_queue_per_pipe;
	251
	252	set_bit(pipe * adev->gfx.me.num_queue_per_pipe + queue,
	253	adev->gfx.me.queue_bitmap);
	254	}
	255	} else {
	256	for (i = 0; i < max_queues_per_me; ++i)
	257	set_bit(i, adev->gfx.me.queue_bitmap);
	258	}
	259
	260	/* update the number of active graphics rings */
	261	adev->gfx.num_gfx_rings =
	262	bitmap_weight(adev->gfx.me.queue_bitmap, AMDGPU_MAX_GFX_QUEUES);
	263	}
	264
	265	static int amdgpu_gfx_kiq_acquire(struct amdgpu_device *adev,
	266	struct amdgpu_ring *ring, int xcc_id)
	267	{
	268	int queue_bit;
	269	int mec, pipe, queue;
	270
	271	queue_bit = adev->gfx.mec.num_mec
	272	* adev->gfx.mec.num_pipe_per_mec
	273	* adev->gfx.mec.num_queue_per_pipe;
	274
	275	while (--queue_bit >= 0) {
	276	if (test_bit(queue_bit, adev->gfx.mec_bitmap[xcc_id].queue_bitmap))
	277	continue;
	278
	279	amdgpu_queue_mask_bit_to_mec_queue(adev, queue_bit, &mec, &pipe, &queue);
	280
	281	/*
	282	* 1. Using pipes 2/3 from MEC 2 seems cause problems.
	283	* 2. It must use queue id 0, because CGPG_IDLE/SAVE/LOAD/RUN
	284	* only can be issued on queue 0.
	285	*/
	286	if ((mec == 1 && pipe > 1) \|\| queue != 0)
	287	continue;
	288
	289	ring->me = mec + 1;
	290	ring->pipe = pipe;
	291	ring->queue = queue;
	292
	293	return 0;
	294	}
	295
	296	dev_err(adev->dev, "Failed to find a queue for KIQ\n");
	297	return -EINVAL;
	298	}
	299
	300	int amdgpu_gfx_kiq_init_ring(struct amdgpu_device *adev, int xcc_id)
	301	{
	302	struct amdgpu_kiq *kiq = &adev->gfx.kiq[xcc_id];
	303	struct amdgpu_irq_src *irq = &kiq->irq;
	304	struct amdgpu_ring *ring = &kiq->ring;
	305	int r = 0;
	306
	307	spin_lock_init(&kiq->ring_lock);
	308
	309	ring->adev = NULL;
	310	ring->ring_obj = NULL;
	311	ring->use_doorbell = true;
	312	ring->xcc_id = xcc_id;
	313	ring->vm_hub = AMDGPU_GFXHUB(xcc_id);
	314	ring->doorbell_index =
	315	(adev->doorbell_index.kiq +
	316	xcc_id * adev->doorbell_index.xcc_doorbell_range)
	317	<< 1;
	318
	319	r = amdgpu_gfx_kiq_acquire(adev, ring, xcc_id);
	320	if (r)
	321	return r;
	322
	323	ring->eop_gpu_addr = kiq->eop_gpu_addr;
	324	ring->no_scheduler = true;
	325	snprintf(ring->name, sizeof(ring->name), "kiq_%hhu.%hhu.%hhu.%hhu",
	326	(unsigned char)xcc_id, (unsigned char)ring->me,
	327	(unsigned char)ring->pipe, (unsigned char)ring->queue);
	328	r = amdgpu_ring_init(adev, ring, 1024, irq, AMDGPU_CP_KIQ_IRQ_DRIVER0,
	329	AMDGPU_RING_PRIO_DEFAULT, NULL);
	330	if (r)
	331	dev_warn(adev->dev, "(%d) failed to init kiq ring\n", r);
	332
	333	return r;
	334	}
	335
	336	void amdgpu_gfx_kiq_free_ring(struct amdgpu_ring *ring)
	337	{
	338	amdgpu_ring_fini(ring);
	339	}
	340
	341	void amdgpu_gfx_kiq_fini(struct amdgpu_device *adev, int xcc_id)
	342	{
	343	struct amdgpu_kiq *kiq = &adev->gfx.kiq[xcc_id];
	344
	345	amdgpu_bo_free_kernel(&kiq->eop_obj, &kiq->eop_gpu_addr, NULL);
	346	}
	347
	348	int amdgpu_gfx_kiq_init(struct amdgpu_device *adev,
	349	unsigned int hpd_size, int xcc_id)
	350	{
	351	int r;
	352	u32 *hpd;
	353	struct amdgpu_kiq *kiq = &adev->gfx.kiq[xcc_id];
	354
	355	r = amdgpu_bo_create_kernel(adev, hpd_size, PAGE_SIZE,
	356	AMDGPU_GEM_DOMAIN_GTT, &kiq->eop_obj,
	357	&kiq->eop_gpu_addr, (void **)&hpd);
	358	if (r) {
	359	dev_warn(adev->dev, "failed to create KIQ bo (%d).\n", r);
	360	return r;
	361	}
	362
	363	memset(hpd, 0, hpd_size);
	364
	365	r = amdgpu_bo_reserve(kiq->eop_obj, true);
	366	if (unlikely(r != 0))
	367	dev_warn(adev->dev, "(%d) reserve kiq eop bo failed\n", r);
	368	amdgpu_bo_kunmap(kiq->eop_obj);
	369	amdgpu_bo_unreserve(kiq->eop_obj);
	370
	371	return 0;
	372	}
	373
	374	/* create MQD for each compute/gfx queue */
	375	int amdgpu_gfx_mqd_sw_init(struct amdgpu_device *adev,
	376	unsigned int mqd_size, int xcc_id)
	377	{
	378	int r, i, j;
	379	struct amdgpu_kiq *kiq = &adev->gfx.kiq[xcc_id];
	380	struct amdgpu_ring *ring = &kiq->ring;
	381	u32 domain = AMDGPU_GEM_DOMAIN_GTT;
	382
	383	#if !defined(CONFIG_ARM) && !defined(CONFIG_ARM64)
	384	/* Only enable on gfx10 and 11 for now to avoid changing behavior on older chips */
	385	if (amdgpu_ip_version(adev, GC_HWIP, 0) >= IP_VERSION(10, 0, 0))
	386	domain \|= AMDGPU_GEM_DOMAIN_VRAM;
	387	#endif
	388
	389	/* create MQD for KIQ */
	390	if (!adev->enable_mes_kiq && !ring->mqd_obj) {
	391	/* originaly the KIQ MQD is put in GTT domain, but for SRIOV VRAM domain is a must
	392	* otherwise hypervisor trigger SAVE_VF fail after driver unloaded which mean MQD
	393	* deallocated and gart_unbind, to strict diverage we decide to use VRAM domain for
	394	* KIQ MQD no matter SRIOV or Bare-metal
	395	*/
	396	r = amdgpu_bo_create_kernel(adev, mqd_size, PAGE_SIZE,
	397	AMDGPU_GEM_DOMAIN_VRAM \|
	398	AMDGPU_GEM_DOMAIN_GTT,
	399	&ring->mqd_obj,
	400	&ring->mqd_gpu_addr,
	401	&ring->mqd_ptr);
	402	if (r) {
	403	dev_warn(adev->dev, "failed to create ring mqd ob (%d)", r);
	404	return r;
	405	}
	406
	407	/* prepare MQD backup */
	408	kiq->mqd_backup = kzalloc(mqd_size, GFP_KERNEL);
	409	if (!kiq->mqd_backup) {
	410	dev_warn(adev->dev,
	411	"no memory to create MQD backup for ring %s\n", ring->name);
	412	return -ENOMEM;
	413	}
	414	}
	415
	416	if (adev->asic_type >= CHIP_NAVI10 && amdgpu_async_gfx_ring) {
	417	/* create MQD for each KGQ */
	418	for (i = 0; i < adev->gfx.num_gfx_rings; i++) {
	419	ring = &adev->gfx.gfx_ring[i];
	420	if (!ring->mqd_obj) {
	421	r = amdgpu_bo_create_kernel(adev, mqd_size, PAGE_SIZE,
	422	domain, &ring->mqd_obj,
	423	&ring->mqd_gpu_addr, &ring->mqd_ptr);
	424	if (r) {
	425	dev_warn(adev->dev, "failed to create ring mqd bo (%d)", r);
	426	return r;
	427	}
	428
	429	ring->mqd_size = mqd_size;
	430	/* prepare MQD backup */
	431	adev->gfx.me.mqd_backup[i] = kzalloc(mqd_size, GFP_KERNEL);
	432	if (!adev->gfx.me.mqd_backup[i]) {
	433	dev_warn(adev->dev, "no memory to create MQD backup for ring %s\n", ring->name);
	434	return -ENOMEM;
	435	}
	436	}
	437	}
	438	}
	439
	440	/* create MQD for each KCQ */
	441	for (i = 0; i < adev->gfx.num_compute_rings; i++) {
	442	j = i + xcc_id * adev->gfx.num_compute_rings;
	443	ring = &adev->gfx.compute_ring[j];
	444	if (!ring->mqd_obj) {
	445	r = amdgpu_bo_create_kernel(adev, mqd_size, PAGE_SIZE,
	446	domain, &ring->mqd_obj,
	447	&ring->mqd_gpu_addr, &ring->mqd_ptr);
	448	if (r) {
	449	dev_warn(adev->dev, "failed to create ring mqd bo (%d)", r);
	450	return r;
	451	}
	452
	453	ring->mqd_size = mqd_size;
	454	/* prepare MQD backup */
	455	adev->gfx.mec.mqd_backup[j] = kzalloc(mqd_size, GFP_KERNEL);
	456	if (!adev->gfx.mec.mqd_backup[j]) {
	457	dev_warn(adev->dev, "no memory to create MQD backup for ring %s\n", ring->name);
	458	return -ENOMEM;
	459	}
	460	}
	461	}
	462
	463	return 0;
	464	}
	465
	466	void amdgpu_gfx_mqd_sw_fini(struct amdgpu_device *adev, int xcc_id)
	467	{
	468	struct amdgpu_ring *ring = NULL;
	469	int i, j;
	470	struct amdgpu_kiq *kiq = &adev->gfx.kiq[xcc_id];
	471
	472	if (adev->asic_type >= CHIP_NAVI10 && amdgpu_async_gfx_ring) {
	473	for (i = 0; i < adev->gfx.num_gfx_rings; i++) {
	474	ring = &adev->gfx.gfx_ring[i];
	475	kfree(adev->gfx.me.mqd_backup[i]);
	476	amdgpu_bo_free_kernel(&ring->mqd_obj,
	477	&ring->mqd_gpu_addr,
	478	&ring->mqd_ptr);
	479	}
	480	}
	481
	482	for (i = 0; i < adev->gfx.num_compute_rings; i++) {
	483	j = i + xcc_id * adev->gfx.num_compute_rings;
	484	ring = &adev->gfx.compute_ring[j];
	485	kfree(adev->gfx.mec.mqd_backup[j]);
	486	amdgpu_bo_free_kernel(&ring->mqd_obj,
	487	&ring->mqd_gpu_addr,
	488	&ring->mqd_ptr);
	489	}
	490
	491	ring = &kiq->ring;
	492	kfree(kiq->mqd_backup);
	493	amdgpu_bo_free_kernel(&ring->mqd_obj,
	494	&ring->mqd_gpu_addr,
	495	&ring->mqd_ptr);
	496	}
	497
	498	int amdgpu_gfx_disable_kcq(struct amdgpu_device *adev, int xcc_id)
	499	{
	500	struct amdgpu_kiq *kiq = &adev->gfx.kiq[xcc_id];
	501	struct amdgpu_ring *kiq_ring = &kiq->ring;
	502	int i, r = 0;
	503	int j;
	504
	505	if (adev->enable_mes) {
	506	for (i = 0; i < adev->gfx.num_compute_rings; i++) {
	507	j = i + xcc_id * adev->gfx.num_compute_rings;
	508	amdgpu_mes_unmap_legacy_queue(adev,
	509	&adev->gfx.compute_ring[j],
	510	RESET_QUEUES, 0, 0);
	511	}
	512	return 0;
	513	}
	514
	515	if (!kiq->pmf \|\| !kiq->pmf->kiq_unmap_queues)
	516	return -EINVAL;
	517
	518	spin_lock(&kiq->ring_lock);
	519	if (amdgpu_ring_alloc(kiq_ring, kiq->pmf->unmap_queues_size *
	520	adev->gfx.num_compute_rings)) {
	521	spin_unlock(&kiq->ring_lock);
	522	return -ENOMEM;
	523	}
	524
	525	for (i = 0; i < adev->gfx.num_compute_rings; i++) {
	526	j = i + xcc_id * adev->gfx.num_compute_rings;
	527	kiq->pmf->kiq_unmap_queues(kiq_ring,
	528	&adev->gfx.compute_ring[j],
	529	RESET_QUEUES, 0, 0);
	530	}
	531
	532	/**
	533	* This is workaround: only skip kiq_ring test
	534	* during ras recovery in suspend stage for gfx9.4.3
	535	*/
	536	if ((amdgpu_ip_version(adev, GC_HWIP, 0) == IP_VERSION(9, 4, 3) \|\|
	537	amdgpu_ip_version(adev, GC_HWIP, 0) == IP_VERSION(9, 4, 4)) &&
	538	amdgpu_ras_in_recovery(adev)) {
	539	spin_unlock(&kiq->ring_lock);
	540	return 0;
	541	}
	542
	543	if (kiq_ring->sched.ready && !adev->job_hang)
	544	r = amdgpu_ring_test_helper(kiq_ring);
	545	spin_unlock(&kiq->ring_lock);
	546
	547	return r;
	548	}
	549
	550	int amdgpu_gfx_disable_kgq(struct amdgpu_device *adev, int xcc_id)
	551	{
	552	struct amdgpu_kiq *kiq = &adev->gfx.kiq[xcc_id];
	553	struct amdgpu_ring *kiq_ring = &kiq->ring;
	554	int i, r = 0;
	555	int j;
	556
	557	if (adev->enable_mes) {
	558	if (amdgpu_gfx_is_master_xcc(adev, xcc_id)) {
	559	for (i = 0; i < adev->gfx.num_gfx_rings; i++) {
	560	j = i + xcc_id * adev->gfx.num_gfx_rings;
	561	amdgpu_mes_unmap_legacy_queue(adev,
	562	&adev->gfx.gfx_ring[j],
	563	PREEMPT_QUEUES, 0, 0);
	564	}
	565	}
	566	return 0;
	567	}
	568
	569	if (!kiq->pmf \|\| !kiq->pmf->kiq_unmap_queues)
	570	return -EINVAL;
	571
	572	spin_lock(&kiq->ring_lock);
	573	if (amdgpu_gfx_is_master_xcc(adev, xcc_id)) {
	574	if (amdgpu_ring_alloc(kiq_ring, kiq->pmf->unmap_queues_size *
	575	adev->gfx.num_gfx_rings)) {
	576	spin_unlock(&kiq->ring_lock);
	577	return -ENOMEM;
	578	}
	579
	580	for (i = 0; i < adev->gfx.num_gfx_rings; i++) {
	581	j = i + xcc_id * adev->gfx.num_gfx_rings;
	582	kiq->pmf->kiq_unmap_queues(kiq_ring,
	583	&adev->gfx.gfx_ring[j],
	584	PREEMPT_QUEUES, 0, 0);
	585	}
	586	}
	587
	588	if (adev->gfx.kiq[0].ring.sched.ready && !adev->job_hang)
	589	r = amdgpu_ring_test_helper(kiq_ring);
	590	spin_unlock(&kiq->ring_lock);
	591
	592	return r;
	593	}
	594
	595	int amdgpu_queue_mask_bit_to_set_resource_bit(struct amdgpu_device *adev,
	596	int queue_bit)
	597	{
	598	int mec, pipe, queue;
	599	int set_resource_bit = 0;
	600
	601	amdgpu_queue_mask_bit_to_mec_queue(adev, queue_bit, &mec, &pipe, &queue);
	602
	603	set_resource_bit = mec * 4 * 8 + pipe * 8 + queue;
	604
	605	return set_resource_bit;
	606	}
	607
	608	static int amdgpu_gfx_mes_enable_kcq(struct amdgpu_device *adev, int xcc_id)
	609	{
	610	struct amdgpu_kiq *kiq = &adev->gfx.kiq[xcc_id];
	611	struct amdgpu_ring *kiq_ring = &kiq->ring;
	612	uint64_t queue_mask = ~0ULL;
	613	int r, i, j;
	614
	615	amdgpu_device_flush_hdp(adev, NULL);
	616
	617	if (!adev->enable_uni_mes) {
	618	spin_lock(&kiq->ring_lock);
	619	r = amdgpu_ring_alloc(kiq_ring, kiq->pmf->set_resources_size);
	620	if (r) {
	621	dev_err(adev->dev, "Failed to lock KIQ (%d).\n", r);
	622	spin_unlock(&kiq->ring_lock);
	623	return r;
	624	}
	625
	626	kiq->pmf->kiq_set_resources(kiq_ring, queue_mask);
	627	r = amdgpu_ring_test_helper(kiq_ring);
	628	spin_unlock(&kiq->ring_lock);
	629	if (r)
	630	dev_err(adev->dev, "KIQ failed to set resources\n");
	631	}
	632
	633	for (i = 0; i < adev->gfx.num_compute_rings; i++) {
	634	j = i + xcc_id * adev->gfx.num_compute_rings;
	635	r = amdgpu_mes_map_legacy_queue(adev,
	636	&adev->gfx.compute_ring[j]);
	637	if (r) {
	638	dev_err(adev->dev, "failed to map compute queue\n");
	639	return r;
	640	}
	641	}
	642
	643	return 0;
	644	}
	645
	646	int amdgpu_gfx_enable_kcq(struct amdgpu_device *adev, int xcc_id)
	647	{
	648	struct amdgpu_kiq *kiq = &adev->gfx.kiq[xcc_id];
	649	struct amdgpu_ring *kiq_ring = &kiq->ring;
	650	uint64_t queue_mask = 0;
	651	int r, i, j;
	652
	653	if (adev->mes.enable_legacy_queue_map)
	654	return amdgpu_gfx_mes_enable_kcq(adev, xcc_id);
	655
	656	if (!kiq->pmf \|\| !kiq->pmf->kiq_map_queues \|\| !kiq->pmf->kiq_set_resources)
	657	return -EINVAL;
	658
	659	for (i = 0; i < AMDGPU_MAX_COMPUTE_QUEUES; ++i) {
	660	if (!test_bit(i, adev->gfx.mec_bitmap[xcc_id].queue_bitmap))
	661	continue;
	662
	663	/* This situation may be hit in the future if a new HW
	664	* generation exposes more than 64 queues. If so, the
	665	* definition of queue_mask needs updating */
	666	if (WARN_ON(i > (sizeof(queue_mask)*8))) {
	667	DRM_ERROR("Invalid KCQ enabled: %d\n", i);
	668	break;
	669	}
	670
	671	queue_mask \|= (1ull << amdgpu_queue_mask_bit_to_set_resource_bit(adev, i));
	672	}
	673
	674	amdgpu_device_flush_hdp(adev, NULL);
	675
	676	DRM_INFO("kiq ring mec %d pipe %d q %d\n", kiq_ring->me, kiq_ring->pipe,
	677	kiq_ring->queue);
	678
	679	spin_lock(&kiq->ring_lock);
	680	r = amdgpu_ring_alloc(kiq_ring, kiq->pmf->map_queues_size *
	681	adev->gfx.num_compute_rings +
	682	kiq->pmf->set_resources_size);
	683	if (r) {
	684	DRM_ERROR("Failed to lock KIQ (%d).\n", r);
	685	spin_unlock(&kiq->ring_lock);
	686	return r;
	687	}
	688
	689	kiq->pmf->kiq_set_resources(kiq_ring, queue_mask);
	690	for (i = 0; i < adev->gfx.num_compute_rings; i++) {
	691	j = i + xcc_id * adev->gfx.num_compute_rings;
	692	kiq->pmf->kiq_map_queues(kiq_ring,
	693	&adev->gfx.compute_ring[j]);
	694	}
	695
	696	r = amdgpu_ring_test_helper(kiq_ring);
	697	spin_unlock(&kiq->ring_lock);
	698	if (r)
	699	DRM_ERROR("KCQ enable failed\n");
	700
	701	return r;
	702	}
	703
	704	int amdgpu_gfx_enable_kgq(struct amdgpu_device *adev, int xcc_id)
	705	{
	706	struct amdgpu_kiq *kiq = &adev->gfx.kiq[xcc_id];
	707	struct amdgpu_ring *kiq_ring = &kiq->ring;
	708	int r, i, j;
	709
	710	if (!kiq->pmf \|\| !kiq->pmf->kiq_map_queues)
	711	return -EINVAL;
	712
	713	amdgpu_device_flush_hdp(adev, NULL);
	714
	715	if (adev->mes.enable_legacy_queue_map) {
	716	for (i = 0; i < adev->gfx.num_gfx_rings; i++) {
	717	j = i + xcc_id * adev->gfx.num_gfx_rings;
	718	r = amdgpu_mes_map_legacy_queue(adev,
	719	&adev->gfx.gfx_ring[j]);
	720	if (r) {
	721	DRM_ERROR("failed to map gfx queue\n");
	722	return r;
	723	}
	724	}
	725
	726	return 0;
	727	}
	728
	729	spin_lock(&kiq->ring_lock);
	730	/* No need to map kcq on the slave */
	731	if (amdgpu_gfx_is_master_xcc(adev, xcc_id)) {
	732	r = amdgpu_ring_alloc(kiq_ring, kiq->pmf->map_queues_size *
	733	adev->gfx.num_gfx_rings);
	734	if (r) {
	735	DRM_ERROR("Failed to lock KIQ (%d).\n", r);
	736	spin_unlock(&kiq->ring_lock);
	737	return r;
	738	}
	739
	740	for (i = 0; i < adev->gfx.num_gfx_rings; i++) {
	741	j = i + xcc_id * adev->gfx.num_gfx_rings;
	742	kiq->pmf->kiq_map_queues(kiq_ring,
	743	&adev->gfx.gfx_ring[j]);
	744	}
	745	}
	746
	747	r = amdgpu_ring_test_helper(kiq_ring);
	748	spin_unlock(&kiq->ring_lock);
	749	if (r)
	750	DRM_ERROR("KGQ enable failed\n");
	751
	752	return r;
	753	}
	754
	755	/* amdgpu_gfx_off_ctrl - Handle gfx off feature enable/disable
	756	*
	757	* @adev: amdgpu_device pointer
	758	* @bool enable true: enable gfx off feature, false: disable gfx off feature
	759	*
	760	* 1. gfx off feature will be enabled by gfx ip after gfx cg gp enabled.
	761	* 2. other client can send request to disable gfx off feature, the request should be honored.
	762	* 3. other client can cancel their request of disable gfx off feature
	763	* 4. other client should not send request to enable gfx off feature before disable gfx off feature.
	764	*/
	765
	766	void amdgpu_gfx_off_ctrl(struct amdgpu_device *adev, bool enable)
	767	{
	768	unsigned long delay = GFX_OFF_DELAY_ENABLE;
	769
	770	if (!(adev->pm.pp_feature & PP_GFXOFF_MASK))
	771	return;
	772
	773	mutex_lock(&adev->gfx.gfx_off_mutex);
	774
	775	if (enable) {
	776	/* If the count is already 0, it means there's an imbalance bug somewhere.
	777	* Note that the bug may be in a different caller than the one which triggers the
	778	* WARN_ON_ONCE.
	779	*/
	780	if (WARN_ON_ONCE(adev->gfx.gfx_off_req_count == 0))
	781	goto unlock;
	782
	783	adev->gfx.gfx_off_req_count--;
	784
	785	if (adev->gfx.gfx_off_req_count == 0 &&
	786	!adev->gfx.gfx_off_state) {
	787	/* If going to s2idle, no need to wait */
	788	if (adev->in_s0ix) {
	789	if (!amdgpu_dpm_set_powergating_by_smu(adev,
	790	AMD_IP_BLOCK_TYPE_GFX, true))
	791	adev->gfx.gfx_off_state = true;
	792	} else {
	793	schedule_delayed_work(&adev->gfx.gfx_off_delay_work,
	794	delay);
	795	}
	796	}
	797	} else {
	798	if (adev->gfx.gfx_off_req_count == 0) {
	799	cancel_delayed_work_sync(&adev->gfx.gfx_off_delay_work);
	800
	801	if (adev->gfx.gfx_off_state &&
	802	!amdgpu_dpm_set_powergating_by_smu(adev, AMD_IP_BLOCK_TYPE_GFX, false)) {
	803	adev->gfx.gfx_off_state = false;
	804
	805	if (adev->gfx.funcs->init_spm_golden) {
	806	dev_dbg(adev->dev,
	807	"GFXOFF is disabled, re-init SPM golden settings\n");
	808	amdgpu_gfx_init_spm_golden(adev);
	809	}
	810	}
	811	}
	812
	813	adev->gfx.gfx_off_req_count++;
	814	}
	815
	816	unlock:
	817	mutex_unlock(&adev->gfx.gfx_off_mutex);
	818	}
	819
	820	int amdgpu_set_gfx_off_residency(struct amdgpu_device *adev, bool value)
	821	{
	822	int r = 0;
	823
	824	mutex_lock(&adev->gfx.gfx_off_mutex);
	825
	826	r = amdgpu_dpm_set_residency_gfxoff(adev, value);
	827
	828	mutex_unlock(&adev->gfx.gfx_off_mutex);
	829
	830	return r;
	831	}
	832
	833	int amdgpu_get_gfx_off_residency(struct amdgpu_device adev, u32 value)
	834	{
	835	int r = 0;
	836
	837	mutex_lock(&adev->gfx.gfx_off_mutex);
	838
	839	r = amdgpu_dpm_get_residency_gfxoff(adev, value);
	840
	841	mutex_unlock(&adev->gfx.gfx_off_mutex);
	842
	843	return r;
	844	}
	845
	846	int amdgpu_get_gfx_off_entrycount(struct amdgpu_device adev, u64 value)
	847	{
	848	int r = 0;
	849
	850	mutex_lock(&adev->gfx.gfx_off_mutex);
	851
	852	r = amdgpu_dpm_get_entrycount_gfxoff(adev, value);
	853
	854	mutex_unlock(&adev->gfx.gfx_off_mutex);
	855
	856	return r;
	857	}
	858
	859	int amdgpu_get_gfx_off_status(struct amdgpu_device adev, uint32_t value)
	860	{
	861
	862	int r = 0;
	863
	864	mutex_lock(&adev->gfx.gfx_off_mutex);
	865
	866	r = amdgpu_dpm_get_status_gfxoff(adev, value);
	867
	868	mutex_unlock(&adev->gfx.gfx_off_mutex);
	869
	870	return r;
	871	}
	872
	873	int amdgpu_gfx_ras_late_init(struct amdgpu_device adev, struct ras_common_if ras_block)
	874	{
	875	int r;
	876
	877	if (amdgpu_ras_is_supported(adev, ras_block->block)) {
	878	if (!amdgpu_persistent_edc_harvesting_supported(adev)) {
	879	r = amdgpu_ras_reset_error_status(adev, AMDGPU_RAS_BLOCK__GFX);
	880	if (r)
	881	return r;
	882	}
	883
	884	r = amdgpu_ras_block_late_init(adev, ras_block);
	885	if (r)
	886	return r;
	887
	888	if (adev->gfx.cp_ecc_error_irq.funcs) {
	889	r = amdgpu_irq_get(adev, &adev->gfx.cp_ecc_error_irq, 0);
	890	if (r)
	891	goto late_fini;
	892	}
	893	} else {
	894	amdgpu_ras_feature_enable_on_boot(adev, ras_block, 0);
	895	}
	896
	897	return 0;
	898	late_fini:
	899	amdgpu_ras_block_late_fini(adev, ras_block);
	900	return r;
	901	}
	902
	903	int amdgpu_gfx_ras_sw_init(struct amdgpu_device *adev)
	904	{
	905	int err = 0;
	906	struct amdgpu_gfx_ras *ras = NULL;
	907
	908	/* adev->gfx.ras is NULL, which means gfx does not
	909	* support ras function, then do nothing here.
	910	*/
	911	if (!adev->gfx.ras)
	912	return 0;
	913
	914	ras = adev->gfx.ras;
	915
	916	err = amdgpu_ras_register_ras_block(adev, &ras->ras_block);
	917	if (err) {
	918	dev_err(adev->dev, "Failed to register gfx ras block!\n");
	919	return err;
	920	}
	921
	922	strcpy(ras->ras_block.ras_comm.name, "gfx");
	923	ras->ras_block.ras_comm.block = AMDGPU_RAS_BLOCK__GFX;
	924	ras->ras_block.ras_comm.type = AMDGPU_RAS_ERROR__MULTI_UNCORRECTABLE;
	925	adev->gfx.ras_if = &ras->ras_block.ras_comm;
	926
	927	/* If not define special ras_late_init function, use gfx default ras_late_init */
	928	if (!ras->ras_block.ras_late_init)
	929	ras->ras_block.ras_late_init = amdgpu_gfx_ras_late_init;
	930
	931	/* If not defined special ras_cb function, use default ras_cb */
	932	if (!ras->ras_block.ras_cb)
	933	ras->ras_block.ras_cb = amdgpu_gfx_process_ras_data_cb;
	934
	935	return 0;
	936	}
	937
	938	int amdgpu_gfx_poison_consumption_handler(struct amdgpu_device *adev,
	939	struct amdgpu_iv_entry *entry)
	940	{
	941	if (adev->gfx.ras && adev->gfx.ras->poison_consumption_handler)
	942	return adev->gfx.ras->poison_consumption_handler(adev, entry);
	943
	944	return 0;
	945	}
	946
	947	int amdgpu_gfx_process_ras_data_cb(struct amdgpu_device *adev,
	948	void *err_data,
	949	struct amdgpu_iv_entry *entry)
	950	{
	951	/* TODO ue will trigger an interrupt.
	952	*
	953	* When “Full RAS” is enabled, the per-IP interrupt sources should
	954	* be disabled and the driver should only look for the aggregated
	955	* interrupt via sync flood
	956	*/
	957	if (!amdgpu_ras_is_supported(adev, AMDGPU_RAS_BLOCK__GFX)) {
	958	kgd2kfd_set_sram_ecc_flag(adev->kfd.dev);
	959	if (adev->gfx.ras && adev->gfx.ras->ras_block.hw_ops &&
	960	adev->gfx.ras->ras_block.hw_ops->query_ras_error_count)
	961	adev->gfx.ras->ras_block.hw_ops->query_ras_error_count(adev, err_data);
	962	amdgpu_ras_reset_gpu(adev);
	963	}
	964	return AMDGPU_RAS_SUCCESS;
	965	}
	966
	967	int amdgpu_gfx_cp_ecc_error_irq(struct amdgpu_device *adev,
	968	struct amdgpu_irq_src *source,
	969	struct amdgpu_iv_entry *entry)
	970	{
	971	struct ras_common_if *ras_if = adev->gfx.ras_if;
	972	struct ras_dispatch_if ih_data = {
	973	.entry = entry,
	974	};
	975
	976	if (!ras_if)
	977	return 0;
	978
	979	ih_data.head = *ras_if;
	980
	981	DRM_ERROR("CP ECC ERROR IRQ\n");
	982	amdgpu_ras_interrupt_dispatch(adev, &ih_data);
	983	return 0;
	984	}
	985
	986	void amdgpu_gfx_ras_error_func(struct amdgpu_device *adev,
	987	void *ras_error_status,
	988	void (func)(struct amdgpu_device adev, void *ras_error_status,
	989	int xcc_id))
	990	{
	991	int i;
	992	int num_xcc = adev->gfx.xcc_mask ? NUM_XCC(adev->gfx.xcc_mask) : 1;
	993	uint32_t xcc_mask = GENMASK(num_xcc - 1, 0);
	994	struct ras_err_data err_data = (struct ras_err_data )ras_error_status;
	995
	996	if (err_data) {
	997	err_data->ue_count = 0;
	998	err_data->ce_count = 0;
	999	}
	1000
	1001	for_each_inst(i, xcc_mask)
	1002	func(adev, ras_error_status, i);
	1003	}
	1004
	1005	uint32_t amdgpu_kiq_rreg(struct amdgpu_device *adev, uint32_t reg, uint32_t xcc_id)
	1006	{
	1007	signed long r, cnt = 0;
	1008	unsigned long flags;
	1009	uint32_t seq, reg_val_offs = 0, value = 0;
	1010	struct amdgpu_kiq *kiq = &adev->gfx.kiq[xcc_id];
	1011	struct amdgpu_ring *ring = &kiq->ring;
	1012
	1013	if (amdgpu_device_skip_hw_access(adev))
	1014	return 0;
	1015
	1016	if (adev->mes.ring[0].sched.ready)
	1017	return amdgpu_mes_rreg(adev, reg);
	1018
	1019	BUG_ON(!ring->funcs->emit_rreg);
	1020
	1021	spin_lock_irqsave(&kiq->ring_lock, flags);
	1022	if (amdgpu_device_wb_get(adev, &reg_val_offs)) {
	1023	pr_err("critical bug! too many kiq readers\n");
	1024	goto failed_unlock;
	1025	}
	1026	r = amdgpu_ring_alloc(ring, 32);
	1027	if (r)
	1028	goto failed_unlock;
	1029
	1030	amdgpu_ring_emit_rreg(ring, reg, reg_val_offs);
	1031	r = amdgpu_fence_emit_polling(ring, &seq, MAX_KIQ_REG_WAIT);
	1032	if (r)
	1033	goto failed_undo;
	1034
	1035	amdgpu_ring_commit(ring);
	1036	spin_unlock_irqrestore(&kiq->ring_lock, flags);
	1037
	1038	r = amdgpu_fence_wait_polling(ring, seq, MAX_KIQ_REG_WAIT);
	1039
	1040	/* don't wait anymore for gpu reset case because this way may
	1041	* block gpu_recover() routine forever, e.g. this virt_kiq_rreg
	1042	* is triggered in TTM and ttm_bo_lock_delayed_workqueue() will
	1043	* never return if we keep waiting in virt_kiq_rreg, which cause
	1044	* gpu_recover() hang there.
	1045	*
	1046	* also don't wait anymore for IRQ context
	1047	* */
	1048	if (r < 1 && (amdgpu_in_reset(adev) \|\| in_interrupt()))
	1049	goto failed_kiq_read;
	1050
	1051	might_sleep();
	1052	while (r < 1 && cnt++ < MAX_KIQ_REG_TRY) {
	1053	msleep(MAX_KIQ_REG_BAILOUT_INTERVAL);
	1054	r = amdgpu_fence_wait_polling(ring, seq, MAX_KIQ_REG_WAIT);
	1055	}
	1056
	1057	if (cnt > MAX_KIQ_REG_TRY)
	1058	goto failed_kiq_read;
	1059
	1060	mb();
	1061	value = adev->wb.wb[reg_val_offs];
	1062	amdgpu_device_wb_free(adev, reg_val_offs);
	1063	return value;
	1064
	1065	failed_undo:
	1066	amdgpu_ring_undo(ring);
	1067	failed_unlock:
	1068	spin_unlock_irqrestore(&kiq->ring_lock, flags);
	1069	failed_kiq_read:
	1070	if (reg_val_offs)
	1071	amdgpu_device_wb_free(adev, reg_val_offs);
	1072	dev_err(adev->dev, "failed to read reg:%x\n", reg);
	1073	return ~0;
	1074	}
	1075
	1076	void amdgpu_kiq_wreg(struct amdgpu_device *adev, uint32_t reg, uint32_t v, uint32_t xcc_id)
	1077	{
	1078	signed long r, cnt = 0;
	1079	unsigned long flags;
	1080	uint32_t seq;
	1081	struct amdgpu_kiq *kiq = &adev->gfx.kiq[xcc_id];
	1082	struct amdgpu_ring *ring = &kiq->ring;
	1083
	1084	BUG_ON(!ring->funcs->emit_wreg);
	1085
	1086	if (amdgpu_device_skip_hw_access(adev))
	1087	return;
	1088
	1089	if (adev->mes.ring[0].sched.ready) {
	1090	amdgpu_mes_wreg(adev, reg, v);
	1091	return;
	1092	}
	1093
	1094	spin_lock_irqsave(&kiq->ring_lock, flags);
	1095	r = amdgpu_ring_alloc(ring, 32);
	1096	if (r)
	1097	goto failed_unlock;
	1098
	1099	amdgpu_ring_emit_wreg(ring, reg, v);
	1100	r = amdgpu_fence_emit_polling(ring, &seq, MAX_KIQ_REG_WAIT);
	1101	if (r)
	1102	goto failed_undo;
	1103
	1104	amdgpu_ring_commit(ring);
	1105	spin_unlock_irqrestore(&kiq->ring_lock, flags);
	1106
	1107	r = amdgpu_fence_wait_polling(ring, seq, MAX_KIQ_REG_WAIT);
	1108
	1109	/* don't wait anymore for gpu reset case because this way may
	1110	* block gpu_recover() routine forever, e.g. this virt_kiq_rreg
	1111	* is triggered in TTM and ttm_bo_lock_delayed_workqueue() will
	1112	* never return if we keep waiting in virt_kiq_rreg, which cause
	1113	* gpu_recover() hang there.
	1114	*
	1115	* also don't wait anymore for IRQ context
	1116	* */
	1117	if (r < 1 && (amdgpu_in_reset(adev) \|\| in_interrupt()))
	1118	goto failed_kiq_write;
	1119
	1120	might_sleep();
	1121	while (r < 1 && cnt++ < MAX_KIQ_REG_TRY) {
	1122
	1123	msleep(MAX_KIQ_REG_BAILOUT_INTERVAL);
	1124	r = amdgpu_fence_wait_polling(ring, seq, MAX_KIQ_REG_WAIT);
	1125	}
	1126
	1127	if (cnt > MAX_KIQ_REG_TRY)
	1128	goto failed_kiq_write;
	1129
	1130	return;
	1131
	1132	failed_undo:
	1133	amdgpu_ring_undo(ring);
	1134	failed_unlock:
	1135	spin_unlock_irqrestore(&kiq->ring_lock, flags);
	1136	failed_kiq_write:
	1137	dev_err(adev->dev, "failed to write reg:%x\n", reg);
	1138	}
	1139
	1140	int amdgpu_gfx_get_num_kcq(struct amdgpu_device *adev)
	1141	{
	1142	if (amdgpu_num_kcq == -1) {
	1143	return 8;
	1144	} else if (amdgpu_num_kcq > 8 \|\| amdgpu_num_kcq < 0) {
	1145	dev_warn(adev->dev, "set kernel compute queue number to 8 due to invalid parameter provided by user\n");
	1146	return 8;
	1147	}
	1148	return amdgpu_num_kcq;
	1149	}
	1150
	1151	void amdgpu_gfx_cp_init_microcode(struct amdgpu_device *adev,
	1152	uint32_t ucode_id)
	1153	{
	1154	const struct gfx_firmware_header_v1_0 *cp_hdr;
	1155	const struct gfx_firmware_header_v2_0 *cp_hdr_v2_0;
	1156	struct amdgpu_firmware_info *info = NULL;
	1157	const struct firmware *ucode_fw;
	1158	unsigned int fw_size;
	1159
	1160	switch (ucode_id) {
	1161	case AMDGPU_UCODE_ID_CP_PFP:
	1162	cp_hdr = (const struct gfx_firmware_header_v1_0 *)
	1163	adev->gfx.pfp_fw->data;
	1164	adev->gfx.pfp_fw_version =
	1165	le32_to_cpu(cp_hdr->header.ucode_version);
	1166	adev->gfx.pfp_feature_version =
	1167	le32_to_cpu(cp_hdr->ucode_feature_version);
	1168	ucode_fw = adev->gfx.pfp_fw;
	1169	fw_size = le32_to_cpu(cp_hdr->header.ucode_size_bytes);
	1170	break;
	1171	case AMDGPU_UCODE_ID_CP_RS64_PFP:
	1172	cp_hdr_v2_0 = (const struct gfx_firmware_header_v2_0 *)
	1173	adev->gfx.pfp_fw->data;
	1174	adev->gfx.pfp_fw_version =
	1175	le32_to_cpu(cp_hdr_v2_0->header.ucode_version);
	1176	adev->gfx.pfp_feature_version =
	1177	le32_to_cpu(cp_hdr_v2_0->ucode_feature_version);
	1178	ucode_fw = adev->gfx.pfp_fw;
	1179	fw_size = le32_to_cpu(cp_hdr_v2_0->ucode_size_bytes);
	1180	break;
	1181	case AMDGPU_UCODE_ID_CP_RS64_PFP_P0_STACK:
	1182	case AMDGPU_UCODE_ID_CP_RS64_PFP_P1_STACK:
	1183	cp_hdr_v2_0 = (const struct gfx_firmware_header_v2_0 *)
	1184	adev->gfx.pfp_fw->data;
	1185	ucode_fw = adev->gfx.pfp_fw;
	1186	fw_size = le32_to_cpu(cp_hdr_v2_0->data_size_bytes);
	1187	break;
	1188	case AMDGPU_UCODE_ID_CP_ME:
	1189	cp_hdr = (const struct gfx_firmware_header_v1_0 *)
	1190	adev->gfx.me_fw->data;
	1191	adev->gfx.me_fw_version =
	1192	le32_to_cpu(cp_hdr->header.ucode_version);
	1193	adev->gfx.me_feature_version =
	1194	le32_to_cpu(cp_hdr->ucode_feature_version);
	1195	ucode_fw = adev->gfx.me_fw;
	1196	fw_size = le32_to_cpu(cp_hdr->header.ucode_size_bytes);
	1197	break;
	1198	case AMDGPU_UCODE_ID_CP_RS64_ME:
	1199	cp_hdr_v2_0 = (const struct gfx_firmware_header_v2_0 *)
	1200	adev->gfx.me_fw->data;
	1201	adev->gfx.me_fw_version =
	1202	le32_to_cpu(cp_hdr_v2_0->header.ucode_version);
	1203	adev->gfx.me_feature_version =
	1204	le32_to_cpu(cp_hdr_v2_0->ucode_feature_version);
	1205	ucode_fw = adev->gfx.me_fw;
	1206	fw_size = le32_to_cpu(cp_hdr_v2_0->ucode_size_bytes);
	1207	break;
	1208	case AMDGPU_UCODE_ID_CP_RS64_ME_P0_STACK:
	1209	case AMDGPU_UCODE_ID_CP_RS64_ME_P1_STACK:
	1210	cp_hdr_v2_0 = (const struct gfx_firmware_header_v2_0 *)
	1211	adev->gfx.me_fw->data;
	1212	ucode_fw = adev->gfx.me_fw;
	1213	fw_size = le32_to_cpu(cp_hdr_v2_0->data_size_bytes);
	1214	break;
	1215	case AMDGPU_UCODE_ID_CP_CE:
	1216	cp_hdr = (const struct gfx_firmware_header_v1_0 *)
	1217	adev->gfx.ce_fw->data;
	1218	adev->gfx.ce_fw_version =
	1219	le32_to_cpu(cp_hdr->header.ucode_version);
	1220	adev->gfx.ce_feature_version =
	1221	le32_to_cpu(cp_hdr->ucode_feature_version);
	1222	ucode_fw = adev->gfx.ce_fw;
	1223	fw_size = le32_to_cpu(cp_hdr->header.ucode_size_bytes);
	1224	break;
	1225	case AMDGPU_UCODE_ID_CP_MEC1:
	1226	cp_hdr = (const struct gfx_firmware_header_v1_0 *)
	1227	adev->gfx.mec_fw->data;
	1228	adev->gfx.mec_fw_version =
	1229	le32_to_cpu(cp_hdr->header.ucode_version);
	1230	adev->gfx.mec_feature_version =
	1231	le32_to_cpu(cp_hdr->ucode_feature_version);
	1232	ucode_fw = adev->gfx.mec_fw;
	1233	fw_size = le32_to_cpu(cp_hdr->header.ucode_size_bytes) -
	1234	le32_to_cpu(cp_hdr->jt_size) * 4;
	1235	break;
	1236	case AMDGPU_UCODE_ID_CP_MEC1_JT:
	1237	cp_hdr = (const struct gfx_firmware_header_v1_0 *)
	1238	adev->gfx.mec_fw->data;
	1239	ucode_fw = adev->gfx.mec_fw;
	1240	fw_size = le32_to_cpu(cp_hdr->jt_size) * 4;
	1241	break;
	1242	case AMDGPU_UCODE_ID_CP_MEC2:
	1243	cp_hdr = (const struct gfx_firmware_header_v1_0 *)
	1244	adev->gfx.mec2_fw->data;
	1245	adev->gfx.mec2_fw_version =
	1246	le32_to_cpu(cp_hdr->header.ucode_version);
	1247	adev->gfx.mec2_feature_version =
	1248	le32_to_cpu(cp_hdr->ucode_feature_version);
	1249	ucode_fw = adev->gfx.mec2_fw;
	1250	fw_size = le32_to_cpu(cp_hdr->header.ucode_size_bytes) -
	1251	le32_to_cpu(cp_hdr->jt_size) * 4;
	1252	break;
	1253	case AMDGPU_UCODE_ID_CP_MEC2_JT:
	1254	cp_hdr = (const struct gfx_firmware_header_v1_0 *)
	1255	adev->gfx.mec2_fw->data;
	1256	ucode_fw = adev->gfx.mec2_fw;
	1257	fw_size = le32_to_cpu(cp_hdr->jt_size) * 4;
	1258	break;
	1259	case AMDGPU_UCODE_ID_CP_RS64_MEC:
	1260	cp_hdr_v2_0 = (const struct gfx_firmware_header_v2_0 *)
	1261	adev->gfx.mec_fw->data;
	1262	adev->gfx.mec_fw_version =
	1263	le32_to_cpu(cp_hdr_v2_0->header.ucode_version);
	1264	adev->gfx.mec_feature_version =
	1265	le32_to_cpu(cp_hdr_v2_0->ucode_feature_version);
	1266	ucode_fw = adev->gfx.mec_fw;
	1267	fw_size = le32_to_cpu(cp_hdr_v2_0->ucode_size_bytes);
	1268	break;
	1269	case AMDGPU_UCODE_ID_CP_RS64_MEC_P0_STACK:
	1270	case AMDGPU_UCODE_ID_CP_RS64_MEC_P1_STACK:
	1271	case AMDGPU_UCODE_ID_CP_RS64_MEC_P2_STACK:
	1272	case AMDGPU_UCODE_ID_CP_RS64_MEC_P3_STACK:
	1273	cp_hdr_v2_0 = (const struct gfx_firmware_header_v2_0 *)
	1274	adev->gfx.mec_fw->data;
	1275	ucode_fw = adev->gfx.mec_fw;
	1276	fw_size = le32_to_cpu(cp_hdr_v2_0->data_size_bytes);
	1277	break;
	1278	default:
	1279	dev_err(adev->dev, "Invalid ucode id %u\n", ucode_id);
	1280	return;
	1281	}
	1282
	1283	if (adev->firmware.load_type == AMDGPU_FW_LOAD_PSP) {
	1284	info = &adev->firmware.ucode[ucode_id];
	1285	info->ucode_id = ucode_id;
	1286	info->fw = ucode_fw;
	1287	adev->firmware.fw_size += ALIGN(fw_size, PAGE_SIZE);
	1288	}
	1289	}
	1290
	1291	bool amdgpu_gfx_is_master_xcc(struct amdgpu_device *adev, int xcc_id)
	1292	{
	1293	return !(xcc_id % (adev->gfx.num_xcc_per_xcp ?
	1294	adev->gfx.num_xcc_per_xcp : 1));
	1295	}
	1296
	1297	static ssize_t amdgpu_gfx_get_current_compute_partition(struct device *dev,
	1298	struct device_attribute *addr,
	1299	char *buf)
	1300	{
	1301	struct drm_device *ddev = dev_get_drvdata(dev);
	1302	struct amdgpu_device *adev = drm_to_adev(ddev);
	1303	int mode;
	1304
	1305	mode = amdgpu_xcp_query_partition_mode(adev->xcp_mgr,
	1306	AMDGPU_XCP_FL_NONE);
	1307
	1308	return sysfs_emit(buf, "%s\n", amdgpu_gfx_compute_mode_desc(mode));
	1309	}
	1310
	1311	static ssize_t amdgpu_gfx_set_compute_partition(struct device *dev,
	1312	struct device_attribute *addr,
	1313	const char *buf, size_t count)
	1314	{
	1315	struct drm_device *ddev = dev_get_drvdata(dev);
	1316	struct amdgpu_device *adev = drm_to_adev(ddev);
	1317	enum amdgpu_gfx_partition mode;
	1318	int ret = 0, num_xcc;
	1319
	1320	num_xcc = NUM_XCC(adev->gfx.xcc_mask);
	1321	if (num_xcc % 2 != 0)
	1322	return -EINVAL;
	1323
	1324	if (!strncasecmp("SPX", buf, strlen("SPX"))) {
	1325	mode = AMDGPU_SPX_PARTITION_MODE;
	1326	} else if (!strncasecmp("DPX", buf, strlen("DPX"))) {
	1327	/*
	1328	* DPX mode needs AIDs to be in multiple of 2.
	1329	* Each AID connects 2 XCCs.
	1330	*/
	1331	if (num_xcc%4)
	1332	return -EINVAL;
	1333	mode = AMDGPU_DPX_PARTITION_MODE;
	1334	} else if (!strncasecmp("TPX", buf, strlen("TPX"))) {
	1335	if (num_xcc != 6)
	1336	return -EINVAL;
	1337	mode = AMDGPU_TPX_PARTITION_MODE;
	1338	} else if (!strncasecmp("QPX", buf, strlen("QPX"))) {
	1339	if (num_xcc != 8)
	1340	return -EINVAL;
	1341	mode = AMDGPU_QPX_PARTITION_MODE;
	1342	} else if (!strncasecmp("CPX", buf, strlen("CPX"))) {
	1343	mode = AMDGPU_CPX_PARTITION_MODE;
	1344	} else {
	1345	return -EINVAL;
	1346	}
	1347
	1348	ret = amdgpu_xcp_switch_partition_mode(adev->xcp_mgr, mode);
	1349
	1350	if (ret)
	1351	return ret;
	1352
	1353	return count;
	1354	}
	1355
	1356	static const char *xcp_desc[] = {
	1357	[AMDGPU_SPX_PARTITION_MODE] = "SPX",
	1358	[AMDGPU_DPX_PARTITION_MODE] = "DPX",
	1359	[AMDGPU_TPX_PARTITION_MODE] = "TPX",
	1360	[AMDGPU_QPX_PARTITION_MODE] = "QPX",
	1361	[AMDGPU_CPX_PARTITION_MODE] = "CPX",
	1362	};
	1363
	1364	static ssize_t amdgpu_gfx_get_available_compute_partition(struct device *dev,
	1365	struct device_attribute *addr,
	1366	char *buf)
	1367	{
	1368	struct drm_device *ddev = dev_get_drvdata(dev);
	1369	struct amdgpu_device *adev = drm_to_adev(ddev);
	1370	struct amdgpu_xcp_mgr *xcp_mgr = adev->xcp_mgr;
	1371	int size = 0, mode;
	1372	char *sep = "";
	1373
	1374	if (!xcp_mgr \|\| !xcp_mgr->avail_xcp_modes)
	1375	return sysfs_emit(buf, "Not supported\n");
	1376
	1377	for_each_inst(mode, xcp_mgr->avail_xcp_modes) {
	1378	size += sysfs_emit_at(buf, size, "%s%s", sep, xcp_desc[mode]);
	1379	sep = ", ";
	1380	}
	1381
	1382	size += sysfs_emit_at(buf, size, "\n");
	1383
	1384	return size;
	1385	}
	1386
	1387	static int amdgpu_gfx_run_cleaner_shader_job(struct amdgpu_ring *ring)
	1388	{
	1389	struct amdgpu_device *adev = ring->adev;
	1390	struct drm_gpu_scheduler *sched = &ring->sched;
	1391	struct drm_sched_entity entity;
	1392	struct dma_fence *f;
	1393	struct amdgpu_job *job;
	1394	struct amdgpu_ib *ib;
	1395	int i, r;
	1396
	1397	/* Initialize the scheduler entity */
	1398	r = drm_sched_entity_init(&entity, DRM_SCHED_PRIORITY_NORMAL,
	1399	&sched, 1, NULL);
	1400	if (r) {
	1401	dev_err(adev->dev, "Failed setting up GFX kernel entity.\n");
	1402	goto err;
	1403	}
	1404
	1405	r = amdgpu_job_alloc_with_ib(ring->adev, &entity, NULL,
	1406	64, 0,
	1407	&job);
	1408	if (r)
	1409	goto err;
	1410
	1411	job->enforce_isolation = true;
	1412
	1413	ib = &job->ibs[0];
	1414	for (i = 0; i <= ring->funcs->align_mask; ++i)
	1415	ib->ptr[i] = ring->funcs->nop;
	1416	ib->length_dw = ring->funcs->align_mask + 1;
	1417
	1418	f = amdgpu_job_submit(job);
	1419
	1420	r = dma_fence_wait(f, false);
	1421	if (r)
	1422	goto err;
	1423
	1424	dma_fence_put(f);
	1425
	1426	/* Clean up the scheduler entity */
	1427	drm_sched_entity_destroy(&entity);
	1428	return 0;
	1429
	1430	err:
	1431	return r;
	1432	}
	1433
	1434	static int amdgpu_gfx_run_cleaner_shader(struct amdgpu_device *adev, int xcp_id)
	1435	{
	1436	int num_xcc = NUM_XCC(adev->gfx.xcc_mask);
	1437	struct amdgpu_ring *ring;
	1438	int num_xcc_to_clear;
	1439	int i, r, xcc_id;
	1440
	1441	if (adev->gfx.num_xcc_per_xcp)
	1442	num_xcc_to_clear = adev->gfx.num_xcc_per_xcp;
	1443	else
	1444	num_xcc_to_clear = 1;
	1445
	1446	for (xcc_id = 0; xcc_id < num_xcc; xcc_id++) {
	1447	for (i = 0; i < adev->gfx.num_compute_rings; i++) {
	1448	ring = &adev->gfx.compute_ring[i + xcc_id * adev->gfx.num_compute_rings];
	1449	if ((ring->xcp_id == xcp_id) && ring->sched.ready) {
	1450	r = amdgpu_gfx_run_cleaner_shader_job(ring);
	1451	if (r)
	1452	return r;
	1453	num_xcc_to_clear--;
	1454	break;
	1455	}
	1456	}
	1457	}
	1458
	1459	if (num_xcc_to_clear)
	1460	return -ENOENT;
	1461
	1462	return 0;
	1463	}
	1464
	1465	static ssize_t amdgpu_gfx_set_run_cleaner_shader(struct device *dev,
	1466	struct device_attribute *attr,
	1467	const char *buf,
	1468	size_t count)
	1469	{
	1470	struct drm_device *ddev = dev_get_drvdata(dev);
	1471	struct amdgpu_device *adev = drm_to_adev(ddev);
	1472	int ret;
	1473	long value;
	1474
	1475	if (amdgpu_in_reset(adev))
	1476	return -EPERM;
	1477	if (adev->in_suspend && !adev->in_runpm)
	1478	return -EPERM;
	1479
	1480	ret = kstrtol(buf, 0, &value);
	1481
	1482	if (ret)
	1483	return -EINVAL;
	1484
	1485	if (value < 0)
	1486	return -EINVAL;
	1487
	1488	if (adev->xcp_mgr) {
	1489	if (value >= adev->xcp_mgr->num_xcps)
	1490	return -EINVAL;
	1491	} else {
	1492	if (value > 1)
	1493	return -EINVAL;
	1494	}
	1495
	1496	ret = pm_runtime_get_sync(ddev->dev);
	1497	if (ret < 0) {
	1498	pm_runtime_put_autosuspend(ddev->dev);
	1499	return ret;
	1500	}
	1501
	1502	ret = amdgpu_gfx_run_cleaner_shader(adev, value);
	1503
	1504	pm_runtime_mark_last_busy(ddev->dev);
	1505	pm_runtime_put_autosuspend(ddev->dev);
	1506
	1507	if (ret)
	1508	return ret;
	1509
	1510	return count;
	1511	}
	1512
	1513	static ssize_t amdgpu_gfx_get_enforce_isolation(struct device *dev,
	1514	struct device_attribute *attr,
	1515	char *buf)
	1516	{
	1517	struct drm_device *ddev = dev_get_drvdata(dev);
	1518	struct amdgpu_device *adev = drm_to_adev(ddev);
	1519	int i;
	1520	ssize_t size = 0;
	1521
	1522	if (adev->xcp_mgr) {
	1523	for (i = 0; i < adev->xcp_mgr->num_xcps; i++) {
	1524	size += sysfs_emit_at(buf, size, "%u", adev->enforce_isolation[i]);
	1525	if (i < (adev->xcp_mgr->num_xcps - 1))
	1526	size += sysfs_emit_at(buf, size, " ");
	1527	}
	1528	buf[size++] = '\n';
	1529	} else {
	1530	size = sysfs_emit_at(buf, 0, "%u\n", adev->enforce_isolation[0]);
	1531	}
	1532
	1533	return size;
	1534	}
	1535
	1536	static ssize_t amdgpu_gfx_set_enforce_isolation(struct device *dev,
	1537	struct device_attribute *attr,
	1538	const char *buf, size_t count)
	1539	{
	1540	struct drm_device *ddev = dev_get_drvdata(dev);
	1541	struct amdgpu_device *adev = drm_to_adev(ddev);
	1542	long partition_values[MAX_XCP] = {0};
	1543	int ret, i, num_partitions;
	1544	const char *input_buf = buf;
	1545
	1546	for (i = 0; i < (adev->xcp_mgr ? adev->xcp_mgr->num_xcps : 1); i++) {
	1547	ret = sscanf(input_buf, "%ld", &partition_values[i]);
	1548	if (ret <= 0)
	1549	break;
	1550
	1551	/* Move the pointer to the next value in the string */
	1552	input_buf = strchr(input_buf, ' ');
	1553	if (input_buf) {
	1554	input_buf++;
	1555	} else {
	1556	i++;
	1557	break;
	1558	}
	1559	}
	1560	num_partitions = i;
	1561
	1562	if (adev->xcp_mgr && num_partitions != adev->xcp_mgr->num_xcps)
	1563	return -EINVAL;
	1564
	1565	if (!adev->xcp_mgr && num_partitions != 1)
	1566	return -EINVAL;
	1567
	1568	for (i = 0; i < num_partitions; i++) {
	1569	if (partition_values[i] != 0 && partition_values[i] != 1)
	1570	return -EINVAL;
	1571	}
	1572
	1573	mutex_lock(&adev->enforce_isolation_mutex);
	1574
	1575	for (i = 0; i < num_partitions; i++) {
	1576	if (adev->enforce_isolation[i] && !partition_values[i]) {
	1577	/* Going from enabled to disabled */
	1578	amdgpu_vmid_free_reserved(adev, AMDGPU_GFXHUB(i));
	1579	} else if (!adev->enforce_isolation[i] && partition_values[i]) {
	1580	/* Going from disabled to enabled */
	1581	amdgpu_vmid_alloc_reserved(adev, AMDGPU_GFXHUB(i));
	1582	}
	1583	adev->enforce_isolation[i] = partition_values[i];
	1584	}
	1585
	1586	mutex_unlock(&adev->enforce_isolation_mutex);
	1587
	1588	return count;
	1589	}
	1590
	1591	static DEVICE_ATTR(run_cleaner_shader, 0200,
	1592	NULL, amdgpu_gfx_set_run_cleaner_shader);
	1593
	1594	static DEVICE_ATTR(enforce_isolation, 0644,
	1595	amdgpu_gfx_get_enforce_isolation,
	1596	amdgpu_gfx_set_enforce_isolation);
	1597
	1598	static DEVICE_ATTR(current_compute_partition, 0644,
	1599	amdgpu_gfx_get_current_compute_partition,
	1600	amdgpu_gfx_set_compute_partition);
	1601
	1602	static DEVICE_ATTR(available_compute_partition, 0444,
	1603	amdgpu_gfx_get_available_compute_partition, NULL);
	1604
	1605	int amdgpu_gfx_sysfs_init(struct amdgpu_device *adev)
	1606	{
	1607	struct amdgpu_xcp_mgr *xcp_mgr = adev->xcp_mgr;
	1608	bool xcp_switch_supported;
	1609	int r;
	1610
	1611	if (!xcp_mgr)
	1612	return 0;
	1613
	1614	xcp_switch_supported =
	1615	(xcp_mgr->funcs && xcp_mgr->funcs->switch_partition_mode);
	1616
	1617	if (!xcp_switch_supported)
	1618	dev_attr_current_compute_partition.attr.mode &=
	1619	~(S_IWUSR \| S_IWGRP \| S_IWOTH);
	1620
	1621	r = device_create_file(adev->dev, &dev_attr_current_compute_partition);
	1622	if (r)
	1623	return r;
	1624
	1625	if (xcp_switch_supported)
	1626	r = device_create_file(adev->dev,
	1627	&dev_attr_available_compute_partition);
	1628
	1629	return r;
	1630	}
	1631
	1632	void amdgpu_gfx_sysfs_fini(struct amdgpu_device *adev)
	1633	{
	1634	struct amdgpu_xcp_mgr *xcp_mgr = adev->xcp_mgr;
	1635	bool xcp_switch_supported;
	1636
	1637	if (!xcp_mgr)
	1638	return;
	1639
	1640	xcp_switch_supported =
	1641	(xcp_mgr->funcs && xcp_mgr->funcs->switch_partition_mode);
	1642	device_remove_file(adev->dev, &dev_attr_current_compute_partition);
	1643
	1644	if (xcp_switch_supported)
	1645	device_remove_file(adev->dev,
	1646	&dev_attr_available_compute_partition);
	1647	}
	1648
	1649	int amdgpu_gfx_sysfs_isolation_shader_init(struct amdgpu_device *adev)
	1650	{
	1651	int r;
	1652
	1653	r = device_create_file(adev->dev, &dev_attr_enforce_isolation);
	1654	if (r)
	1655	return r;
	1656
	1657	r = device_create_file(adev->dev, &dev_attr_run_cleaner_shader);
	1658	if (r)
	1659	return r;
	1660
	1661	return 0;
	1662	}
	1663
	1664	void amdgpu_gfx_sysfs_isolation_shader_fini(struct amdgpu_device *adev)
	1665	{
	1666	device_remove_file(adev->dev, &dev_attr_enforce_isolation);
	1667	device_remove_file(adev->dev, &dev_attr_run_cleaner_shader);
	1668	}
	1669
	1670	int amdgpu_gfx_cleaner_shader_sw_init(struct amdgpu_device *adev,
	1671	unsigned int cleaner_shader_size)
	1672	{
	1673	if (!adev->gfx.enable_cleaner_shader)
	1674	return -EOPNOTSUPP;
	1675
	1676	return amdgpu_bo_create_kernel(adev, cleaner_shader_size, PAGE_SIZE,
	1677	AMDGPU_GEM_DOMAIN_VRAM \| AMDGPU_GEM_DOMAIN_GTT,
	1678	&adev->gfx.cleaner_shader_obj,
	1679	&adev->gfx.cleaner_shader_gpu_addr,
	1680	(void **)&adev->gfx.cleaner_shader_cpu_ptr);
	1681	}
	1682
	1683	void amdgpu_gfx_cleaner_shader_sw_fini(struct amdgpu_device *adev)
	1684	{
	1685	if (!adev->gfx.enable_cleaner_shader)
	1686	return;
	1687
	1688	amdgpu_bo_free_kernel(&adev->gfx.cleaner_shader_obj,
	1689	&adev->gfx.cleaner_shader_gpu_addr,
	1690	(void **)&adev->gfx.cleaner_shader_cpu_ptr);
	1691	}
	1692
	1693	void amdgpu_gfx_cleaner_shader_init(struct amdgpu_device *adev,
	1694	unsigned int cleaner_shader_size,
	1695	const void *cleaner_shader_ptr)
	1696	{
	1697	if (!adev->gfx.enable_cleaner_shader)
	1698	return;
	1699
	1700	if (adev->gfx.cleaner_shader_cpu_ptr && cleaner_shader_ptr)
	1701	memcpy_toio(adev->gfx.cleaner_shader_cpu_ptr, cleaner_shader_ptr,
	1702	cleaner_shader_size);
	1703	}
	1704
	1705	/**
	1706	* amdgpu_gfx_kfd_sch_ctrl - Control the KFD scheduler from the KGD (Graphics Driver)
	1707	* @adev: amdgpu_device pointer
	1708	* @idx: Index of the scheduler to control
	1709	* @enable: Whether to enable or disable the KFD scheduler
	1710	*
	1711	* This function is used to control the KFD (Kernel Fusion Driver) scheduler
	1712	* from the KGD. It is part of the cleaner shader feature. This function plays
	1713	* a key role in enforcing process isolation on the GPU.
	1714	*
	1715	* The function uses a reference count mechanism (kfd_sch_req_count) to keep
	1716	* track of the number of requests to enable the KFD scheduler. When a request
	1717	* to enable the KFD scheduler is made, the reference count is decremented.
	1718	* When the reference count reaches zero, a delayed work is scheduled to
	1719	* enforce isolation after a delay of GFX_SLICE_PERIOD.
	1720	*
	1721	* When a request to disable the KFD scheduler is made, the function first
	1722	* checks if the reference count is zero. If it is, it cancels the delayed work
	1723	* for enforcing isolation and checks if the KFD scheduler is active. If the
	1724	* KFD scheduler is active, it sends a request to stop the KFD scheduler and
	1725	* sets the KFD scheduler state to inactive. Then, it increments the reference
	1726	* count.
	1727	*
	1728	* The function is synchronized using the kfd_sch_mutex to ensure that the KFD
	1729	* scheduler state and reference count are updated atomically.
	1730	*
	1731	* Note: If the reference count is already zero when a request to enable the
	1732	* KFD scheduler is made, it means there's an imbalance bug somewhere. The
	1733	* function triggers a warning in this case.
	1734	*/
	1735	static void amdgpu_gfx_kfd_sch_ctrl(struct amdgpu_device *adev, u32 idx,
	1736	bool enable)
	1737	{
	1738	mutex_lock(&adev->gfx.kfd_sch_mutex);
	1739
	1740	if (enable) {
	1741	/* If the count is already 0, it means there's an imbalance bug somewhere.
	1742	* Note that the bug may be in a different caller than the one which triggers the
	1743	* WARN_ON_ONCE.
	1744	*/
	1745	if (WARN_ON_ONCE(adev->gfx.kfd_sch_req_count[idx] == 0)) {
	1746	dev_err(adev->dev, "Attempted to enable KFD scheduler when reference count is already zero\n");
	1747	goto unlock;
	1748	}
	1749
	1750	adev->gfx.kfd_sch_req_count[idx]--;
	1751
	1752	if (adev->gfx.kfd_sch_req_count[idx] == 0 &&
	1753	adev->gfx.kfd_sch_inactive[idx]) {
	1754	schedule_delayed_work(&adev->gfx.enforce_isolation[idx].work,
	1755	msecs_to_jiffies(adev->gfx.enforce_isolation_time[idx]));
	1756	}
	1757	} else {
	1758	if (adev->gfx.kfd_sch_req_count[idx] == 0) {
	1759	cancel_delayed_work_sync(&adev->gfx.enforce_isolation[idx].work);
	1760	if (!adev->gfx.kfd_sch_inactive[idx]) {
	1761	amdgpu_amdkfd_stop_sched(adev, idx);
	1762	adev->gfx.kfd_sch_inactive[idx] = true;
	1763	}
	1764	}
	1765
	1766	adev->gfx.kfd_sch_req_count[idx]++;
	1767	}
	1768
	1769	unlock:
	1770	mutex_unlock(&adev->gfx.kfd_sch_mutex);
	1771	}
	1772
	1773	/**
	1774	* amdgpu_gfx_enforce_isolation_handler - work handler for enforcing shader isolation
	1775	*
	1776	* @work: work_struct.
	1777	*
	1778	* This function is the work handler for enforcing shader isolation on AMD GPUs.
	1779	* It counts the number of emitted fences for each GFX and compute ring. If there
	1780	* are any fences, it schedules the `enforce_isolation_work` to be run after a
	1781	* delay of `GFX_SLICE_PERIOD`. If there are no fences, it signals the Kernel Fusion
	1782	* Driver (KFD) to resume the runqueue. The function is synchronized using the
	1783	* `enforce_isolation_mutex`.
	1784	*/
	1785	void amdgpu_gfx_enforce_isolation_handler(struct work_struct *work)
	1786	{
	1787	struct amdgpu_isolation_work *isolation_work =
	1788	container_of(work, struct amdgpu_isolation_work, work.work);
	1789	struct amdgpu_device *adev = isolation_work->adev;
	1790	u32 i, idx, fences = 0;
	1791
	1792	if (isolation_work->xcp_id == AMDGPU_XCP_NO_PARTITION)
	1793	idx = 0;
	1794	else
	1795	idx = isolation_work->xcp_id;
	1796
	1797	if (idx >= MAX_XCP)
	1798	return;
	1799
	1800	mutex_lock(&adev->enforce_isolation_mutex);
	1801	for (i = 0; i < AMDGPU_MAX_GFX_RINGS; ++i) {
	1802	if (isolation_work->xcp_id == adev->gfx.gfx_ring[i].xcp_id)
	1803	fences += amdgpu_fence_count_emitted(&adev->gfx.gfx_ring[i]);
	1804	}
	1805	for (i = 0; i < (AMDGPU_MAX_COMPUTE_RINGS * AMDGPU_MAX_GC_INSTANCES); ++i) {
	1806	if (isolation_work->xcp_id == adev->gfx.compute_ring[i].xcp_id)
	1807	fences += amdgpu_fence_count_emitted(&adev->gfx.compute_ring[i]);
	1808	}
	1809	if (fences) {
	1810	/* we've already had our timeslice, so let's wrap this up */
	1811	schedule_delayed_work(&adev->gfx.enforce_isolation[idx].work,
	1812	msecs_to_jiffies(1));
	1813	} else {
	1814	/* Tell KFD to resume the runqueue */
	1815	if (adev->kfd.init_complete) {
	1816	WARN_ON_ONCE(!adev->gfx.kfd_sch_inactive[idx]);
	1817	WARN_ON_ONCE(adev->gfx.kfd_sch_req_count[idx]);
	1818	amdgpu_amdkfd_start_sched(adev, idx);
	1819	adev->gfx.kfd_sch_inactive[idx] = false;
	1820	}
	1821	}
	1822	mutex_unlock(&adev->enforce_isolation_mutex);
	1823	}
	1824
	1825	static void
	1826	amdgpu_gfx_enforce_isolation_wait_for_kfd(struct amdgpu_device *adev,
	1827	u32 idx)
	1828	{
	1829	unsigned long cjiffies;
	1830	bool wait = false;
	1831
	1832	mutex_lock(&adev->enforce_isolation_mutex);
	1833	if (adev->enforce_isolation[idx]) {
	1834	/* set the initial values if nothing is set */
	1835	if (!adev->gfx.enforce_isolation_jiffies[idx]) {
	1836	adev->gfx.enforce_isolation_jiffies[idx] = jiffies;
	1837	adev->gfx.enforce_isolation_time[idx] = GFX_SLICE_PERIOD_MS;
	1838	}
	1839	/* Make sure KFD gets a chance to run */
	1840	if (amdgpu_amdkfd_compute_active(adev, idx)) {
	1841	cjiffies = jiffies;
	1842	if (time_after(cjiffies, adev->gfx.enforce_isolation_jiffies[idx])) {
	1843	cjiffies -= adev->gfx.enforce_isolation_jiffies[idx];
	1844	if ((jiffies_to_msecs(cjiffies) >= GFX_SLICE_PERIOD_MS)) {
	1845	/* if our time is up, let KGD work drain before scheduling more */
	1846	wait = true;
	1847	/* reset the timer period */
	1848	adev->gfx.enforce_isolation_time[idx] = GFX_SLICE_PERIOD_MS;
	1849	} else {
	1850	/* set the timer period to what's left in our time slice */
	1851	adev->gfx.enforce_isolation_time[idx] =
	1852	GFX_SLICE_PERIOD_MS - jiffies_to_msecs(cjiffies);
	1853	}
	1854	} else {
	1855	/* if jiffies wrap around we will just wait a little longer */
	1856	adev->gfx.enforce_isolation_jiffies[idx] = jiffies;
	1857	}
	1858	} else {
	1859	/* if there is no KFD work, then set the full slice period */
	1860	adev->gfx.enforce_isolation_jiffies[idx] = jiffies;
	1861	adev->gfx.enforce_isolation_time[idx] = GFX_SLICE_PERIOD_MS;
	1862	}
	1863	}
	1864	mutex_unlock(&adev->enforce_isolation_mutex);
	1865
	1866	if (wait)
	1867	msleep(GFX_SLICE_PERIOD_MS);
	1868	}
	1869
	1870	void amdgpu_gfx_enforce_isolation_ring_begin_use(struct amdgpu_ring *ring)
	1871	{
	1872	struct amdgpu_device *adev = ring->adev;
	1873	u32 idx;
	1874
	1875	if (!adev->gfx.enable_cleaner_shader)
	1876	return;
	1877
	1878	if (ring->xcp_id == AMDGPU_XCP_NO_PARTITION)
	1879	idx = 0;
	1880	else
	1881	idx = ring->xcp_id;
	1882
	1883	if (idx >= MAX_XCP)
	1884	return;
	1885
	1886	/* Don't submit more work until KFD has had some time */
	1887	amdgpu_gfx_enforce_isolation_wait_for_kfd(adev, idx);
	1888
	1889	mutex_lock(&adev->enforce_isolation_mutex);
	1890	if (adev->enforce_isolation[idx]) {
	1891	if (adev->kfd.init_complete)
	1892	amdgpu_gfx_kfd_sch_ctrl(adev, idx, false);
	1893	}
	1894	mutex_unlock(&adev->enforce_isolation_mutex);
	1895	}
	1896
	1897	void amdgpu_gfx_enforce_isolation_ring_end_use(struct amdgpu_ring *ring)
	1898	{
	1899	struct amdgpu_device *adev = ring->adev;
	1900	u32 idx;
	1901
	1902	if (!adev->gfx.enable_cleaner_shader)
	1903	return;
	1904
	1905	if (ring->xcp_id == AMDGPU_XCP_NO_PARTITION)
	1906	idx = 0;
	1907	else
	1908	idx = ring->xcp_id;
	1909
	1910	if (idx >= MAX_XCP)
	1911	return;
	1912
	1913	mutex_lock(&adev->enforce_isolation_mutex);
	1914	if (adev->enforce_isolation[idx]) {
	1915	if (adev->kfd.init_complete)
	1916	amdgpu_gfx_kfd_sch_ctrl(adev, idx, true);
	1917	}
	1918	mutex_unlock(&adev->enforce_isolation_mutex);
	1919	}
	1920
/*
 * debugfs file to enable/disable gfx job submission to a specific core.
 */
	1924	#if defined(CONFIG_DEBUG_FS)
	1925	static int amdgpu_debugfs_gfx_sched_mask_set(void *data, u64 val)
	1926	{
	1927	struct amdgpu_device adev = (struct amdgpu_device )data;
	1928	u32 i;
	1929	u64 mask = 0;
	1930	struct amdgpu_ring *ring;
	1931
	1932	if (!adev)
	1933	return -ENODEV;
	1934
	1935	mask = (1 << adev->gfx.num_gfx_rings) - 1;
	1936	if ((val & mask) == 0)
	1937	return -EINVAL;
	1938
	1939	for (i = 0; i < adev->gfx.num_gfx_rings; ++i) {
	1940	ring = &adev->gfx.gfx_ring[i];
	1941	if (val & (1 << i))
	1942	ring->sched.ready = true;
	1943	else
	1944	ring->sched.ready = false;
	1945	}
	1946	/* publish sched.ready flag update effective immediately across smp */
	1947	smp_rmb();
	1948	return 0;
	1949	}
	1950
	1951	static int amdgpu_debugfs_gfx_sched_mask_get(void data, u64 val)
	1952	{
	1953	struct amdgpu_device adev = (struct amdgpu_device )data;
	1954	u32 i;
	1955	u64 mask = 0;
	1956	struct amdgpu_ring *ring;
	1957
	1958	if (!adev)
	1959	return -ENODEV;
	1960	for (i = 0; i < adev->gfx.num_gfx_rings; ++i) {
	1961	ring = &adev->gfx.gfx_ring[i];
	1962	if (ring->sched.ready)
	1963	mask \|= 1 << i;
	1964	}
	1965
	1966	*val = mask;
	1967	return 0;
	1968	}
	1969
	1970	DEFINE_DEBUGFS_ATTRIBUTE(amdgpu_debugfs_gfx_sched_mask_fops,
	1971	amdgpu_debugfs_gfx_sched_mask_get,
	1972	amdgpu_debugfs_gfx_sched_mask_set, "%llx\n");
	1973
	1974	#endif
	1975
	1976	void amdgpu_debugfs_gfx_sched_mask_init(struct amdgpu_device *adev)
	1977	{
	1978	#if defined(CONFIG_DEBUG_FS)
	1979	struct drm_minor *minor = adev_to_drm(adev)->primary;
	1980	struct dentry *root = minor->debugfs_root;
	1981	char name[32];
	1982
	1983	if (!(adev->gfx.num_gfx_rings > 1))
	1984	return;
	1985	sprintf(name, "amdgpu_gfx_sched_mask");
	1986	debugfs_create_file(name, 0600, root, adev,
	1987	&amdgpu_debugfs_gfx_sched_mask_fops);
	1988	#endif
	1989	}
	1990
/*
 * debugfs file to enable/disable compute job submission to a specific core.
 */
	1994	#if defined(CONFIG_DEBUG_FS)
	1995	static int amdgpu_debugfs_compute_sched_mask_set(void *data, u64 val)
	1996	{
	1997	struct amdgpu_device adev = (struct amdgpu_device )data;
	1998	u32 i;
	1999	u64 mask = 0;
	2000	struct amdgpu_ring *ring;
	2001
	2002	if (!adev)
	2003	return -ENODEV;
	2004
	2005	mask = (1 << adev->gfx.num_compute_rings) - 1;
	2006	if ((val & mask) == 0)
	2007	return -EINVAL;
	2008
	2009	for (i = 0; i < adev->gfx.num_compute_rings; ++i) {
	2010	ring = &adev->gfx.compute_ring[i];
	2011	if (val & (1 << i))
	2012	ring->sched.ready = true;
	2013	else
	2014	ring->sched.ready = false;
	2015	}
	2016
	2017	/* publish sched.ready flag update effective immediately across smp */
	2018	smp_rmb();
	2019	return 0;
	2020	}
	2021
	2022	static int amdgpu_debugfs_compute_sched_mask_get(void data, u64 val)
	2023	{
	2024	struct amdgpu_device adev = (struct amdgpu_device )data;
	2025	u32 i;
	2026	u64 mask = 0;
	2027	struct amdgpu_ring *ring;
	2028
	2029	if (!adev)
	2030	return -ENODEV;
	2031	for (i = 0; i < adev->gfx.num_compute_rings; ++i) {
	2032	ring = &adev->gfx.compute_ring[i];
	2033	if (ring->sched.ready)
	2034	mask \|= 1 << i;
	2035	}
	2036
	2037	*val = mask;
	2038	return 0;
	2039	}
	2040
	2041	DEFINE_DEBUGFS_ATTRIBUTE(amdgpu_debugfs_compute_sched_mask_fops,
	2042	amdgpu_debugfs_compute_sched_mask_get,
	2043	amdgpu_debugfs_compute_sched_mask_set, "%llx\n");
	2044
	2045	#endif
	2046
	2047	void amdgpu_debugfs_compute_sched_mask_init(struct amdgpu_device *adev)
	2048	{
	2049	#if defined(CONFIG_DEBUG_FS)
	2050	struct drm_minor *minor = adev_to_drm(adev)->primary;
	2051	struct dentry *root = minor->debugfs_root;
	2052	char name[32];
	2053
	2054	if (!(adev->gfx.num_compute_rings > 1))
	2055	return;
	2056	sprintf(name, "amdgpu_compute_sched_mask");
	2057	debugfs_create_file(name, 0600, root, adev,
	2058	&amdgpu_debugfs_compute_sched_mask_fops);
	2059	#endif
	2060	}