// SPDX-License-Identifier: GPL-2.0 OR MIT
/*
 * Copyright 2014-2022 Advanced Micro Devices, Inc.
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the "Software"),
 * to deal in the Software without restriction, including without limitation
 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
 * and/or sell copies of the Software, and to permit persons to whom the
 * Software is furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice shall be included in
 * all copies or substantial portions of the Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
 * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
 * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
 * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
 * OTHER DEALINGS IN THE SOFTWARE.
 */

#include <linux/types.h>
#include <linux/kernel.h>
#include <linux/pci.h>
#include <linux/errno.h>
#include <linux/acpi.h>
#include <linux/hash.h>
#include <linux/cpufreq.h>
#include <linux/log2.h>
#include <linux/dmi.h>
#include <linux/atomic.h>
#include <linux/crc16.h>

#include "kfd_priv.h"
#include "kfd_crat.h"
#include "kfd_topology.h"
#include "kfd_device_queue_manager.h"
#include "kfd_svm.h"
#include "kfd_debug.h"
#include "amdgpu_amdkfd.h"
#include "amdgpu_ras.h"
#include "amdgpu.h"

/* topology_device_list - Master list of all topology devices */
static struct list_head topology_device_list;
static struct kfd_system_properties sys_props;

static DECLARE_RWSEM(topology_lock);
static uint32_t topology_crat_proximity_domain;

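/* Look up a topology device by proximity domain without taking
 * topology_lock. The caller must already hold the lock (read or write);
 * the locked wrapper below is the usual entry point.
 */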
struct kfd_topology_device *kfd_topology_device_by_proximity_domain_no_lock(
                                                uint32_t proximity_domain)
{
        struct kfd_topology_device *top_dev;
        struct kfd_topology_device *device = NULL;

        list_for_each_entry(top_dev, &topology_device_list, list)
                if (top_dev->proximity_domain == proximity_domain) {
                        device = top_dev;
                        break;
                }

        return device;
}

struct kfd_topology_device *kfd_topology_device_by_proximity_domain(
                                                uint32_t proximity_domain)
{
        struct kfd_topology_device *device = NULL;

        down_read(&topology_lock);

        device = kfd_topology_device_by_proximity_domain_no_lock(
                                                        proximity_domain);
        up_read(&topology_lock);

        return device;
}

struct kfd_topology_device *kfd_topology_device_by_id(uint32_t gpu_id)
{
        struct kfd_topology_device *top_dev = NULL;
        struct kfd_topology_device *ret = NULL;

        down_read(&topology_lock);

        list_for_each_entry(top_dev, &topology_device_list, list)
                if (top_dev->gpu_id == gpu_id) {
                        ret = top_dev;
                        break;
                }

        up_read(&topology_lock);

        return ret;
}

struct kfd_node *kfd_device_by_id(uint32_t gpu_id)
{
        struct kfd_topology_device *top_dev;

        top_dev = kfd_topology_device_by_id(gpu_id);
        if (!top_dev)
                return NULL;

        return top_dev->gpu;
}

struct kfd_node *kfd_device_by_pci_dev(const struct pci_dev *pdev)
{
        struct kfd_topology_device *top_dev;
        struct kfd_node *device = NULL;

        down_read(&topology_lock);

        list_for_each_entry(top_dev, &topology_device_list, list)
                if (top_dev->gpu && top_dev->gpu->adev->pdev == pdev) {
                        device = top_dev->gpu;
                        break;
                }

        up_read(&topology_lock);

        return device;
}

/* Called with write topology_lock acquired */
static void kfd_release_topology_device(struct kfd_topology_device *dev)
{
        struct kfd_mem_properties *mem;
        struct kfd_cache_properties *cache;
        struct kfd_iolink_properties *iolink;
        struct kfd_iolink_properties *p2plink;
        struct kfd_perf_properties *perf;

        list_del(&dev->list);

        while (dev->mem_props.next != &dev->mem_props) {
                mem = container_of(dev->mem_props.next,
                                struct kfd_mem_properties, list);
                list_del(&mem->list);
                kfree(mem);
        }

        while (dev->cache_props.next != &dev->cache_props) {
                cache = container_of(dev->cache_props.next,
                                struct kfd_cache_properties, list);
                list_del(&cache->list);
                kfree(cache);
        }

        while (dev->io_link_props.next != &dev->io_link_props) {
                iolink = container_of(dev->io_link_props.next,
                                struct kfd_iolink_properties, list);
                list_del(&iolink->list);
                kfree(iolink);
        }

        while (dev->p2p_link_props.next != &dev->p2p_link_props) {
                p2plink = container_of(dev->p2p_link_props.next,
                                struct kfd_iolink_properties, list);
                list_del(&p2plink->list);
                kfree(p2plink);
        }

        while (dev->perf_props.next != &dev->perf_props) {
                perf = container_of(dev->perf_props.next,
                                struct kfd_perf_properties, list);
                list_del(&perf->list);
                kfree(perf);
        }

        kfree(dev);
}

void kfd_release_topology_device_list(struct list_head *device_list)
{
        struct kfd_topology_device *dev;

        while (!list_empty(device_list)) {
                dev = list_first_entry(device_list,
                                       struct kfd_topology_device, list);
                kfd_release_topology_device(dev);
        }
}

static void kfd_release_live_view(void)
{
        kfd_release_topology_device_list(&topology_device_list);
        memset(&sys_props, 0, sizeof(sys_props));
}

struct kfd_topology_device *kfd_create_topology_device(
                                struct list_head *device_list)
{
        struct kfd_topology_device *dev;

        dev = kfd_alloc_struct(dev);
        if (!dev) {
                pr_err("No memory to allocate a topology device\n");
                return NULL;
        }

        INIT_LIST_HEAD(&dev->mem_props);
        INIT_LIST_HEAD(&dev->cache_props);
        INIT_LIST_HEAD(&dev->io_link_props);
        INIT_LIST_HEAD(&dev->p2p_link_props);
        INIT_LIST_HEAD(&dev->perf_props);

        list_add_tail(&dev->list, device_list);

        return dev;
}

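/* Helper macros for the sysfs show callbacks below. Each one appends a
 * formatted "name value" line to the PAGE_SIZE buffer and advances offs,
 * so one show function can emit many properties into a single file.
 */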
#define sysfs_show_gen_prop(buffer, offs, fmt, ...)             \
                (offs += snprintf(buffer+offs, PAGE_SIZE-offs,  \
                                  fmt, __VA_ARGS__))
#define sysfs_show_32bit_prop(buffer, offs, name, value) \
                sysfs_show_gen_prop(buffer, offs, "%s %u\n", name, value)
#define sysfs_show_64bit_prop(buffer, offs, name, value) \
                sysfs_show_gen_prop(buffer, offs, "%s %llu\n", name, value)
#define sysfs_show_32bit_val(buffer, offs, value) \
                sysfs_show_gen_prop(buffer, offs, "%u\n", value)
#define sysfs_show_str_val(buffer, offs, value) \
                sysfs_show_gen_prop(buffer, offs, "%s\n", value)

static ssize_t sysprops_show(struct kobject *kobj, struct attribute *attr,
                char *buffer)
{
        int offs = 0;

        /* Making sure that the buffer is an empty string */
        buffer[0] = 0;

        if (attr == &sys_props.attr_genid) {
                sysfs_show_32bit_val(buffer, offs,
                                     sys_props.generation_count);
        } else if (attr == &sys_props.attr_props) {
                sysfs_show_64bit_prop(buffer, offs, "platform_oem",
                                      sys_props.platform_oem);
                sysfs_show_64bit_prop(buffer, offs, "platform_id",
                                      sys_props.platform_id);
                sysfs_show_64bit_prop(buffer, offs, "platform_rev",
                                      sys_props.platform_rev);
        } else {
                offs = -EINVAL;
        }

        return offs;
}

static void kfd_topology_kobj_release(struct kobject *kobj)
{
        kfree(kobj);
}

static const struct sysfs_ops sysprops_ops = {
        .show = sysprops_show,
};

static const struct kobj_type sysprops_type = {
        .release = kfd_topology_kobj_release,
        .sysfs_ops = &sysprops_ops,
};

static ssize_t iolink_show(struct kobject *kobj, struct attribute *attr,
                char *buffer)
{
        int offs = 0;
        struct kfd_iolink_properties *iolink;

        /* Making sure that the buffer is an empty string */
        buffer[0] = 0;

        iolink = container_of(attr, struct kfd_iolink_properties, attr);
        if (iolink->gpu && kfd_devcgroup_check_permission(iolink->gpu))
                return -EPERM;
        sysfs_show_32bit_prop(buffer, offs, "type", iolink->iolink_type);
        sysfs_show_32bit_prop(buffer, offs, "version_major", iolink->ver_maj);
        sysfs_show_32bit_prop(buffer, offs, "version_minor", iolink->ver_min);
        sysfs_show_32bit_prop(buffer, offs, "node_from", iolink->node_from);
        sysfs_show_32bit_prop(buffer, offs, "node_to", iolink->node_to);
        sysfs_show_32bit_prop(buffer, offs, "weight", iolink->weight);
        sysfs_show_32bit_prop(buffer, offs, "min_latency", iolink->min_latency);
        sysfs_show_32bit_prop(buffer, offs, "max_latency", iolink->max_latency);
        sysfs_show_32bit_prop(buffer, offs, "min_bandwidth",
                              iolink->min_bandwidth);
        sysfs_show_32bit_prop(buffer, offs, "max_bandwidth",
                              iolink->max_bandwidth);
        sysfs_show_32bit_prop(buffer, offs, "recommended_transfer_size",
                              iolink->rec_transfer_size);
        sysfs_show_32bit_prop(buffer, offs, "recommended_sdma_engine_id_mask",
                              iolink->rec_sdma_eng_id_mask);
        sysfs_show_32bit_prop(buffer, offs, "flags", iolink->flags);

        return offs;
}

static const struct sysfs_ops iolink_ops = {
        .show = iolink_show,
};

static const struct kobj_type iolink_type = {
        .release = kfd_topology_kobj_release,
        .sysfs_ops = &iolink_ops,
};

static ssize_t mem_show(struct kobject *kobj, struct attribute *attr,
                char *buffer)
{
        int offs = 0;
        struct kfd_mem_properties *mem;

        /* Making sure that the buffer is an empty string */
        buffer[0] = 0;

        mem = container_of(attr, struct kfd_mem_properties, attr);
        if (mem->gpu && kfd_devcgroup_check_permission(mem->gpu))
                return -EPERM;
        sysfs_show_32bit_prop(buffer, offs, "heap_type", mem->heap_type);
        sysfs_show_64bit_prop(buffer, offs, "size_in_bytes",
                              mem->size_in_bytes);
        sysfs_show_32bit_prop(buffer, offs, "flags", mem->flags);
        sysfs_show_32bit_prop(buffer, offs, "width", mem->width);
        sysfs_show_32bit_prop(buffer, offs, "mem_clk_max",
                              mem->mem_clk_max);

        return offs;
}

static const struct sysfs_ops mem_ops = {
        .show = mem_show,
};

static const struct kobj_type mem_type = {
        .release = kfd_topology_kobj_release,
        .sysfs_ops = &mem_ops,
};

static ssize_t kfd_cache_show(struct kobject *kobj, struct attribute *attr,
                char *buffer)
{
        int offs = 0;
        uint32_t i, j;
        struct kfd_cache_properties *cache;

        /* Making sure that the buffer is an empty string */
        buffer[0] = 0;
        cache = container_of(attr, struct kfd_cache_properties, attr);
        if (cache->gpu && kfd_devcgroup_check_permission(cache->gpu))
                return -EPERM;
        sysfs_show_32bit_prop(buffer, offs, "processor_id_low",
                        cache->processor_id_low);
        sysfs_show_32bit_prop(buffer, offs, "level", cache->cache_level);
        sysfs_show_32bit_prop(buffer, offs, "size", cache->cache_size);
        sysfs_show_32bit_prop(buffer, offs, "cache_line_size",
                              cache->cacheline_size);
        sysfs_show_32bit_prop(buffer, offs, "cache_lines_per_tag",
                              cache->cachelines_per_tag);
        sysfs_show_32bit_prop(buffer, offs, "association", cache->cache_assoc);
        sysfs_show_32bit_prop(buffer, offs, "latency", cache->cache_latency);
        sysfs_show_32bit_prop(buffer, offs, "type", cache->cache_type);

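        /* sibling_map is a bitmask of the processors that share this cache;
         * emit it as a comma-separated list of 0/1 values, one per bit.
         */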
        offs += snprintf(buffer+offs, PAGE_SIZE-offs, "sibling_map ");
        for (i = 0; i < cache->sibling_map_size; i++)
                for (j = 0; j < sizeof(cache->sibling_map[0])*8; j++)
                        /* Check each bit */
                        offs += snprintf(buffer+offs, PAGE_SIZE-offs, "%d,",
                                                (cache->sibling_map[i] >> j) & 1);

        /* Replace the last "," with end of line */
        buffer[offs-1] = '\n';
        return offs;
}

static const struct sysfs_ops cache_ops = {
        .show = kfd_cache_show,
};

static const struct kobj_type cache_type = {
        .release = kfd_topology_kobj_release,
        .sysfs_ops = &cache_ops,
};

/****** Sysfs of Performance Counters ******/

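/* Wraps a kobj_attribute together with the 32-bit value it reports, so
 * perf_show() can recover the data with container_of().
 */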
struct kfd_perf_attr {
        struct kobj_attribute attr;
        uint32_t data;
};

static ssize_t perf_show(struct kobject *kobj, struct kobj_attribute *attrs,
                        char *buf)
{
        int offs = 0;
        struct kfd_perf_attr *attr;

        buf[0] = 0;
        attr = container_of(attrs, struct kfd_perf_attr, attr);
        if (!attr->data) /* invalid data for PMC */
                return 0;
        else
                return sysfs_show_32bit_val(buf, offs, attr->data);
}

#define KFD_PERF_DESC(_name, _data)                     \
{                                                       \
        .attr  = __ATTR(_name, 0444, perf_show, NULL),  \
        .data = _data,                                  \
}

static struct kfd_perf_attr perf_attr_iommu[] = {
        KFD_PERF_DESC(max_concurrent, 0),
        KFD_PERF_DESC(num_counters, 0),
        KFD_PERF_DESC(counter_ids, 0),
};
/****************************************/

static ssize_t node_show(struct kobject *kobj, struct attribute *attr,
                char *buffer)
{
        int offs = 0;
        struct kfd_topology_device *dev;
        uint32_t log_max_watch_addr;

        /* Making sure that the buffer is an empty string */
        buffer[0] = 0;

        if (strcmp(attr->name, "gpu_id") == 0) {
                dev = container_of(attr, struct kfd_topology_device,
                                attr_gpuid);
                if (dev->gpu && kfd_devcgroup_check_permission(dev->gpu))
                        return -EPERM;
                return sysfs_show_32bit_val(buffer, offs, dev->gpu_id);
        }

        if (strcmp(attr->name, "name") == 0) {
                dev = container_of(attr, struct kfd_topology_device,
                                attr_name);

                if (dev->gpu && kfd_devcgroup_check_permission(dev->gpu))
                        return -EPERM;
                return sysfs_show_str_val(buffer, offs, dev->node_props.name);
        }

        dev = container_of(attr, struct kfd_topology_device,
                        attr_props);
        if (dev->gpu && kfd_devcgroup_check_permission(dev->gpu))
                return -EPERM;
        sysfs_show_32bit_prop(buffer, offs, "cpu_cores_count",
                              dev->node_props.cpu_cores_count);
        sysfs_show_32bit_prop(buffer, offs, "simd_count",
                              dev->gpu ? dev->node_props.simd_count : 0);
        sysfs_show_32bit_prop(buffer, offs, "mem_banks_count",
                              dev->node_props.mem_banks_count);
        sysfs_show_32bit_prop(buffer, offs, "caches_count",
                              dev->node_props.caches_count);
        sysfs_show_32bit_prop(buffer, offs, "io_links_count",
                              dev->node_props.io_links_count);
        sysfs_show_32bit_prop(buffer, offs, "p2p_links_count",
                              dev->node_props.p2p_links_count);
        sysfs_show_32bit_prop(buffer, offs, "cpu_core_id_base",
                              dev->node_props.cpu_core_id_base);
        sysfs_show_32bit_prop(buffer, offs, "simd_id_base",
                              dev->node_props.simd_id_base);
        sysfs_show_32bit_prop(buffer, offs, "max_waves_per_simd",
                              dev->node_props.max_waves_per_simd);
        sysfs_show_32bit_prop(buffer, offs, "lds_size_in_kb",
                              dev->node_props.lds_size_in_kb);
        sysfs_show_32bit_prop(buffer, offs, "gds_size_in_kb",
                              dev->node_props.gds_size_in_kb);
        sysfs_show_32bit_prop(buffer, offs, "num_gws",
                              dev->node_props.num_gws);
        sysfs_show_32bit_prop(buffer, offs, "wave_front_size",
                              dev->node_props.wave_front_size);
        sysfs_show_32bit_prop(buffer, offs, "array_count",
                              dev->gpu ? (dev->node_props.array_count *
                                          NUM_XCC(dev->gpu->xcc_mask)) : 0);
        sysfs_show_32bit_prop(buffer, offs, "simd_arrays_per_engine",
                              dev->node_props.simd_arrays_per_engine);
        sysfs_show_32bit_prop(buffer, offs, "cu_per_simd_array",
                              dev->node_props.cu_per_simd_array);
        sysfs_show_32bit_prop(buffer, offs, "simd_per_cu",
                              dev->node_props.simd_per_cu);
        sysfs_show_32bit_prop(buffer, offs, "max_slots_scratch_cu",
                              dev->node_props.max_slots_scratch_cu);
        sysfs_show_32bit_prop(buffer, offs, "gfx_target_version",
                              dev->node_props.gfx_target_version);
        sysfs_show_32bit_prop(buffer, offs, "vendor_id",
                              dev->node_props.vendor_id);
        sysfs_show_32bit_prop(buffer, offs, "device_id",
                              dev->node_props.device_id);
        sysfs_show_32bit_prop(buffer, offs, "location_id",
                              dev->node_props.location_id);
        sysfs_show_32bit_prop(buffer, offs, "domain",
                              dev->node_props.domain);
        sysfs_show_32bit_prop(buffer, offs, "drm_render_minor",
                              dev->node_props.drm_render_minor);
        sysfs_show_64bit_prop(buffer, offs, "hive_id",
                              dev->node_props.hive_id);
        sysfs_show_32bit_prop(buffer, offs, "num_sdma_engines",
                              dev->node_props.num_sdma_engines);
        sysfs_show_32bit_prop(buffer, offs, "num_sdma_xgmi_engines",
                              dev->node_props.num_sdma_xgmi_engines);
        sysfs_show_32bit_prop(buffer, offs, "num_sdma_queues_per_engine",
                              dev->node_props.num_sdma_queues_per_engine);
        sysfs_show_32bit_prop(buffer, offs, "num_cp_queues",
                              dev->node_props.num_cp_queues);

        if (dev->gpu) {
                log_max_watch_addr =
                        __ilog2_u32(dev->gpu->kfd->device_info.num_of_watch_points);

                if (log_max_watch_addr) {
                        dev->node_props.capability |=
                                        HSA_CAP_WATCH_POINTS_SUPPORTED;

                        dev->node_props.capability |=
                                ((log_max_watch_addr <<
                                        HSA_CAP_WATCH_POINTS_TOTALBITS_SHIFT) &
                                HSA_CAP_WATCH_POINTS_TOTALBITS_MASK);
                }

                if (dev->gpu->adev->asic_type == CHIP_TONGA)
                        dev->node_props.capability |=
                                        HSA_CAP_AQL_QUEUE_DOUBLE_MAP;

                sysfs_show_32bit_prop(buffer, offs, "max_engine_clk_fcompute",
                        dev->node_props.max_engine_clk_fcompute);

                sysfs_show_64bit_prop(buffer, offs, "local_mem_size", 0ULL);

                sysfs_show_32bit_prop(buffer, offs, "fw_version",
                                      dev->gpu->kfd->mec_fw_version);
                sysfs_show_32bit_prop(buffer, offs, "capability",
                                      dev->node_props.capability);
                sysfs_show_64bit_prop(buffer, offs, "debug_prop",
                                      dev->node_props.debug_prop);
                sysfs_show_32bit_prop(buffer, offs, "sdma_fw_version",
                                      dev->gpu->kfd->sdma_fw_version);
                sysfs_show_64bit_prop(buffer, offs, "unique_id",
                                      dev->gpu->adev->unique_id);
                sysfs_show_32bit_prop(buffer, offs, "num_xcc",
                                      NUM_XCC(dev->gpu->xcc_mask));
        }

        return sysfs_show_32bit_prop(buffer, offs, "max_engine_clk_ccompute",
                                     cpufreq_quick_get_max(0)/1000);
}

static const struct sysfs_ops node_ops = {
        .show = node_show,
};

static const struct kobj_type node_type = {
        .release = kfd_topology_kobj_release,
        .sysfs_ops = &node_ops,
};

static void kfd_remove_sysfs_file(struct kobject *kobj, struct attribute *attr)
{
        sysfs_remove_file(kobj, attr);
        kobject_del(kobj);
        kobject_put(kobj);
}

static void kfd_remove_sysfs_node_entry(struct kfd_topology_device *dev)
{
        struct kfd_iolink_properties *p2plink;
        struct kfd_iolink_properties *iolink;
        struct kfd_cache_properties *cache;
        struct kfd_mem_properties *mem;
        struct kfd_perf_properties *perf;

        if (dev->kobj_iolink) {
                list_for_each_entry(iolink, &dev->io_link_props, list)
                        if (iolink->kobj) {
                                kfd_remove_sysfs_file(iolink->kobj,
                                                        &iolink->attr);
                                iolink->kobj = NULL;
                        }
                kobject_del(dev->kobj_iolink);
                kobject_put(dev->kobj_iolink);
                dev->kobj_iolink = NULL;
        }

        if (dev->kobj_p2plink) {
                list_for_each_entry(p2plink, &dev->p2p_link_props, list)
                        if (p2plink->kobj) {
                                kfd_remove_sysfs_file(p2plink->kobj,
                                                        &p2plink->attr);
                                p2plink->kobj = NULL;
                        }
                kobject_del(dev->kobj_p2plink);
                kobject_put(dev->kobj_p2plink);
                dev->kobj_p2plink = NULL;
        }

        if (dev->kobj_cache) {
                list_for_each_entry(cache, &dev->cache_props, list)
                        if (cache->kobj) {
                                kfd_remove_sysfs_file(cache->kobj,
                                                        &cache->attr);
                                cache->kobj = NULL;
                        }
                kobject_del(dev->kobj_cache);
                kobject_put(dev->kobj_cache);
                dev->kobj_cache = NULL;
        }

        if (dev->kobj_mem) {
                list_for_each_entry(mem, &dev->mem_props, list)
                        if (mem->kobj) {
                                kfd_remove_sysfs_file(mem->kobj, &mem->attr);
                                mem->kobj = NULL;
                        }
                kobject_del(dev->kobj_mem);
                kobject_put(dev->kobj_mem);
                dev->kobj_mem = NULL;
        }

        if (dev->kobj_perf) {
                list_for_each_entry(perf, &dev->perf_props, list) {
                        kfree(perf->attr_group);
                        perf->attr_group = NULL;
                }
                kobject_del(dev->kobj_perf);
                kobject_put(dev->kobj_perf);
                dev->kobj_perf = NULL;
        }

        if (dev->kobj_node) {
                sysfs_remove_file(dev->kobj_node, &dev->attr_gpuid);
                sysfs_remove_file(dev->kobj_node, &dev->attr_name);
                sysfs_remove_file(dev->kobj_node, &dev->attr_props);
                kobject_del(dev->kobj_node);
                kobject_put(dev->kobj_node);
                dev->kobj_node = NULL;
        }
}

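/* Create the sysfs entry for one topology node: the node directory
 * itself, its gpu_id/name/properties files, and per-item kobjects under
 * mem_banks/, caches/, io_links/, p2p_links/ and perf/. On failure a
 * partially built tree is left behind; kfd_remove_sysfs_node_entry()
 * tolerates that, skipping kobjects that were never created.
 */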
static int kfd_build_sysfs_node_entry(struct kfd_topology_device *dev,
                uint32_t id)
{
        struct kfd_iolink_properties *p2plink;
        struct kfd_iolink_properties *iolink;
        struct kfd_cache_properties *cache;
        struct kfd_mem_properties *mem;
        struct kfd_perf_properties *perf;
        int ret;
        uint32_t i, num_attrs;
        struct attribute **attrs;

        if (WARN_ON(dev->kobj_node))
                return -EEXIST;

        /*
         * Creating the sysfs folders
         */
        dev->kobj_node = kfd_alloc_struct(dev->kobj_node);
        if (!dev->kobj_node)
                return -ENOMEM;

        ret = kobject_init_and_add(dev->kobj_node, &node_type,
                        sys_props.kobj_nodes, "%d", id);
        if (ret < 0) {
                kobject_put(dev->kobj_node);
                return ret;
        }

        dev->kobj_mem = kobject_create_and_add("mem_banks", dev->kobj_node);
        if (!dev->kobj_mem)
                return -ENOMEM;

        dev->kobj_cache = kobject_create_and_add("caches", dev->kobj_node);
        if (!dev->kobj_cache)
                return -ENOMEM;

        dev->kobj_iolink = kobject_create_and_add("io_links", dev->kobj_node);
        if (!dev->kobj_iolink)
                return -ENOMEM;

        dev->kobj_p2plink = kobject_create_and_add("p2p_links", dev->kobj_node);
        if (!dev->kobj_p2plink)
                return -ENOMEM;

        dev->kobj_perf = kobject_create_and_add("perf", dev->kobj_node);
        if (!dev->kobj_perf)
                return -ENOMEM;

        /*
         * Creating sysfs files for node properties
         */
        dev->attr_gpuid.name = "gpu_id";
        dev->attr_gpuid.mode = KFD_SYSFS_FILE_MODE;
        sysfs_attr_init(&dev->attr_gpuid);
        dev->attr_name.name = "name";
        dev->attr_name.mode = KFD_SYSFS_FILE_MODE;
        sysfs_attr_init(&dev->attr_name);
        dev->attr_props.name = "properties";
        dev->attr_props.mode = KFD_SYSFS_FILE_MODE;
        sysfs_attr_init(&dev->attr_props);
        ret = sysfs_create_file(dev->kobj_node, &dev->attr_gpuid);
        if (ret < 0)
                return ret;
        ret = sysfs_create_file(dev->kobj_node, &dev->attr_name);
        if (ret < 0)
                return ret;
        ret = sysfs_create_file(dev->kobj_node, &dev->attr_props);
        if (ret < 0)
                return ret;

        i = 0;
        list_for_each_entry(mem, &dev->mem_props, list) {
                mem->kobj = kzalloc(sizeof(struct kobject), GFP_KERNEL);
                if (!mem->kobj)
                        return -ENOMEM;
                ret = kobject_init_and_add(mem->kobj, &mem_type,
                                dev->kobj_mem, "%d", i);
                if (ret < 0) {
                        kobject_put(mem->kobj);
                        return ret;
                }

                mem->attr.name = "properties";
                mem->attr.mode = KFD_SYSFS_FILE_MODE;
                sysfs_attr_init(&mem->attr);
                ret = sysfs_create_file(mem->kobj, &mem->attr);
                if (ret < 0)
                        return ret;
                i++;
        }

        i = 0;
        list_for_each_entry(cache, &dev->cache_props, list) {
                cache->kobj = kzalloc(sizeof(struct kobject), GFP_KERNEL);
                if (!cache->kobj)
                        return -ENOMEM;
                ret = kobject_init_and_add(cache->kobj, &cache_type,
                                dev->kobj_cache, "%d", i);
                if (ret < 0) {
                        kobject_put(cache->kobj);
                        return ret;
                }

                cache->attr.name = "properties";
                cache->attr.mode = KFD_SYSFS_FILE_MODE;
                sysfs_attr_init(&cache->attr);
                ret = sysfs_create_file(cache->kobj, &cache->attr);
                if (ret < 0)
                        return ret;
                i++;
        }

        i = 0;
        list_for_each_entry(iolink, &dev->io_link_props, list) {
                iolink->kobj = kzalloc(sizeof(struct kobject), GFP_KERNEL);
                if (!iolink->kobj)
                        return -ENOMEM;
                ret = kobject_init_and_add(iolink->kobj, &iolink_type,
                                dev->kobj_iolink, "%d", i);
                if (ret < 0) {
                        kobject_put(iolink->kobj);
                        return ret;
                }

                iolink->attr.name = "properties";
                iolink->attr.mode = KFD_SYSFS_FILE_MODE;
                sysfs_attr_init(&iolink->attr);
                ret = sysfs_create_file(iolink->kobj, &iolink->attr);
                if (ret < 0)
                        return ret;
                i++;
        }

        i = 0;
        list_for_each_entry(p2plink, &dev->p2p_link_props, list) {
                p2plink->kobj = kzalloc(sizeof(struct kobject), GFP_KERNEL);
                if (!p2plink->kobj)
                        return -ENOMEM;
                ret = kobject_init_and_add(p2plink->kobj, &iolink_type,
                                dev->kobj_p2plink, "%d", i);
                if (ret < 0) {
                        kobject_put(p2plink->kobj);
                        return ret;
                }

                p2plink->attr.name = "properties";
                p2plink->attr.mode = KFD_SYSFS_FILE_MODE;
                sysfs_attr_init(&p2plink->attr);
                ret = sysfs_create_file(p2plink->kobj, &p2plink->attr);
                if (ret < 0)
                        return ret;
                i++;
        }

        /* All hardware blocks have the same number of attributes. */
        num_attrs = ARRAY_SIZE(perf_attr_iommu);
        list_for_each_entry(perf, &dev->perf_props, list) {
                perf->attr_group = kzalloc(sizeof(struct kfd_perf_attr)
                        * num_attrs + sizeof(struct attribute_group),
                        GFP_KERNEL);
                if (!perf->attr_group)
                        return -ENOMEM;

                attrs = (struct attribute **)(perf->attr_group + 1);
                if (!strcmp(perf->block_name, "iommu")) {
                /* Information of IOMMU's num_counters and counter_ids is shown
                 * under /sys/bus/event_source/devices/amd_iommu. We don't
                 * duplicate here.
                 */
                        perf_attr_iommu[0].data = perf->max_concurrent;
                        for (i = 0; i < num_attrs; i++)
                                attrs[i] = &perf_attr_iommu[i].attr.attr;
                }
                perf->attr_group->name = perf->block_name;
                perf->attr_group->attrs = attrs;
                ret = sysfs_create_group(dev->kobj_perf, perf->attr_group);
                if (ret < 0)
                        return ret;
        }

        return 0;
}

/* Called with write topology lock acquired */
static int kfd_build_sysfs_node_tree(void)
{
        struct kfd_topology_device *dev;
        int ret;
        uint32_t i = 0;

        list_for_each_entry(dev, &topology_device_list, list) {
                ret = kfd_build_sysfs_node_entry(dev, i);
                if (ret < 0)
                        return ret;
                i++;
        }

        return 0;
}

/* Called with write topology lock acquired */
static void kfd_remove_sysfs_node_tree(void)
{
        struct kfd_topology_device *dev;

        list_for_each_entry(dev, &topology_device_list, list)
                kfd_remove_sysfs_node_entry(dev);
}

static int kfd_topology_update_sysfs(void)
{
        int ret;

        if (!sys_props.kobj_topology) {
                sys_props.kobj_topology =
                                kfd_alloc_struct(sys_props.kobj_topology);
                if (!sys_props.kobj_topology)
                        return -ENOMEM;

                ret = kobject_init_and_add(sys_props.kobj_topology,
                                &sysprops_type,  &kfd_device->kobj,
                                "topology");
                if (ret < 0) {
                        kobject_put(sys_props.kobj_topology);
                        return ret;
                }

                sys_props.kobj_nodes = kobject_create_and_add("nodes",
                                sys_props.kobj_topology);
                if (!sys_props.kobj_nodes)
                        return -ENOMEM;

                sys_props.attr_genid.name = "generation_id";
                sys_props.attr_genid.mode = KFD_SYSFS_FILE_MODE;
                sysfs_attr_init(&sys_props.attr_genid);
                ret = sysfs_create_file(sys_props.kobj_topology,
                                &sys_props.attr_genid);
                if (ret < 0)
                        return ret;

                sys_props.attr_props.name = "system_properties";
                sys_props.attr_props.mode = KFD_SYSFS_FILE_MODE;
                sysfs_attr_init(&sys_props.attr_props);
                ret = sysfs_create_file(sys_props.kobj_topology,
                                &sys_props.attr_props);
                if (ret < 0)
                        return ret;
        }

        kfd_remove_sysfs_node_tree();

        return kfd_build_sysfs_node_tree();
}

static void kfd_topology_release_sysfs(void)
{
        kfd_remove_sysfs_node_tree();
        if (sys_props.kobj_topology) {
                sysfs_remove_file(sys_props.kobj_topology,
                                &sys_props.attr_genid);
                sysfs_remove_file(sys_props.kobj_topology,
                                &sys_props.attr_props);
                if (sys_props.kobj_nodes) {
                        kobject_del(sys_props.kobj_nodes);
                        kobject_put(sys_props.kobj_nodes);
                        sys_props.kobj_nodes = NULL;
                }
                kobject_del(sys_props.kobj_topology);
                kobject_put(sys_props.kobj_topology);
                sys_props.kobj_topology = NULL;
        }
}

/* Called with write topology_lock acquired */
static void kfd_topology_update_device_list(struct list_head *temp_list,
                                        struct list_head *master_list)
{
        while (!list_empty(temp_list)) {
                list_move_tail(temp_list->next, master_list);
                sys_props.num_devices++;
        }
}

static void kfd_debug_print_topology(void)
{
        struct kfd_topology_device *dev;

        down_read(&topology_lock);

        dev = list_last_entry(&topology_device_list,
                        struct kfd_topology_device, list);
        if (dev) {
                if (dev->node_props.cpu_cores_count &&
                                dev->node_props.simd_count) {
                        pr_info("Topology: Add APU node [0x%0x:0x%0x]\n",
                                dev->node_props.device_id,
                                dev->node_props.vendor_id);
                } else if (dev->node_props.cpu_cores_count)
                        pr_info("Topology: Add CPU node\n");
                else if (dev->node_props.simd_count)
                        pr_info("Topology: Add dGPU node [0x%0x:0x%0x]\n",
                                dev->node_props.device_id,
                                dev->node_props.vendor_id);
        }
        up_read(&topology_lock);
}

/* Helper function for initializing platform_xx members of
 * kfd_system_properties. Uses OEM info from the last CPU/APU node.
 */
static void kfd_update_system_properties(void)
{
        struct kfd_topology_device *dev;

        down_read(&topology_lock);
        dev = list_last_entry(&topology_device_list,
                        struct kfd_topology_device, list);
        if (dev) {
                sys_props.platform_id = dev->oem_id64;
                sys_props.platform_oem = *((uint64_t *)dev->oem_table_id);
                sys_props.platform_rev = dev->oem_revision;
        }
        up_read(&topology_lock);
}

static void find_system_memory(const struct dmi_header *dm,
        void *private)
{
        struct kfd_mem_properties *mem;
        u16 mem_width, mem_clock;
        struct kfd_topology_device *kdev =
                (struct kfd_topology_device *)private;
        const u8 *dmi_data = (const u8 *)(dm + 1);

        if (dm->type == DMI_ENTRY_MEM_DEVICE && dm->length >= 0x15) {
                mem_width = (u16)(*(const u16 *)(dmi_data + 0x6));
                mem_clock = (u16)(*(const u16 *)(dmi_data + 0x11));
                list_for_each_entry(mem, &kdev->mem_props, list) {
                        if (mem_width != 0xFFFF && mem_width != 0)
                                mem->width = mem_width;
                        if (mem_clock != 0)
                                mem->mem_clk_max = mem_clock;
                }
        }
}

/* kfd_add_non_crat_information - Add information that is not currently
 *      defined in CRAT but is necessary for KFD topology
 * @kdev - topology device to which additional info is added
 */
static void kfd_add_non_crat_information(struct kfd_topology_device *kdev)
{
        /* Check if CPU only node. */
        if (!kdev->gpu) {
                /* Add system memory information */
                dmi_walk(find_system_memory, kdev);
        }
        /* TODO: For GPU node, rearrange code from kfd_topology_add_device */
}

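/* kfd_topology_init - Build the initial topology from a virtual CRAT
 * for the CPU and publish it through sysfs. GPU nodes are added later,
 * as each device is probed, via kfd_topology_add_device().
 */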
int kfd_topology_init(void)
{
        void *crat_image = NULL;
        size_t image_size = 0;
        int ret;
        struct list_head temp_topology_device_list;
        int cpu_only_node = 0;
        struct kfd_topology_device *kdev;
        int proximity_domain;

        /* topology_device_list - Master list of all topology devices
         * temp_topology_device_list - temporary list created while parsing
         * CRAT or VCRAT. Once parsing is complete, the contents of this list
         * are moved to topology_device_list
         */

        /* Initialize the heads of both lists */
        INIT_LIST_HEAD(&topology_device_list);
        INIT_LIST_HEAD(&temp_topology_device_list);
        init_rwsem(&topology_lock);

        memset(&sys_props, 0, sizeof(sys_props));

        /* Proximity domains in ACPI CRAT tables start counting at
         * 0. The same should be true for virtual CRAT tables created
         * at this stage. GPUs added later in kfd_topology_add_device
         * use a counter.
         */
        proximity_domain = 0;

        ret = kfd_create_crat_image_virtual(&crat_image, &image_size,
                                            COMPUTE_UNIT_CPU, NULL,
                                            proximity_domain);
        cpu_only_node = 1;
        if (ret) {
                pr_err("Error creating VCRAT table for CPU\n");
                return ret;
        }

        ret = kfd_parse_crat_table(crat_image,
                                   &temp_topology_device_list,
                                   proximity_domain);
        if (ret) {
                pr_err("Error parsing VCRAT table for CPU\n");
                goto err;
        }

        kdev = list_first_entry(&temp_topology_device_list,
                                struct kfd_topology_device, list);

        down_write(&topology_lock);
        kfd_topology_update_device_list(&temp_topology_device_list,
                                        &topology_device_list);
        topology_crat_proximity_domain = sys_props.num_devices-1;
        ret = kfd_topology_update_sysfs();
        up_write(&topology_lock);

        if (!ret) {
                sys_props.generation_count++;
                kfd_update_system_properties();
                kfd_debug_print_topology();
        } else
                pr_err("Failed to update topology in sysfs ret=%d\n", ret);

        /* For nodes with GPU, this information gets added
         * when GPU is detected (kfd_topology_add_device).
         */
        if (cpu_only_node) {
                /* Add additional information to CPU only node created above */
                down_write(&topology_lock);
                kdev = list_first_entry(&topology_device_list,
                                struct kfd_topology_device, list);
                up_write(&topology_lock);
                kfd_add_non_crat_information(kdev);
        }

err:
        kfd_destroy_crat_image(crat_image);
        return ret;
}

void kfd_topology_shutdown(void)
{
        down_write(&topology_lock);
        kfd_topology_release_sysfs();
        kfd_release_live_view();
        up_write(&topology_lock);
}

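/* Derive a stable, non-zero gpu_id by hashing the device's PCI identity,
 * local memory size and XCC configuration with CRC16, truncated to
 * KFD_GPU_ID_HASH_WIDTH bits. Collisions with IDs already in the
 * topology list are resolved by linear probing below.
 */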
static uint32_t kfd_generate_gpu_id(struct kfd_node *gpu)
{
        uint32_t gpu_id;
        uint32_t buf[8];
        uint64_t local_mem_size;
        struct kfd_topology_device *dev;
        bool is_unique;
        uint8_t *crc_buf;

        if (!gpu)
                return 0;

        crc_buf = (uint8_t *)&buf;
        local_mem_size = gpu->local_mem_info.local_mem_size_private +
                        gpu->local_mem_info.local_mem_size_public;
        buf[0] = gpu->adev->pdev->devfn;
        buf[1] = gpu->adev->pdev->subsystem_vendor |
                (gpu->adev->pdev->subsystem_device << 16);
        buf[2] = pci_domain_nr(gpu->adev->pdev->bus);
        buf[3] = gpu->adev->pdev->device;
        buf[4] = gpu->adev->pdev->bus->number;
        buf[5] = lower_32_bits(local_mem_size);
        buf[6] = upper_32_bits(local_mem_size);
        buf[7] = (ffs(gpu->xcc_mask) - 1) | (NUM_XCC(gpu->xcc_mask) << 16);

        gpu_id = crc16(0, crc_buf, sizeof(buf)) &
                 ((1 << KFD_GPU_ID_HASH_WIDTH) - 1);

        /* There is a small chance that the 16-bit (KFD_GPU_ID_HASH_WIDTH)
         * value generated from the 8-word buffer is 0 or collides with an
         * existing ID, so check that it is unique and non-zero. If it is
         * not unique, increment until a unique ID is found; on overflow,
         * restart from 1.
         */

        down_read(&topology_lock);
        do {
                is_unique = true;
                if (!gpu_id)
                        gpu_id = 1;
                list_for_each_entry(dev, &topology_device_list, list) {
                        if (dev->gpu && dev->gpu_id == gpu_id) {
                                is_unique = false;
                                break;
                        }
                }
                if (unlikely(!is_unique))
                        gpu_id = (gpu_id + 1) &
                                  ((1 << KFD_GPU_ID_HASH_WIDTH) - 1);
        } while (!is_unique);
        up_read(&topology_lock);

        return gpu_id;
}

/* kfd_assign_gpu - Attach @gpu to the correct kfd topology device. If
 *              the GPU device is not already present in the topology device
 *              list then return NULL. This means a new topology device has to
 *              be created for this GPU.
 */
static struct kfd_topology_device *kfd_assign_gpu(struct kfd_node *gpu)
{
        struct kfd_topology_device *dev;
        struct kfd_topology_device *out_dev = NULL;
        struct kfd_mem_properties *mem;
        struct kfd_cache_properties *cache;
        struct kfd_iolink_properties *iolink;
        struct kfd_iolink_properties *p2plink;

        list_for_each_entry(dev, &topology_device_list, list) {
                /* Discrete GPUs need their own topology device list
                 * entries. Don't assign them to CPU/APU nodes.
                 */
                if (dev->node_props.cpu_cores_count)
                        continue;

                if (!dev->gpu && (dev->node_props.simd_count > 0)) {
                        dev->gpu = gpu;
                        out_dev = dev;

                        list_for_each_entry(mem, &dev->mem_props, list)
                                mem->gpu = dev->gpu;
                        list_for_each_entry(cache, &dev->cache_props, list)
                                cache->gpu = dev->gpu;
                        list_for_each_entry(iolink, &dev->io_link_props, list)
                                iolink->gpu = dev->gpu;
                        list_for_each_entry(p2plink, &dev->p2p_link_props, list)
                                p2plink->gpu = dev->gpu;
                        break;
                }
        }
        return out_dev;
}

static void kfd_notify_gpu_change(uint32_t gpu_id, int arrival)
{
        /*
         * TODO: Generate an event for thunk about the arrival/removal
         * of the GPU
         */
}

/* kfd_fill_mem_clk_max_info - Since CRAT doesn't have memory clock info,
 *              patch this after CRAT parsing.
 */
static void kfd_fill_mem_clk_max_info(struct kfd_topology_device *dev)
{
        struct kfd_mem_properties *mem;
        struct kfd_local_mem_info local_mem_info;

        if (!dev)
                return;

        /* Currently, amdgpu driver (amdgpu_mc) deals only with GPUs with
         * single bank of VRAM local memory.
         * for dGPUs - VCRAT reports only one bank of Local Memory
         * for APUs - If CRAT from ACPI reports more than one bank, then
         *      all the banks will report the same mem_clk_max information
         */
        amdgpu_amdkfd_get_local_mem_info(dev->gpu->adev, &local_mem_info,
                                         dev->gpu->xcp);

        list_for_each_entry(mem, &dev->mem_props, list)
                mem->mem_clk_max = local_mem_info.mem_clk_max;
}

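/* Clear the atomics capability on a link when the underlying transport
 * cannot honor it: XGMI always supports atomics, PCIe links depend on
 * the device's DEVCAP2 atomic-completer bits, and the GPU side depends
 * on whether PCIe atomics were successfully requested at init.
 */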
static void kfd_set_iolink_no_atomics(struct kfd_topology_device *dev,
                                        struct kfd_topology_device *target_gpu_dev,
                                        struct kfd_iolink_properties *link)
{
        /* xgmi always supports atomics between links. */
        if (link->iolink_type == CRAT_IOLINK_TYPE_XGMI)
                return;

        /* check pcie support to set cpu(dev) flags for target_gpu_dev link. */
        if (target_gpu_dev) {
                uint32_t cap;

                pcie_capability_read_dword(target_gpu_dev->gpu->adev->pdev,
                                PCI_EXP_DEVCAP2, &cap);

                if (!(cap & (PCI_EXP_DEVCAP2_ATOMIC_COMP32 |
                             PCI_EXP_DEVCAP2_ATOMIC_COMP64)))
                        link->flags |= CRAT_IOLINK_FLAGS_NO_ATOMICS_32_BIT |
                                CRAT_IOLINK_FLAGS_NO_ATOMICS_64_BIT;
        /* set gpu (dev) flags. */
        } else {
                if (!dev->gpu->kfd->pci_atomic_requested ||
                                dev->gpu->adev->asic_type == CHIP_HAWAII)
                        link->flags |= CRAT_IOLINK_FLAGS_NO_ATOMICS_32_BIT |
                                CRAT_IOLINK_FLAGS_NO_ATOMICS_64_BIT;
        }
}

static void kfd_set_iolink_non_coherent(struct kfd_topology_device *to_dev,
                struct kfd_iolink_properties *outbound_link,
                struct kfd_iolink_properties *inbound_link)
{
        /* CPU -> GPU with PCIe */
        if (!to_dev->gpu &&
            inbound_link->iolink_type == CRAT_IOLINK_TYPE_PCIEXPRESS)
                inbound_link->flags |= CRAT_IOLINK_FLAGS_NON_COHERENT;

        if (to_dev->gpu) {
                /* GPU <-> GPU with PCIe and
                 * Vega20 with XGMI
                 */
                if (inbound_link->iolink_type == CRAT_IOLINK_TYPE_PCIEXPRESS ||
                    (inbound_link->iolink_type == CRAT_IOLINK_TYPE_XGMI &&
                    KFD_GC_VERSION(to_dev->gpu) == IP_VERSION(9, 4, 0))) {
                        outbound_link->flags |= CRAT_IOLINK_FLAGS_NON_COHERENT;
                        inbound_link->flags |= CRAT_IOLINK_FLAGS_NON_COHERENT;
                }
        }
}

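/* Recommended XGMI SDMA engine for each (source socket, destination
 * socket) pair on 8-GPU hives; -1 on the diagonal since a GPU has no
 * link to itself. The values appear to encode a fixed routing that
 * spreads traffic across the available engines.
 */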
#define REC_SDMA_NUM_GPU        8
static const int rec_sdma_eng_map[REC_SDMA_NUM_GPU][REC_SDMA_NUM_GPU] = {
                                                        { -1, 14, 12, 2, 4, 8, 10, 6 },
                                                        { 14, -1, 2, 10, 8, 4, 6, 12 },
                                                        { 10, 2, -1, 12, 14, 6, 4, 8 },
                                                        { 2, 12, 10, -1, 6, 14, 8, 4 },
                                                        { 4, 8, 14, 6, -1, 10, 12, 2 },
                                                        { 8, 4, 6, 14, 12, -1, 2, 10 },
                                                        { 10, 6, 4, 8, 12, 2, -1, 14 },
                                                        { 6, 12, 8, 4, 2, 10, 14, -1 }};

static void kfd_set_recommended_sdma_engines(struct kfd_topology_device *to_dev,
                                             struct kfd_iolink_properties *outbound_link,
                                             struct kfd_iolink_properties *inbound_link)
{
        struct kfd_node *gpu = outbound_link->gpu;
        struct amdgpu_device *adev = gpu->adev;
        int num_xgmi_nodes = adev->gmc.xgmi.num_physical_nodes;
        bool support_rec_eng = !amdgpu_sriov_vf(adev) && to_dev->gpu &&
                adev->aid_mask && num_xgmi_nodes && gpu->kfd->num_nodes == 1 &&
                kfd_get_num_xgmi_sdma_engines(gpu) >= 14 &&
                (!(adev->flags & AMD_IS_APU) && num_xgmi_nodes == 8);

        if (support_rec_eng) {
                int src_socket_id = adev->gmc.xgmi.physical_node_id;
                int dst_socket_id = to_dev->gpu->adev->gmc.xgmi.physical_node_id;

                outbound_link->rec_sdma_eng_id_mask =
                        1 << rec_sdma_eng_map[src_socket_id][dst_socket_id];
                inbound_link->rec_sdma_eng_id_mask =
                        1 << rec_sdma_eng_map[dst_socket_id][src_socket_id];
        } else {
                int num_sdma_eng = kfd_get_num_sdma_engines(gpu);
                int i, eng_offset = 0;

                if (outbound_link->iolink_type == CRAT_IOLINK_TYPE_XGMI &&
                    kfd_get_num_xgmi_sdma_engines(gpu) && to_dev->gpu) {
                        eng_offset = num_sdma_eng;
                        num_sdma_eng = kfd_get_num_xgmi_sdma_engines(gpu);
                }

                for (i = 0; i < num_sdma_eng; i++) {
                        outbound_link->rec_sdma_eng_id_mask |= (1 << (i + eng_offset));
                        inbound_link->rec_sdma_eng_id_mask |= (1 << (i + eng_offset));
                }
        }
}

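/* Fill in link properties that CRAT does not carry: enable flags,
 * atomics support, coherence, recommended SDMA engines, and hive
 * membership for CPUs connected over XGMI. Direct io_links get all of
 * these; indirect p2p_links get the enable/atomics/coherence settings.
 */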
static void kfd_fill_iolink_non_crat_info(struct kfd_topology_device *dev)
{
        struct kfd_iolink_properties *link, *inbound_link;
        struct kfd_topology_device *peer_dev;

        if (!dev || !dev->gpu)
                return;

        /* The GPU only creates direct links, so apply the flag settings to all of them */
1327         list_for_each_entry(link, &dev->io_link_props, list) {
1328                 link->flags = CRAT_IOLINK_FLAGS_ENABLED;
1329                 kfd_set_iolink_no_atomics(dev, NULL, link);
1330                 peer_dev = kfd_topology_device_by_proximity_domain(
1331                                 link->node_to);
1332
1333                 if (!peer_dev)
1334                         continue;
1335
1336                 /* Include the CPU peer in GPU hive if connected over xGMI. */
1337                 if (!peer_dev->gpu &&
1338                     link->iolink_type == CRAT_IOLINK_TYPE_XGMI) {
1339                         /*
1340                          * If the GPU is not part of a GPU hive, use its pci
1341                          * device location as the hive ID to bind with the CPU.
1342                          */
1343                         if (!dev->node_props.hive_id)
1344                                 dev->node_props.hive_id = pci_dev_id(dev->gpu->adev->pdev);
1345                         peer_dev->node_props.hive_id = dev->node_props.hive_id;
1346                 }
1347
1348                 list_for_each_entry(inbound_link, &peer_dev->io_link_props,
1349                                                                         list) {
1350                         if (inbound_link->node_to != link->node_from)
1351                                 continue;
1352
1353                         inbound_link->flags = CRAT_IOLINK_FLAGS_ENABLED;
1354                         kfd_set_iolink_no_atomics(peer_dev, dev, inbound_link);
1355                         kfd_set_iolink_non_coherent(peer_dev, link, inbound_link);
1356                         kfd_set_recommended_sdma_engines(peer_dev, link, inbound_link);
1357                 }
1358         }
1359
1360         /* P2P links are indirect, so apply the flag settings to all of them as well */
1361         list_for_each_entry(link, &dev->p2p_link_props, list) {
1362                 link->flags = CRAT_IOLINK_FLAGS_ENABLED;
1363                 kfd_set_iolink_no_atomics(dev, NULL, link);
1364                 peer_dev = kfd_topology_device_by_proximity_domain(
1365                                 link->node_to);
1366
1367                 if (!peer_dev)
1368                         continue;
1369
1370                 list_for_each_entry(inbound_link, &peer_dev->p2p_link_props,
1371                                                                         list) {
1372                         if (inbound_link->node_to != link->node_from)
1373                                 continue;
1374
1375                         inbound_link->flags = CRAT_IOLINK_FLAGS_ENABLED;
1376                         kfd_set_iolink_no_atomics(peer_dev, dev, inbound_link);
1377                         kfd_set_iolink_non_coherent(peer_dev, link, inbound_link);
1378                 }
1379         }
1380 }
1381
1382 static int kfd_build_p2p_node_entry(struct kfd_topology_device *dev,
1383                                 struct kfd_iolink_properties *p2plink)
1384 {
1385         int ret;
1386
1387         p2plink->kobj = kzalloc(sizeof(struct kobject), GFP_KERNEL);
1388         if (!p2plink->kobj)
1389                 return -ENOMEM;
1390
1391         ret = kobject_init_and_add(p2plink->kobj, &iolink_type,
1392                         dev->kobj_p2plink, "%d", dev->node_props.p2p_links_count - 1);
1393         if (ret < 0) {
1394                 kobject_put(p2plink->kobj);
1395                 return ret;
1396         }
1397
1398         p2plink->attr.name = "properties";
1399         p2plink->attr.mode = KFD_SYSFS_FILE_MODE;
1400         sysfs_attr_init(&p2plink->attr);
1401         ret = sysfs_create_file(p2plink->kobj, &p2plink->attr);
1402         if (ret < 0)
1403                 return ret;
1404
1405         return 0;
1406 }
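
/*
 * The kobject added above is parented to the node's p2p_links directory,
 * so each link ends up visible to user space along the lines of (path
 * illustrative):
 *   .../kfd/topology/nodes/<node>/p2p_links/<idx>/properties
 */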
1407
1408 static int kfd_create_indirect_link_prop(struct kfd_topology_device *kdev, int gpu_node)
1409 {
1410         struct kfd_iolink_properties *gpu_link, *tmp_link, *cpu_link;
1411         struct kfd_iolink_properties *props = NULL, *props2 = NULL;
1412         struct kfd_topology_device *cpu_dev;
1413         int ret = 0;
1414         int i, num_cpu;
1415
1416         num_cpu = 0;
1417         list_for_each_entry(cpu_dev, &topology_device_list, list) {
1418                 if (cpu_dev->gpu)
1419                         break;
1420                 num_cpu++;
1421         }
1422
1423         if (list_empty(&kdev->io_link_props))
1424                 return -ENODATA;
1425
1426         gpu_link = list_first_entry(&kdev->io_link_props,
1427                                     struct kfd_iolink_properties, list);
1428
1429         for (i = 0; i < num_cpu; i++) {
1430                 /* Skip the CPU that the GPU links to directly (CPU <--> GPU) */
1431                 if (gpu_link->node_to == i)
1432                         continue;
1433
1434                 /* find the CPU <--> CPU link */
1435                 cpu_link = NULL;
1436                 cpu_dev = kfd_topology_device_by_proximity_domain(i);
1437                 if (cpu_dev) {
1438                         list_for_each_entry(tmp_link,
1439                                         &cpu_dev->io_link_props, list) {
1440                                 if (tmp_link->node_to == gpu_link->node_to) {
1441                                         cpu_link = tmp_link;
1442                                         break;
1443                                 }
1444                         }
1445                 }
1446
1447                 if (!cpu_link)
1448                         return -ENOENT;
1449
1450                 /* CPU <--> CPU <--> GPU, GPU node */
1451                 props = kfd_alloc_struct(props);
1452                 if (!props)
1453                         return -ENOMEM;
1454
1455                 memcpy(props, gpu_link, sizeof(struct kfd_iolink_properties));
1456                 props->weight = gpu_link->weight + cpu_link->weight;
1457                 props->min_latency = gpu_link->min_latency + cpu_link->min_latency;
1458                 props->max_latency = gpu_link->max_latency + cpu_link->max_latency;
1459                 props->min_bandwidth = min(gpu_link->min_bandwidth, cpu_link->min_bandwidth);
1460                 props->max_bandwidth = min(gpu_link->max_bandwidth, cpu_link->max_bandwidth);
1461
1462                 props->node_from = gpu_node;
1463                 props->node_to = i;
1464                 kdev->node_props.p2p_links_count++;
1465                 list_add_tail(&props->list, &kdev->p2p_link_props);
1466                 ret = kfd_build_p2p_node_entry(kdev, props);
1467                 if (ret < 0)
1468                         return ret;
1469
1470                 /* For small BAR, do not create indirect CPU --> GPU links */
1471                 if (kfd_dev_is_large_bar(kdev->gpu)) {
1472                         /* CPU <--> CPU <--> GPU, CPU node */
1473                         props2 = kfd_alloc_struct(props2);
1474                         if (!props2)
1475                                 return -ENOMEM;
1476
1477                         memcpy(props2, props, sizeof(struct kfd_iolink_properties));
1478                         props2->node_from = i;
1479                         props2->node_to = gpu_node;
1480                         props2->kobj = NULL;
1481                         cpu_dev->node_props.p2p_links_count++;
1482                         list_add_tail(&props2->list, &cpu_dev->p2p_link_props);
1483                         ret = kfd_build_p2p_node_entry(cpu_dev, props2);
1484                         if (ret < 0)
1485                                 return ret;
1486                 }
1487         }
1488         return ret;
1489 }
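
/*
 * Worked example for the aggregation above: if the GPU <--> CPU hop has
 * weight 20 and the CPU <--> CPU hop weight 40, the indirect link gets
 * weight 60; latencies add the same way, while bandwidth is the minimum
 * of the two hops, i.e. the bottleneck.
 */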
1490
1491 #if defined(CONFIG_HSA_AMD_P2P)
1492 static int kfd_add_peer_prop(struct kfd_topology_device *kdev,
1493                 struct kfd_topology_device *peer, int from, int to)
1494 {
1495         struct kfd_iolink_properties *props = NULL;
1496         struct kfd_iolink_properties *iolink1, *iolink2, *iolink3;
1497         struct kfd_topology_device *cpu_dev;
1498         int ret = 0;
1499
1500         if (!amdgpu_device_is_peer_accessible(
1501                                 kdev->gpu->adev,
1502                                 peer->gpu->adev))
1503                 return ret;
1504
1505         if (list_empty(&kdev->io_link_props))
1506                 return -ENODATA;
1507
1508         iolink1 = list_first_entry(&kdev->io_link_props,
1509                                    struct kfd_iolink_properties, list);
1510
1511         if (list_empty(&peer->io_link_props))
1512                 return -ENODATA;
1513
1514         iolink2 = list_first_entry(&peer->io_link_props,
1515                                    struct kfd_iolink_properties, list);
1516
1517         props = kfd_alloc_struct(props);
1518         if (!props)
1519                 return -ENOMEM;
1520
1521         memcpy(props, iolink1, sizeof(struct kfd_iolink_properties));
1522
1523         props->weight = iolink1->weight + iolink2->weight;
1524         props->min_latency = iolink1->min_latency + iolink2->min_latency;
1525         props->max_latency = iolink1->max_latency + iolink2->max_latency;
1526         props->min_bandwidth = min(iolink1->min_bandwidth, iolink2->min_bandwidth);
1527         props->max_bandwidth = min(iolink1->max_bandwidth, iolink2->max_bandwidth);
1528
1529         if (iolink1->node_to != iolink2->node_to) {
1530                 /* CPU->CPU link */
1531                 cpu_dev = kfd_topology_device_by_proximity_domain(iolink1->node_to);
1532                 if (cpu_dev) {
1533                         list_for_each_entry(iolink3, &cpu_dev->io_link_props, list) {
1534                                 if (iolink3->node_to != iolink2->node_to)
1535                                         continue;
1536
1537                                 props->weight += iolink3->weight;
1538                                 props->min_latency += iolink3->min_latency;
1539                                 props->max_latency += iolink3->max_latency;
1540                                 props->min_bandwidth = min(props->min_bandwidth,
1541                                                            iolink3->min_bandwidth);
1542                                 props->max_bandwidth = min(props->max_bandwidth,
1543                                                            iolink3->max_bandwidth);
1544                                 break;
1545                         }
1546                 } else {
1547                         WARN(1, "CPU node not found");
1548                 }
1549         }
1550
1551         props->node_from = from;
1552         props->node_to = to;
1553         peer->node_props.p2p_links_count++;
1554         list_add_tail(&props->list, &peer->p2p_link_props);
1555         ret = kfd_build_p2p_node_entry(peer, props);
1556
1557         return ret;
1558 }
1559 #endif
1560
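/*
 * Build P2P links for the most recently added GPU: count the topology
 * devices, take the newest (last) entry, create indirect links routed
 * through the CPU(s), and, when CONFIG_HSA_AMD_P2P is set, add direct
 * peer links to every earlier GPU outside the new device's xGMI hive.
 */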
1561 static int kfd_dev_create_p2p_links(void)
1562 {
1563         struct kfd_topology_device *dev;
1564         struct kfd_topology_device *new_dev;
1565 #if defined(CONFIG_HSA_AMD_P2P)
1566         uint32_t i;
1567 #endif
1568         uint32_t k;
1569         int ret = 0;
1570
1571         k = 0;
1572         list_for_each_entry(dev, &topology_device_list, list)
1573                 k++;
1574         if (k < 2)
1575                 return 0;
1576
1577         new_dev = list_last_entry(&topology_device_list, struct kfd_topology_device, list);
1578         if (WARN_ON(!new_dev->gpu))
1579                 return 0;
1580
1581         k--;
1582
1583         /* create indirect links */
1584         ret = kfd_create_indirect_link_prop(new_dev, k);
1585         if (ret < 0)
1586                 goto out;
1587
1588         /* create p2p links */
1589 #if defined(CONFIG_HSA_AMD_P2P)
1590         i = 0;
1591         list_for_each_entry(dev, &topology_device_list, list) {
1592                 if (dev == new_dev)
1593                         break;
1594                 if (!dev->gpu || !dev->gpu->adev ||
1595                     (dev->gpu->kfd->hive_id &&
1596                      dev->gpu->kfd->hive_id == new_dev->gpu->kfd->hive_id))
1597                         goto next;
1598
1599                 /* check whether the nodes are peer-accessible in one or both directions */
1600                 ret = kfd_add_peer_prop(new_dev, dev, i, k);
1601                 if (ret < 0)
1602                         goto out;
1603
1604                 ret = kfd_add_peer_prop(dev, new_dev, k, i);
1605                 if (ret < 0)
1606                         goto out;
1607 next:
1608                 i++;
1609         }
1610 #endif
1611
1612 out:
1613         return ret;
1614 }
1615
1616 /* Helper function. See kfd_fill_gpu_cache_info for parameter description */
1617 static int fill_in_l1_pcache(struct kfd_cache_properties **props_ext,
1618                                 struct kfd_gpu_cache_info *pcache_info,
1619                                 int cu_bitmask,
1620                                 int cache_type, unsigned int cu_processor_id,
1621                                 int cu_block)
1622 {
1623         unsigned int cu_sibling_map_mask;
1624         int first_active_cu;
1625         struct kfd_cache_properties *pcache = NULL;
1626
1627         cu_sibling_map_mask = cu_bitmask;
1628         cu_sibling_map_mask >>= cu_block;
1629         cu_sibling_map_mask &= ((1 << pcache_info[cache_type].num_cu_shared) - 1);
1630         first_active_cu = ffs(cu_sibling_map_mask);
1631
1632         /* A CU could be inactive. For a shared cache, find the first active
1633          * CU; for a non-shared cache, check whether the CU is inactive and,
1634          * if so, skip it.
1635          */
1636         if (first_active_cu) {
1637                 pcache = kfd_alloc_struct(pcache);
1638                 if (!pcache)
1639                         return -ENOMEM;
1640
1641                 memset(pcache, 0, sizeof(struct kfd_cache_properties));
1642                 pcache->processor_id_low = cu_processor_id + (first_active_cu - 1);
1643                 pcache->cache_level = pcache_info[cache_type].cache_level;
1644                 pcache->cache_size = pcache_info[cache_type].cache_size;
1645                 pcache->cacheline_size = pcache_info[cache_type].cache_line_size;
1646
1647                 if (pcache_info[cache_type].flags & CRAT_CACHE_FLAGS_DATA_CACHE)
1648                         pcache->cache_type |= HSA_CACHE_TYPE_DATA;
1649                 if (pcache_info[cache_type].flags & CRAT_CACHE_FLAGS_INST_CACHE)
1650                         pcache->cache_type |= HSA_CACHE_TYPE_INSTRUCTION;
1651                 if (pcache_info[cache_type].flags & CRAT_CACHE_FLAGS_CPU_CACHE)
1652                         pcache->cache_type |= HSA_CACHE_TYPE_CPU;
1653                 if (pcache_info[cache_type].flags & CRAT_CACHE_FLAGS_SIMD_CACHE)
1654                         pcache->cache_type |= HSA_CACHE_TYPE_HSACU;
1655
1656                 /* The sibling map is relative to processor_id_low, so shift
1657                  * out the inactive CUs
1658                  */
1659                 cu_sibling_map_mask =
1660                         cu_sibling_map_mask >> (first_active_cu - 1);
1661
1662                 pcache->sibling_map[0] = (uint8_t)(cu_sibling_map_mask & 0xFF);
1663                 pcache->sibling_map[1] =
1664                                 (uint8_t)((cu_sibling_map_mask >> 8) & 0xFF);
1665                 pcache->sibling_map[2] =
1666                                 (uint8_t)((cu_sibling_map_mask >> 16) & 0xFF);
1667                 pcache->sibling_map[3] =
1668                                 (uint8_t)((cu_sibling_map_mask >> 24) & 0xFF);
1669
1670                 pcache->sibling_map_size = 4;
1671                 *props_ext = pcache;
1672
1673                 return 0;
1674         }
1675         return 1;
1676 }
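
/*
 * Worked example for the masking above: with cu_bitmask = 0xF0,
 * cu_block = 4 and num_cu_shared = 4, the mask becomes
 * (0xF0 >> 4) & 0xF = 0xF and ffs() returns 1, so the entry starts at
 * cu_processor_id + 0 and the sibling map reports all four CUs.
 */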
1677
1678 /* Helper function. See kfd_fill_gpu_cache_info for parameter description */
1679 static int fill_in_l2_l3_pcache(struct kfd_cache_properties **props_ext,
1680                                 struct kfd_gpu_cache_info *pcache_info,
1681                                 struct amdgpu_cu_info *cu_info,
1682                                 struct amdgpu_gfx_config *gfx_info,
1683                                 int cache_type, unsigned int cu_processor_id,
1684                                 struct kfd_node *knode)
1685 {
1686         unsigned int cu_sibling_map_mask;
1687         int first_active_cu;
1688         int i, j, k, xcc, start, end;
1689         int num_xcc = NUM_XCC(knode->xcc_mask);
1690         struct kfd_cache_properties *pcache = NULL;
1691         enum amdgpu_memory_partition mode;
1692         struct amdgpu_device *adev = knode->adev;
1693
1694         start = ffs(knode->xcc_mask) - 1;
1695         end = start + num_xcc;
1696         cu_sibling_map_mask = cu_info->bitmap[start][0][0];
1697         cu_sibling_map_mask &=
1698                 ((1 << pcache_info[cache_type].num_cu_shared) - 1);
1699         first_active_cu = ffs(cu_sibling_map_mask);
1700
1701         /* A CU could be inactive. For a shared cache, find the first active
1702          * CU; for a non-shared cache, check whether the CU is inactive and,
1703          * if so, skip it.
1704          */
1705         if (first_active_cu) {
1706                 pcache = kfd_alloc_struct(pcache);
1707                 if (!pcache)
1708                         return -ENOMEM;
1709
1710                 memset(pcache, 0, sizeof(struct kfd_cache_properties));
1711                 pcache->processor_id_low = cu_processor_id
1712                                         + (first_active_cu - 1);
1713                 pcache->cache_level = pcache_info[cache_type].cache_level;
1714                 pcache->cacheline_size = pcache_info[cache_type].cache_line_size;
1715
1716                 if (KFD_GC_VERSION(knode) == IP_VERSION(9, 4, 3) ||
1717                     KFD_GC_VERSION(knode) == IP_VERSION(9, 4, 4) ||
1718                     KFD_GC_VERSION(knode) == IP_VERSION(9, 5, 0))
1719                         mode = adev->gmc.gmc_funcs->query_mem_partition_mode(adev);
1720                 else
1721                         mode = UNKNOWN_MEMORY_PARTITION_MODE;
1722
1723                 pcache->cache_size = pcache_info[cache_type].cache_size;
1724                 /* Partition mode only affects L3 cache size */
1725                 if (mode && pcache->cache_level == 3)
1726                         pcache->cache_size /= mode;
1727
1728                 if (pcache_info[cache_type].flags & CRAT_CACHE_FLAGS_DATA_CACHE)
1729                         pcache->cache_type |= HSA_CACHE_TYPE_DATA;
1730                 if (pcache_info[cache_type].flags & CRAT_CACHE_FLAGS_INST_CACHE)
1731                         pcache->cache_type |= HSA_CACHE_TYPE_INSTRUCTION;
1732                 if (pcache_info[cache_type].flags & CRAT_CACHE_FLAGS_CPU_CACHE)
1733                         pcache->cache_type |= HSA_CACHE_TYPE_CPU;
1734                 if (pcache_info[cache_type].flags & CRAT_CACHE_FLAGS_SIMD_CACHE)
1735                         pcache->cache_type |= HSA_CACHE_TYPE_HSACU;
1736
1737                 /* The sibling map is relative to processor_id_low, so shift
1738                  * out the inactive CUs
1739                  */
1740                 cu_sibling_map_mask = cu_sibling_map_mask >> (first_active_cu - 1);
1741                 k = 0;
1742
1743                 for (xcc = start; xcc < end; xcc++) {
1744                         for (i = 0; i < gfx_info->max_shader_engines; i++) {
1745                                 for (j = 0; j < gfx_info->max_sh_per_se; j++) {
1746                                         pcache->sibling_map[k] = (uint8_t)(cu_sibling_map_mask & 0xFF);
1747                                         pcache->sibling_map[k+1] = (uint8_t)((cu_sibling_map_mask >> 8) & 0xFF);
1748                                         pcache->sibling_map[k+2] = (uint8_t)((cu_sibling_map_mask >> 16) & 0xFF);
1749                                         pcache->sibling_map[k+3] = (uint8_t)((cu_sibling_map_mask >> 24) & 0xFF);
1750                                         k += 4;
1751
1752                                         cu_sibling_map_mask = cu_info->bitmap[xcc][i % 4][j + i / 4];
1753                                         cu_sibling_map_mask &= ((1 << pcache_info[cache_type].num_cu_shared) - 1);
1754                                 }
1755                         }
1756                 }
1757                 pcache->sibling_map_size = k;
1758                 *props_ext = pcache;
1759                 return 0;
1760         }
1761         return 1;
1762 }
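
/*
 * Note on the partition scaling above: the amdgpu_memory_partition enum
 * values match the partition count (NPS1 = 1, NPS2 = 2, ...), so e.g.
 * NPS4 mode reports a quarter of the full L3 size per partition.
 */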
1763
1764 #define KFD_MAX_CACHE_TYPES 6
1765
1766 /* kfd_fill_cache_non_crat_info - Fill GPU cache info using kfd_gpu_cache_info
1767  * tables
1768  */
1769 static void kfd_fill_cache_non_crat_info(struct kfd_topology_device *dev, struct kfd_node *kdev)
1770 {
1771         struct kfd_gpu_cache_info *pcache_info = NULL;
1772         int i, j, k, xcc, start, end;
1773         int ct = 0;
1774         unsigned int cu_processor_id;
1775         int ret;
1776         unsigned int num_cu_shared;
1777         struct amdgpu_cu_info *cu_info = &kdev->adev->gfx.cu_info;
1778         struct amdgpu_gfx_config *gfx_info = &kdev->adev->gfx.config;
1779         int gpu_processor_id;
1780         struct kfd_cache_properties *props_ext = NULL;
1781         int num_of_entries = 0;
1782         int num_of_cache_types = 0;
1783         struct kfd_gpu_cache_info cache_info[KFD_MAX_CACHE_TYPES];
1784
1786         gpu_processor_id = dev->node_props.simd_id_base;
1787
1788         memset(cache_info, 0, sizeof(cache_info));
1789         pcache_info = cache_info;
1790         num_of_cache_types = kfd_get_gpu_cache_info(kdev, &pcache_info);
1791         if (!num_of_cache_types) {
1792                 pr_warn("no cache info found\n");
1793                 return;
1794         }
1795
1796         /* For each cache type listed in the kfd_gpu_cache_info table,
1797          * go through all available Compute Units.
1798          * In the [i,j,k] loop:
1799          *              if kfd_gpu_cache_info.num_cu_shared == 1,
1800          *              every available CU is visited individually;
1801          *              if kfd_gpu_cache_info.num_cu_shared != 1,
1802          *              only one CU from each group of CUs sharing
1803          *              the cache is considered.
1804          */
1805         start = ffs(kdev->xcc_mask) - 1;
1806         end = start + NUM_XCC(kdev->xcc_mask);
1807
1808         for (ct = 0; ct < num_of_cache_types; ct++) {
1809                 cu_processor_id = gpu_processor_id;
1810                 if (pcache_info[ct].cache_level == 1) {
1811                         for (xcc = start; xcc < end; xcc++) {
1812                                 for (i = 0; i < gfx_info->max_shader_engines; i++) {
1813                                         for (j = 0; j < gfx_info->max_sh_per_se; j++) {
1814                                                 for (k = 0; k < gfx_info->max_cu_per_sh; k += pcache_info[ct].num_cu_shared) {
1815
1816                                                         ret = fill_in_l1_pcache(&props_ext, pcache_info,
1817                                                                                 cu_info->bitmap[xcc][i % 4][j + i / 4], ct,
1818                                                                                 cu_processor_id, k);
1819
1820                                                         if (ret < 0)
1821                                                                 break;
1822
1823                                                         if (!ret) {
1824                                                                 num_of_entries++;
1825                                                                 list_add_tail(&props_ext->list, &dev->cache_props);
1826                                                         }
1827
1828                                                         /* Move to next CU block */
1829                                                         num_cu_shared = ((k + pcache_info[ct].num_cu_shared) <=
1830                                                                 gfx_info->max_cu_per_sh) ?
1831                                                                 pcache_info[ct].num_cu_shared :
1832                                                                 (gfx_info->max_cu_per_sh - k);
1833                                                         cu_processor_id += num_cu_shared;
1834                                                 }
1835                                         }
1836                                 }
1837                         }
1838                 } else {
1839                         ret = fill_in_l2_l3_pcache(&props_ext, pcache_info,
1840                                                    cu_info, gfx_info, ct, cu_processor_id, kdev);
1841
1842                         if (ret < 0)
1843                                 break;
1844
1845                         if (!ret) {
1846                                 num_of_entries++;
1847                                 list_add_tail(&props_ext->list, &dev->cache_props);
1848                         }
1849                 }
1850         }
1851         dev->node_props.caches_count += num_of_entries;
1852         pr_debug("Added [%d] GPU cache entries\n", num_of_entries);
1853 }
1854
1855 static int kfd_topology_add_device_locked(struct kfd_node *gpu,
1856                                           struct kfd_topology_device **dev)
1857 {
1858         int proximity_domain = ++topology_crat_proximity_domain;
1859         struct list_head temp_topology_device_list;
1860         void *crat_image = NULL;
1861         size_t image_size = 0;
1862         int res;
1863
1864         res = kfd_create_crat_image_virtual(&crat_image, &image_size,
1865                                             COMPUTE_UNIT_GPU, gpu,
1866                                             proximity_domain);
1867         if (res) {
1868                 dev_err(gpu->adev->dev, "Error creating VCRAT\n");
1869                 topology_crat_proximity_domain--;
1870                 goto err;
1871         }
1872
1873         INIT_LIST_HEAD(&temp_topology_device_list);
1874
1875         res = kfd_parse_crat_table(crat_image,
1876                                    &temp_topology_device_list,
1877                                    proximity_domain);
1878         if (res) {
1879                 dev_err(gpu->adev->dev, "Error parsing VCRAT\n");
1880                 topology_crat_proximity_domain--;
1881                 goto err;
1882         }
1883
1884         kfd_topology_update_device_list(&temp_topology_device_list,
1885                                         &topology_device_list);
1886
1887         *dev = kfd_assign_gpu(gpu);
1888         if (WARN_ON(!*dev)) {
1889                 res = -ENODEV;
1890                 goto err;
1891         }
1892
1893         /* Fill the cache affinity information here for the GPUs
1894          * using VCRAT
1895          */
1896         kfd_fill_cache_non_crat_info(*dev, gpu);
1897
1898         /* Update the SYSFS tree, since we added another topology
1899          * device
1900          */
1901         res = kfd_topology_update_sysfs();
1902         if (!res)
1903                 sys_props.generation_count++;
1904         else
1905                 dev_err(gpu->adev->dev, "Failed to update GPU to sysfs topology. res=%d\n",
1906                         res);
1907
1908 err:
1909         kfd_destroy_crat_image(crat_image);
1910         return res;
1911 }
1912
1913 static void kfd_topology_set_dbg_firmware_support(struct kfd_topology_device *dev)
1914 {
1915         bool firmware_supported = true;
1916
1917         if (KFD_GC_VERSION(dev->gpu) >= IP_VERSION(11, 0, 0) &&
1918                         KFD_GC_VERSION(dev->gpu) < IP_VERSION(12, 0, 0)) {
1919                 uint32_t mes_api_rev = (dev->gpu->adev->mes.sched_version &
1920                                                 AMDGPU_MES_API_VERSION_MASK) >>
1921                                                 AMDGPU_MES_API_VERSION_SHIFT;
1922                 uint32_t mes_rev = dev->gpu->adev->mes.sched_version &
1923                                                 AMDGPU_MES_VERSION_MASK;
1924
1925                 firmware_supported = (mes_api_rev >= 14) && (mes_rev >= 64);
1926                 goto out;
1927         }
1928
1929         /*
1930          * Note: Any unlisted devices here are assumed to support exception handling.
1931          * Add additional checks here as needed.
1932          */
1933         switch (KFD_GC_VERSION(dev->gpu)) {
1934         case IP_VERSION(9, 0, 1):
1935                 firmware_supported = dev->gpu->kfd->mec_fw_version >= 459 + 32768;
1936                 break;
1937         case IP_VERSION(9, 1, 0):
1938         case IP_VERSION(9, 2, 1):
1939         case IP_VERSION(9, 2, 2):
1940         case IP_VERSION(9, 3, 0):
1941         case IP_VERSION(9, 4, 0):
1942                 firmware_supported = dev->gpu->kfd->mec_fw_version >= 459;
1943                 break;
1944         case IP_VERSION(9, 4, 1):
1945                 firmware_supported = dev->gpu->kfd->mec_fw_version >= 60;
1946                 break;
1947         case IP_VERSION(9, 4, 2):
1948                 firmware_supported = dev->gpu->kfd->mec_fw_version >= 51;
1949                 break;
1950         case IP_VERSION(10, 1, 10):
1951         case IP_VERSION(10, 1, 2):
1952         case IP_VERSION(10, 1, 1):
1953                 firmware_supported = dev->gpu->kfd->mec_fw_version >= 144;
1954                 break;
1955         case IP_VERSION(10, 3, 0):
1956         case IP_VERSION(10, 3, 2):
1957         case IP_VERSION(10, 3, 1):
1958         case IP_VERSION(10, 3, 4):
1959         case IP_VERSION(10, 3, 5):
1960                 firmware_supported = dev->gpu->kfd->mec_fw_version >= 89;
1961                 break;
1962         case IP_VERSION(10, 1, 3):
1963         case IP_VERSION(10, 3, 3):
1964                 firmware_supported = false;
1965                 break;
1966         default:
1967                 break;
1968         }
1969
1970 out:
1971         if (firmware_supported)
1972                 dev->node_props.capability |= HSA_CAP_TRAP_DEBUG_FIRMWARE_SUPPORTED;
1973 }
1974
1975 static void kfd_topology_set_capabilities(struct kfd_topology_device *dev)
1976 {
1977         dev->node_props.capability |= ((HSA_CAP_DOORBELL_TYPE_2_0 <<
1978                                 HSA_CAP_DOORBELL_TYPE_TOTALBITS_SHIFT) &
1979                                 HSA_CAP_DOORBELL_TYPE_TOTALBITS_MASK);
1980
1981         dev->node_props.capability |= HSA_CAP_TRAP_DEBUG_SUPPORT |
1982                         HSA_CAP_TRAP_DEBUG_WAVE_LAUNCH_TRAP_OVERRIDE_SUPPORTED |
1983                         HSA_CAP_TRAP_DEBUG_WAVE_LAUNCH_MODE_SUPPORTED;
1984
1985         if (kfd_dbg_has_ttmps_always_setup(dev->gpu))
1986                 dev->node_props.debug_prop |= HSA_DBG_DISPATCH_INFO_ALWAYS_VALID;
1987
1988         if (KFD_GC_VERSION(dev->gpu) < IP_VERSION(10, 0, 0)) {
1989                 if (KFD_GC_VERSION(dev->gpu) == IP_VERSION(9, 4, 3) ||
1990                     KFD_GC_VERSION(dev->gpu) == IP_VERSION(9, 4, 4))
1991                         dev->node_props.debug_prop |=
1992                                 HSA_DBG_WATCH_ADDR_MASK_LO_BIT_GFX9_4_3 |
1993                                 HSA_DBG_WATCH_ADDR_MASK_HI_BIT_GFX9_4_3;
1994                 else
1995                         dev->node_props.debug_prop |=
1996                                 HSA_DBG_WATCH_ADDR_MASK_LO_BIT_GFX9 |
1997                                 HSA_DBG_WATCH_ADDR_MASK_HI_BIT;
1998
1999                 if (KFD_GC_VERSION(dev->gpu) >= IP_VERSION(9, 4, 2))
2000                         dev->node_props.capability |=
2001                                 HSA_CAP_TRAP_DEBUG_PRECISE_MEMORY_OPERATIONS_SUPPORTED;
2002
2003                 dev->node_props.capability |= HSA_CAP_PER_QUEUE_RESET_SUPPORTED;
2004         } else {
2005                 dev->node_props.debug_prop |= HSA_DBG_WATCH_ADDR_MASK_LO_BIT_GFX10 |
2006                                         HSA_DBG_WATCH_ADDR_MASK_HI_BIT;
2007
2008                 if (KFD_GC_VERSION(dev->gpu) >= IP_VERSION(11, 0, 0))
2009                         dev->node_props.capability |=
2010                                 HSA_CAP_TRAP_DEBUG_PRECISE_MEMORY_OPERATIONS_SUPPORTED;
2011
2012                 if (KFD_GC_VERSION(dev->gpu) >= IP_VERSION(12, 0, 0))
2013                         dev->node_props.capability |=
2014                                 HSA_CAP_TRAP_DEBUG_PRECISE_ALU_OPERATIONS_SUPPORTED;
2015         }
2016
2017         kfd_topology_set_dbg_firmware_support(dev);
2018 }
2019
2020 int kfd_topology_add_device(struct kfd_node *gpu)
2021 {
2022         uint32_t gpu_id;
2023         struct kfd_topology_device *dev;
2024         int res = 0;
2025         int i;
2026         const char *asic_name = amdgpu_asic_name[gpu->adev->asic_type];
2027         struct amdgpu_gfx_config *gfx_info = &gpu->adev->gfx.config;
2028         struct amdgpu_cu_info *cu_info = &gpu->adev->gfx.cu_info;
2029
2030         if (gpu->xcp && !gpu->xcp->ddev) {
2031                 dev_warn(gpu->adev->dev,
2032                          "Won't add GPU to topology since it has no drm node assigned.");
2033                 return 0;
2034         } else {
2035                 dev_dbg(gpu->adev->dev, "Adding new GPU to topology\n");
2036         }
2037
2038         /* Check whether this GPU device already exists in the
2039          * topology_device_list. If so, assign the GPU to that device;
2040          * else create a virtual CRAT for this GPU and parse it to
2041          * create a new topology device. Once created, assign the GPU
2042          * to that topology device.
2043          */
2044         down_write(&topology_lock);
2045         dev = kfd_assign_gpu(gpu);
2046         if (!dev)
2047                 res = kfd_topology_add_device_locked(gpu, &dev);
2048         up_write(&topology_lock);
2049         if (res)
2050                 return res;
2051
2052         gpu_id = kfd_generate_gpu_id(gpu);
2053         dev->gpu_id = gpu_id;
2054         gpu->id = gpu_id;
2055
2056         kfd_dev_create_p2p_links();
2057
2058         /* TODO: Move the following lines to function
2059          *      kfd_add_non_crat_information
2060          */
2061
2062         /* Fill-in additional information that is not available in CRAT but
2063          * needed for the topology
2064          */
2065         for (i = 0; i < KFD_TOPOLOGY_PUBLIC_NAME_SIZE-1; i++) {
2066                 dev->node_props.name[i] = __tolower(asic_name[i]);
2067                 if (asic_name[i] == '\0')
2068                         break;
2069         }
2070         dev->node_props.name[i] = '\0';
2071
2072         dev->node_props.simd_arrays_per_engine =
2073                 gfx_info->max_sh_per_se;
2074
2075         dev->node_props.gfx_target_version =
2076                                 gpu->kfd->device_info.gfx_target_version;
2077         dev->node_props.vendor_id = gpu->adev->pdev->vendor;
2078         dev->node_props.device_id = gpu->adev->pdev->device;
2079         dev->node_props.capability |=
2080                 ((dev->gpu->adev->rev_id << HSA_CAP_ASIC_REVISION_SHIFT) &
2081                         HSA_CAP_ASIC_REVISION_MASK);
2082
2083         dev->node_props.location_id = pci_dev_id(gpu->adev->pdev);
2084         if (gpu->kfd->num_nodes > 1)
2085                 dev->node_props.location_id |= dev->gpu->node_id;
2086
2087         dev->node_props.domain = pci_domain_nr(gpu->adev->pdev->bus);
2088         dev->node_props.max_engine_clk_fcompute =
2089                 amdgpu_amdkfd_get_max_engine_clock_in_mhz(dev->gpu->adev);
2090         dev->node_props.max_engine_clk_ccompute =
2091                 cpufreq_quick_get_max(0) / 1000;
2092
2093         if (gpu->xcp)
2094                 dev->node_props.drm_render_minor = gpu->xcp->ddev->render->index;
2095         else
2096                 dev->node_props.drm_render_minor =
2097                                 gpu->kfd->shared_resources.drm_render_minor;
2098
2099         dev->node_props.hive_id = gpu->kfd->hive_id;
2100         dev->node_props.num_sdma_engines = kfd_get_num_sdma_engines(gpu);
2101         dev->node_props.num_sdma_xgmi_engines =
2102                                         kfd_get_num_xgmi_sdma_engines(gpu);
2103         dev->node_props.num_sdma_queues_per_engine =
2104                                 gpu->kfd->device_info.num_sdma_queues_per_engine -
2105                                 gpu->kfd->device_info.num_reserved_sdma_queues_per_engine;
2106         dev->node_props.num_gws = (dev->gpu->gws &&
2107                 dev->gpu->dqm->sched_policy != KFD_SCHED_POLICY_NO_HWS) ?
2108                 dev->gpu->adev->gds.gws_size : 0;
2109         dev->node_props.num_cp_queues = get_cp_queues_num(dev->gpu->dqm);
2110
2111         kfd_fill_mem_clk_max_info(dev);
2112         kfd_fill_iolink_non_crat_info(dev);
2113
2114         switch (dev->gpu->adev->asic_type) {
2115         case CHIP_KAVERI:
2116         case CHIP_HAWAII:
2117         case CHIP_TONGA:
2118                 dev->node_props.capability |= ((HSA_CAP_DOORBELL_TYPE_PRE_1_0 <<
2119                         HSA_CAP_DOORBELL_TYPE_TOTALBITS_SHIFT) &
2120                         HSA_CAP_DOORBELL_TYPE_TOTALBITS_MASK);
2121                 break;
2122         case CHIP_CARRIZO:
2123         case CHIP_FIJI:
2124         case CHIP_POLARIS10:
2125         case CHIP_POLARIS11:
2126         case CHIP_POLARIS12:
2127         case CHIP_VEGAM:
2128                 pr_debug("Adding doorbell packet type capability\n");
2129                 dev->node_props.capability |= ((HSA_CAP_DOORBELL_TYPE_1_0 <<
2130                         HSA_CAP_DOORBELL_TYPE_TOTALBITS_SHIFT) &
2131                         HSA_CAP_DOORBELL_TYPE_TOTALBITS_MASK);
2132                 break;
2133         default:
2134                 if (KFD_GC_VERSION(dev->gpu) < IP_VERSION(9, 0, 1))
2135                         WARN(1, "Unexpected ASIC family %u",
2136                              dev->gpu->adev->asic_type);
2137                 else
2138                         kfd_topology_set_capabilities(dev);
2139         }
2140
2141         /*
2142          * Overwrite the ATS capability to fix a potentially missing or
2143          * incorrect ATS bit in the BIOS-provided CRAT.
2144          */
2145         dev->node_props.capability &= ~HSA_CAP_ATS_PRESENT;
2146
2147         /* Fix errors in CZ CRAT.
2148          * simd_count: Carrizo CRAT reports wrong simd_count, probably
2149          *              because it doesn't consider masked out CUs
2150          * max_waves_per_simd: Carrizo reports wrong max_waves_per_simd
2151          */
2152         if (dev->gpu->adev->asic_type == CHIP_CARRIZO) {
2153                 dev->node_props.simd_count =
2154                         cu_info->simd_per_cu * cu_info->number;
2155                 dev->node_props.max_waves_per_simd = 10;
2156         }
2157
2158         /* KFD is only concerned with SRAM ECC on GFX and HBM ECC on UMC */
2159         dev->node_props.capability |=
2160                 ((dev->gpu->adev->ras_enabled & BIT(AMDGPU_RAS_BLOCK__GFX)) != 0) ?
2161                 HSA_CAP_SRAM_EDCSUPPORTED : 0;
2162         dev->node_props.capability |=
2163                 ((dev->gpu->adev->ras_enabled & BIT(AMDGPU_RAS_BLOCK__UMC)) != 0) ?
2164                 HSA_CAP_MEM_EDCSUPPORTED : 0;
2165
2166         if (KFD_GC_VERSION(dev->gpu) != IP_VERSION(9, 0, 1))
2167                 dev->node_props.capability |= (dev->gpu->adev->ras_enabled != 0) ?
2168                         HSA_CAP_RASEVENTNOTIFY : 0;
2169
2170         if (KFD_IS_SVM_API_SUPPORTED(dev->gpu->adev))
2171                 dev->node_props.capability |= HSA_CAP_SVMAPI_SUPPORTED;
2172
2173         if (dev->gpu->adev->gmc.is_app_apu ||
2174                 dev->gpu->adev->gmc.xgmi.connected_to_cpu)
2175                 dev->node_props.capability |= HSA_CAP_FLAGS_COHERENTHOSTACCESS;
2176
2177         kfd_queue_ctx_save_restore_size(dev);
2178
2179         kfd_debug_print_topology();
2180
2181         kfd_notify_gpu_change(gpu_id, 1);
2182
2183         return 0;
2184 }
2185
2186 /**
2187  * kfd_topology_update_io_links() - Update IO links after device removal.
2188  * @proximity_domain: Proximity domain value of the dev being removed.
2189  *
2190  * The topology list currently is arranged in increasing order of
2191  * proximity domain.
2192  *
2193  * Two things need to be done when a device is removed:
2194  * 1. All the IO links to this device need to be removed.
2195  * 2. All nodes after the current device node need to move
2196  *    up once this device node is removed from the topology
2197  *    list. As a result, the proximity domain values for
2198  *    all nodes after the node being deleted reduce by 1.
2199  *    This would also cause the proximity domain values for
2200  *    io links to be updated based on new proximity domain
2201  *    values.
2202  *
2203  * Context: The caller must hold write topology_lock.
2204  */
2205 static void kfd_topology_update_io_links(int proximity_domain)
2206 {
2207         struct kfd_topology_device *dev;
2208         struct kfd_iolink_properties *iolink, *p2plink, *tmp;
2209
2210         list_for_each_entry(dev, &topology_device_list, list) {
2211                 if (dev->proximity_domain > proximity_domain)
2212                         dev->proximity_domain--;
2213
2214                 list_for_each_entry_safe(iolink, tmp, &dev->io_link_props, list) {
2215                         /*
2216                          * If there is an io link to the dev being deleted
2217                          * then remove that IO link also.
2218                          */
2219                         if (iolink->node_to == proximity_domain) {
2220                                 list_del(&iolink->list);
2221                                 dev->node_props.io_links_count--;
2222                         } else {
2223                                 if (iolink->node_from > proximity_domain)
2224                                         iolink->node_from--;
2225                                 if (iolink->node_to > proximity_domain)
2226                                         iolink->node_to--;
2227                         }
2228                 }
2229
2230                 list_for_each_entry_safe(p2plink, tmp, &dev->p2p_link_props, list) {
2231                         /*
2232                          * If there is a p2p link to the dev being deleted
2233                          * then remove that p2p link also.
2234                          */
2235                         if (p2plink->node_to == proximity_domain) {
2236                                 list_del(&p2plink->list);
2237                                 dev->node_props.p2p_links_count--;
2238                         } else {
2239                                 if (p2plink->node_from > proximity_domain)
2240                                         p2plink->node_from--;
2241                                 if (p2plink->node_to > proximity_domain)
2242                                         p2plink->node_to--;
2243                         }
2244                 }
2245         }
2246 }
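
/*
 * Example of the renumbering above: removing proximity domain 2 from
 * {0, 1, 2, 3, 4} deletes every link whose node_to is 2, after which
 * domains 3 and 4, and any link endpoints referring to them, become
 * 2 and 3.
 */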
2247
2248 int kfd_topology_remove_device(struct kfd_node *gpu)
2249 {
2250         struct kfd_topology_device *dev, *tmp;
2251         uint32_t gpu_id;
2252         int res = -ENODEV;
2253         int i = 0;
2254
2255         down_write(&topology_lock);
2256
2257         list_for_each_entry_safe(dev, tmp, &topology_device_list, list) {
2258                 if (dev->gpu == gpu) {
2259                         gpu_id = dev->gpu_id;
2260                         kfd_remove_sysfs_node_entry(dev);
2261                         kfd_release_topology_device(dev);
2262                         sys_props.num_devices--;
2263                         kfd_topology_update_io_links(i);
2264                         topology_crat_proximity_domain = sys_props.num_devices - 1;
2265                         sys_props.generation_count++;
2266                         res = 0;
2267                         if (kfd_topology_update_sysfs() < 0)
2268                                 kfd_topology_release_sysfs();
2269                         break;
2270                 }
2271                 i++;
2272         }
2273
2274         up_write(&topology_lock);
2275
2276         if (!res)
2277                 kfd_notify_gpu_change(gpu_id, 0);
2278
2279         return res;
2280 }
2281
2282 /* kfd_topology_enum_kfd_devices - Enumerate all devices in the KFD
2283  *      topology. If a GPU device is found at @idx, a valid kfd_node
2284  *      pointer is returned through @kdev.
2285  * Return -     0: On success (@kdev will be NULL for non-GPU nodes)
2286  *              -1: If the end of the list is reached
2287  */
2288 int kfd_topology_enum_kfd_devices(uint8_t idx, struct kfd_node **kdev)
2289 {
2291         struct kfd_topology_device *top_dev;
2292         uint8_t device_idx = 0;
2293
2294         *kdev = NULL;
2295         down_read(&topology_lock);
2296
2297         list_for_each_entry(top_dev, &topology_device_list, list) {
2298                 if (device_idx == idx) {
2299                         *kdev = top_dev->gpu;
2300                         up_read(&topology_lock);
2301                         return 0;
2302                 }
2303
2304                 device_idx++;
2305         }
2306
2307         up_read(&topology_lock);
2308
2309         return -1;
2311 }
2312
2313 static int kfd_cpumask_to_apic_id(const struct cpumask *cpumask)
2314 {
2315         int first_cpu_of_numa_node;
2316
2317         if (!cpumask || cpumask == cpu_none_mask)
2318                 return -1;
2319         first_cpu_of_numa_node = cpumask_first(cpumask);
2320         if (first_cpu_of_numa_node >= nr_cpu_ids)
2321                 return -1;
2322 #ifdef CONFIG_X86_64
2323         return cpu_data(first_cpu_of_numa_node).topo.apicid;
2324 #else
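        /* Non-x86: there is no APIC ID; fall back to the logical CPU number. */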
2325         return first_cpu_of_numa_node;
2326 #endif
2327 }
2328
2329 /* kfd_numa_node_to_apic_id - Returns the APIC ID of the first logical processor
2330  *      of the given NUMA node (numa_node_id)
2331  * Return -1 on failure
2332  */
2333 int kfd_numa_node_to_apic_id(int numa_node_id)
2334 {
2335         if (numa_node_id == -1) {
2336                 pr_warn("Invalid NUMA node, using the online CPU mask\n");
2337                 return kfd_cpumask_to_apic_id(cpu_online_mask);
2338         }
2339         return kfd_cpumask_to_apic_id(cpumask_of_node(numa_node_id));
2340 }
2341
2342 #if defined(CONFIG_DEBUG_FS)
2343
2344 int kfd_debugfs_hqds_by_device(struct seq_file *m, void *data)
2345 {
2346         struct kfd_topology_device *dev;
2347         unsigned int i = 0;
2348         int r = 0;
2349
2350         down_read(&topology_lock);
2351
2352         list_for_each_entry(dev, &topology_device_list, list) {
2353                 if (!dev->gpu) {
2354                         i++;
2355                         continue;
2356                 }
2357
2358                 seq_printf(m, "Node %u, gpu_id %x:\n", i++, dev->gpu->id);
2359                 r = dqm_debugfs_hqds(m, dev->gpu->dqm);
2360                 if (r)
2361                         break;
2362         }
2363
2364         up_read(&topology_lock);
2365
2366         return r;
2367 }
2368
2369 int kfd_debugfs_rls_by_device(struct seq_file *m, void *data)
2370 {
2371         struct kfd_topology_device *dev;
2372         unsigned int i = 0;
2373         int r = 0;
2374
2375         down_read(&topology_lock);
2376
2377         list_for_each_entry(dev, &topology_device_list, list) {
2378                 if (!dev->gpu) {
2379                         i++;
2380                         continue;
2381                 }
2382
2383                 seq_printf(m, "Node %u, gpu_id %x:\n", i++, dev->gpu->id);
2384                 r = pm_debugfs_runlist(m, &dev->gpu->dqm->packet_mgr);
2385                 if (r)
2386                         break;
2387         }
2388
2389         up_read(&topology_lock);
2390
2391         return r;
2392 }
2393
2394 #endif