Git Repo - linux.git/commitdiff
drm/amdkfd: Add device repartition support
author: Mukul Joshi <[email protected]>
Fri, 10 Jun 2022 13:41:29 +0000 (09:41 -0400)
committer: Alex Deucher <[email protected]>
Fri, 9 Jun 2023 13:45:38 +0000 (09:45 -0400)
GFX9.4.3 will support dynamic repartitioning of the GPU through sysfs.
Add device repartitioning support in KFD to repartition GPU from one
mode to other.

v2: squash in fix ("drm/amdkfd: Fix warning kgd2kfd_unlock_kfd defined but not used")

Signed-off-by: Mukul Joshi <[email protected]>
Reviewed-by: Felix Kuehling <[email protected]>
Signed-off-by: Alex Deucher <[email protected]>
drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.c
drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.h
drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.c
drivers/gpu/drm/amd/amdgpu/gfx_v9_4_3.c
drivers/gpu/drm/amd/amdkfd/kfd_device.c

index 9d19c7ceda3f61f3b9c52b3b002e4b8029d7bd09..bbbfe9ec4adf80ac86f56239254158d4f4f50a16 100644 (file)
@@ -773,3 +773,13 @@ bool amdgpu_amdkfd_ras_query_utcl2_poison_status(struct amdgpu_device *adev)
        else
                return false;
 }
+
+/* Thin amdgpu-side wrapper: lock KFD before a partition-mode switch.
+ * Returns 0 on success or -EBUSY if KFD is busy (see kgd2kfd_check_and_lock_kfd).
+ * @adev is unused here; kept for API symmetry with other amdkfd wrappers.
+ */
+int amdgpu_amdkfd_check_and_lock_kfd(struct amdgpu_device *adev)
+{
+       return kgd2kfd_check_and_lock_kfd();
+}
+
+/* Thin amdgpu-side wrapper: release the KFD lock taken by
+ * amdgpu_amdkfd_check_and_lock_kfd(). @adev is unused here.
+ */
+void amdgpu_amdkfd_unlock_kfd(struct amdgpu_device *adev)
+{
+       kgd2kfd_unlock_kfd();
+}
index df07e212c21e9345cbd67d3af5b8f54bb889ed9c..d1d643a050a1d6f2e9cfd10097290dc0668493ea 100644 (file)
@@ -151,6 +151,8 @@ void amdgpu_amdkfd_interrupt(struct amdgpu_device *adev,
 void amdgpu_amdkfd_device_probe(struct amdgpu_device *adev);
 void amdgpu_amdkfd_device_init(struct amdgpu_device *adev);
 void amdgpu_amdkfd_device_fini_sw(struct amdgpu_device *adev);
+int amdgpu_amdkfd_check_and_lock_kfd(struct amdgpu_device *adev);
+void amdgpu_amdkfd_unlock_kfd(struct amdgpu_device *adev);
 int amdgpu_amdkfd_submit_ib(struct amdgpu_device *adev,
                                enum kgd_engine_type engine,
                                uint32_t vmid, uint64_t gpu_addr,
@@ -373,6 +375,8 @@ int kgd2kfd_post_reset(struct kfd_dev *kfd);
 void kgd2kfd_interrupt(struct kfd_dev *kfd, const void *ih_ring_entry);
 void kgd2kfd_set_sram_ecc_flag(struct kfd_dev *kfd);
 void kgd2kfd_smi_event_throttle(struct kfd_dev *kfd, uint64_t throttle_bitmask);
+int kgd2kfd_check_and_lock_kfd(void);
+void kgd2kfd_unlock_kfd(void);
 #else
 static inline int kgd2kfd_init(void)
 {
@@ -438,5 +442,14 @@ static inline
 void kgd2kfd_smi_event_throttle(struct kfd_dev *kfd, uint64_t throttle_bitmask)
 {
 }
+
+/* Stub when KFD support is compiled out: report success so the
+ * repartition path is not blocked by a nonexistent KFD. */
+static inline int kgd2kfd_check_and_lock_kfd(void)
+{
+       return 0;
+}
+
+/* Stub when KFD support is compiled out: nothing to unlock. */
+static inline void kgd2kfd_unlock_kfd(void)
+{
+}
 #endif
 #endif /* AMDGPU_AMDKFD_H_INCLUDED */
index 069b259f384c3396ccb5465f9c712870ab60ee63..69bac5b801cee4f59439585e795e84eda1b5966e 100644 (file)
@@ -1233,10 +1233,30 @@ static ssize_t amdgpu_gfx_set_compute_partition(struct device *dev,
                return -EINVAL;
        }
 
+       if (!adev->kfd.init_complete)
+               return -EPERM;
+
        mutex_lock(&adev->gfx.partition_mutex);
 
-       ret = adev->gfx.funcs->switch_partition_mode(adev, mode);
+       if (mode == adev->gfx.funcs->query_partition_mode(adev))
+               goto out;
+
+       ret = amdgpu_amdkfd_check_and_lock_kfd(adev);
+       if (ret)
+               goto out;
+
+       amdgpu_amdkfd_device_fini_sw(adev);
+
+       adev->gfx.funcs->switch_partition_mode(adev, mode);
+
+       amdgpu_amdkfd_device_probe(adev);
+       amdgpu_amdkfd_device_init(adev);
+       /* If KFD init failed, return failure */
+       if (!adev->kfd.init_complete)
+               ret = -EIO;
 
+       amdgpu_amdkfd_unlock_kfd(adev);
+out:
        mutex_unlock(&adev->gfx.partition_mutex);
 
        if (ret)
index c776fc5884def1fa7b7f31c5c022d904a95ca312..47d8ac64e877cf334599b668da5d8bc2fecf7b43 100644 (file)
@@ -675,7 +675,7 @@ static void gfx_v9_4_3_select_me_pipe_q(struct amdgpu_device *adev,
 static enum amdgpu_gfx_partition
 gfx_v9_4_3_query_compute_partition(struct amdgpu_device *adev)
 {
-       enum amdgpu_gfx_partition mode = AMDGPU_UNKNOWN_COMPUTE_PARTITION_MODE;
+       enum amdgpu_gfx_partition mode = adev->gfx.partition_mode;
 
        if (adev->nbio.funcs->get_compute_partition_mode)
                mode = adev->nbio.funcs->get_compute_partition_mode(adev);
@@ -689,9 +689,6 @@ static int gfx_v9_4_3_switch_compute_partition(struct amdgpu_device *adev,
        u32 tmp = 0;
        int num_xcc_per_partition, i, num_xcc;
 
-       if (mode == adev->gfx.partition_mode)
-               return mode;
-
        num_xcc = NUM_XCC(adev->gfx.xcc_mask);
        switch (mode) {
        case AMDGPU_SPX_PARTITION_MODE:
index eb2b44fddf746b8e7c431e24898c01b971ac6561..293787290e36beaedf01eb077a1e83c475bb5a62 100644 (file)
@@ -1356,6 +1356,27 @@ unsigned int kfd_get_num_xgmi_sdma_engines(struct kfd_node *node)
                kfd_get_num_sdma_engines(node);
 }
 
+/* Lock KFD ahead of a GPU repartition.
+ *
+ * Fails with -EBUSY if any KFD process exists (kfd_processes_table
+ * non-empty) or KFD is already locked; otherwise increments kfd_locked
+ * under kfd_processes_mutex and returns 0. Pair with kgd2kfd_unlock_kfd().
+ */
+int kgd2kfd_check_and_lock_kfd(void)
+{
+       mutex_lock(&kfd_processes_mutex);
+       if (!hash_empty(kfd_processes_table) || kfd_is_locked()) {
+               mutex_unlock(&kfd_processes_mutex);
+               return -EBUSY;
+       }
+
+       ++kfd_locked;
+       mutex_unlock(&kfd_processes_mutex);
+
+       return 0;
+}
+
+/* Drop the lock reference taken by kgd2kfd_check_and_lock_kfd().
+ * Caller must hold a successful check_and_lock; no underflow check is done.
+ */
+void kgd2kfd_unlock_kfd(void)
+{
+       mutex_lock(&kfd_processes_mutex);
+       --kfd_locked;
+       mutex_unlock(&kfd_processes_mutex);
+}
+
 #if defined(CONFIG_DEBUG_FS)
 
 /* This function will send a package to HIQ to hang the HWS
This page took 0.07455 seconds and 4 git commands to generate.