X-Git-Url: https://repo.jachan.dev/J-linux.git/blobdiff_plain/466b179346094e01deccd051a215fe782b59ca68..f5500f385ba344d24468c086eb9c50f5ef4f1419:/drivers/gpu/drm/amd/amdgpu/amdgpu_ras.c diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ras.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_ras.c index 49c71cfc7fc6..4d387557cc37 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ras.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ras.c @@ -24,6 +24,8 @@ #include #include #include +#include + #include "amdgpu.h" #include "amdgpu_ras.h" #include "amdgpu_atomfirmware.h" @@ -128,6 +130,12 @@ const char *ras_block_string[] = { #define AMDGPU_RAS_FLAG_INIT_NEED_RESET 2 #define RAS_DEFAULT_FLAGS (AMDGPU_RAS_FLAG_INIT_BY_VBIOS) +static int amdgpu_ras_reserve_vram(struct amdgpu_device *adev, + uint64_t offset, uint64_t size, + struct amdgpu_bo **bo_ptr); +static int amdgpu_ras_release_vram(struct amdgpu_device *adev, + struct amdgpu_bo **bo_ptr); + static void amdgpu_ras_self_test(struct amdgpu_device *adev) { /* TODO */ @@ -244,8 +252,8 @@ static int amdgpu_ras_debugfs_ctrl_parse_data(struct file *f, return 0; } -/* - * DOC: ras debugfs control interface +/** + * DOC: AMDGPU RAS debugfs control interface * * It accepts struct ras_debug_if who has two members. * @@ -307,6 +315,7 @@ static ssize_t amdgpu_ras_debugfs_ctrl_write(struct file *f, const char __user * { struct amdgpu_device *adev = (struct amdgpu_device *)file_inode(f)->i_private; struct ras_debug_if data; + struct amdgpu_bo *bo; int ret = 0; ret = amdgpu_ras_debugfs_ctrl_parse_data(f, buf, size, pos, &data); @@ -324,7 +333,16 @@ static ssize_t amdgpu_ras_debugfs_ctrl_write(struct file *f, const char __user * ret = amdgpu_ras_feature_enable(adev, &data.head, 1); break; case 2: + ret = amdgpu_ras_reserve_vram(adev, + data.inject.address, PAGE_SIZE, &bo); + /* This address might be used already on failure. In fact we can + * perform an injection in such case. + */ + if (ret) + break; + data.inject.address = amdgpu_bo_gpu_offset(bo); ret = amdgpu_ras_error_inject(adev, &data.inject); + amdgpu_ras_release_vram(adev, &bo); break; default: ret = -EINVAL; @@ -764,8 +782,8 @@ static ssize_t amdgpu_ras_sysfs_badpages_read(struct file *f, struct amdgpu_device *adev = con->adev; const unsigned int element_size = sizeof("0xabcdabcd : 0x12345678 : R\n") - 1; - unsigned int start = (ppos + element_size - 1) / element_size; - unsigned int end = (ppos + count - 1) / element_size; + unsigned int start = div64_ul(ppos + element_size - 1, element_size); + unsigned int end = div64_ul(ppos + count - 1, element_size); ssize_t s = 0; struct ras_badpage *bps = NULL; unsigned int bps_count = 0; @@ -1594,12 +1612,9 @@ recovery_out: } /* do some init work after IP late init as dependence. - * TODO - * gpu reset will re-enable ras, need fint out one way to run it again. - * for now, if a gpu reset happened, unless IP enable its ras, the ras state - * will be showed as disabled. + * and it runs in resume/gpu reset/booting up cases. */ -void amdgpu_ras_post_init(struct amdgpu_device *adev) +void amdgpu_ras_resume(struct amdgpu_device *adev) { struct amdgpu_ras *con = amdgpu_ras_get_context(adev); struct ras_manager *obj, *tmp; @@ -1642,6 +1657,19 @@ void amdgpu_ras_post_init(struct amdgpu_device *adev) } } +void amdgpu_ras_suspend(struct amdgpu_device *adev) +{ + struct amdgpu_ras *con = amdgpu_ras_get_context(adev); + + if (!con) + return; + + amdgpu_ras_disable_all_features(adev, 0); + /* Make sure all ras objects are disabled. */ + if (con->features) + amdgpu_ras_disable_all_features(adev, 1); +} + /* do some fini work before IP fini as dependence */ int amdgpu_ras_pre_fini(struct amdgpu_device *adev) {