+/*
+ * Queue a poison request on the RAS context's poison_fifo.
+ *
+ * @block, @pasid, @reset, @pasid_fn and @data are stored in a zeroed
+ * struct ras_poison_msg, which is then handed to kfifo_put().
+ *
+ * Return: 0 when the message was stored, -ENOSPC (after logging an error)
+ * when kfifo_put() reports that it could not be stored.
+ */
+int amdgpu_ras_put_poison_req(struct amdgpu_device *adev,
+		enum amdgpu_ras_block block, uint16_t pasid,
+		pasid_notify pasid_fn, void *data, uint32_t reset)
+{
+	struct amdgpu_ras *ras = amdgpu_ras_get_context(adev);
+	struct ras_poison_msg msg;
+
+	/* Clear the whole message first so every unset byte is zero. */
+	memset(&msg, 0, sizeof(msg));
+	msg.data = data;
+	msg.pasid_fn = pasid_fn;
+	msg.reset = reset;
+	msg.pasid = pasid;
+	msg.block = block;
+
+	/* A non-zero result from kfifo_put() means the message was queued. */
+	if (kfifo_put(&ras->poison_fifo, msg))
+		return 0;
+
+	dev_err(adev->dev, "Poison message fifo is full!\n");
+	return -ENOSPC;
+}
+
+/*
+ * Take one poison request out of the RAS context's poison_fifo and store
+ * it in @poison_msg.
+ *
+ * Return: the value produced by kfifo_get(), unchanged.
+ */
+static int amdgpu_ras_get_poison_req(struct amdgpu_device *adev,
+		struct ras_poison_msg *poison_msg)
+{
+	struct amdgpu_ras *ras = amdgpu_ras_get_context(adev);
+	int fetched;
+
+	fetched = kfifo_get(&ras->poison_fifo, poison_msg);
+
+	return fetched;
+}
+
+/*
+ * Prepare an ECC log for use: clear the "updated" flag, fill the siphash
+ * key, set up the empty page radix tree and initialise the lock.
+ */
+static void amdgpu_ras_ecc_log_init(struct ras_ecc_log_info *ecc_log)
+{
+	ecc_log->de_updated = false;
+
+	/* The siphash key may be any value; a fixed byte pattern is used. */
+	memset(&ecc_log->ecc_key, 0xad, sizeof(ecc_log->ecc_key));
+
+	INIT_RADIX_TREE(&ecc_log->de_page_tree, GFP_KERNEL);
+
+	mutex_init(&ecc_log->lock);
+}
+
+/*
+ * Tear down an ECC log: free every entry stored in the page radix tree
+ * (including each entry's err_pages.pfn buffer), remove the entries from
+ * the tree, destroy the lock and clear the "updated" flag.
+ */
+static void amdgpu_ras_ecc_log_fini(struct ras_ecc_log_info *ecc_log)
+{
+	struct ras_ecc_err *entry;
+	struct radix_tree_iter it;
+	void __rcu **slotp;
+
+	mutex_lock(&ecc_log->lock);
+	radix_tree_for_each_slot(slotp, &ecc_log->de_page_tree, &it, 0) {
+		entry = radix_tree_deref_slot(slotp);
+
+		/* Release the pfn buffer before the entry that owns it. */
+		kfree(entry->err_pages.pfn);
+		kfree(entry);
+
+		radix_tree_iter_delete(&ecc_log->de_page_tree, &it, slotp);
+	}
+	mutex_unlock(&ecc_log->lock);
+
+	mutex_destroy(&ecc_log->lock);
+	ecc_log->de_updated = false;
+}
+
+/*
+ * Delayed-work callback for page_retirement_dwork.
+ *
+ * Does nothing while the device is in reset or the RAS context is in
+ * recovery. Otherwise it runs amdgpu_umc_handle_bad_pages() on a freshly
+ * initialised error-data object, then re-arms itself after
+ * AMDGPU_RAS_RETIRE_PAGE_INTERVAL milliseconds if the UMC ECC log's page
+ * tree still has entries tagged UMC_ECC_NEW_DETECTED_TAG.
+ */
+static void amdgpu_ras_do_page_retirement(struct work_struct *work)
+{
+	struct amdgpu_ras *ras = container_of(work, struct amdgpu_ras,
+					      page_retirement_dwork.work);
+	struct amdgpu_device *adev = ras->adev;
+	struct ras_ecc_log_info *ecc_log = &ras->umc_ecc_log;
+	struct ras_err_data err_data;
+	unsigned long delay;
+
+	if (amdgpu_in_reset(adev))
+		return;
+	if (atomic_read(&ras->in_recovery))
+		return;
+
+	amdgpu_ras_error_data_init(&err_data);
+	amdgpu_umc_handle_bad_pages(adev, &err_data);
+	amdgpu_ras_error_data_fini(&err_data);
+
+	/* The tag check and the re-arm both happen under the log lock. */
+	mutex_lock(&ecc_log->lock);
+	if (radix_tree_tagged(&ecc_log->de_page_tree,
+			      UMC_ECC_NEW_DETECTED_TAG)) {
+		delay = msecs_to_jiffies(AMDGPU_RAS_RETIRE_PAGE_INTERVAL);
+		schedule_delayed_work(&ras->page_retirement_dwork, delay);
+	}
+	mutex_unlock(&ecc_log->lock);
+}
+
+/*
+ * Query the RAS error status of @ras_block, retrying until the UMC ECC
+ * log's de_updated flag is set or @timeout_ms retries are used up.
+ *
+ * de_updated is cleared before the first query. Each retry is preceded by
+ * a msleep(1) and consumes one unit of the timeout budget. With
+ * @timeout_ms == 0 the query is issued exactly once and never times out.
+ *
+ * Return: 0 on success, the error returned by
+ * amdgpu_ras_query_error_status() if a query fails, or -ETIMEDOUT when a
+ * non-zero budget is exhausted.
+ */
+static int amdgpu_ras_query_ecc_status(struct amdgpu_device *adev,
+		enum amdgpu_ras_block ras_block, uint32_t timeout_ms)
+{
+	struct amdgpu_ras *con = amdgpu_ras_get_context(adev);
+	struct ras_ecc_log_info *log = &con->umc_ecc_log;
+	struct ras_query_if query;
+	uint32_t remaining = timeout_ms;
+	int err;
+
+	memset(&query, 0, sizeof(query));
+	query.head.block = ras_block;
+
+	log->de_updated = false;
+	for (;;) {
+		err = amdgpu_ras_query_error_status(adev, &query);
+		if (err) {
+			dev_err(adev->dev, "Failed to query ras error! ret:%d\n", err);
+			return err;
+		}
+
+		/* Nothing new yet and budget left: wait, then spend one unit. */
+		if (remaining && !log->de_updated) {
+			msleep(1);
+			remaining--;
+		}
+
+		/* Stop once the budget is gone or the log has been updated. */
+		if (!remaining || log->de_updated)
+			break;
+	}
+
+	/* Success unless a non-zero budget was fully consumed. */
+	if (!timeout_ms || remaining)
+		return 0;
+
+	dev_warn(adev->dev, "Can't find deferred error\n");
+	return -ETIMEDOUT;
+}
+
+/*
+ * Handle a poison creation event: query the UMC ECC status, allowing up to
+ * @timeout retries, and schedule page_retirement_dwork immediately when
+ * the query succeeds. A failed query schedules nothing.
+ */
+static void amdgpu_ras_poison_creation_handler(struct amdgpu_device *adev,
+		uint32_t timeout)
+{
+	struct amdgpu_ras *ras = amdgpu_ras_get_context(adev);
+
+	if (amdgpu_ras_query_ecc_status(adev, AMDGPU_RAS_BLOCK__UMC, timeout))
+		return;
+
+	schedule_delayed_work(&ras->page_retirement_dwork, 0);
+}
+
+/*
+ * Handle a poison consumption request described by @poison_msg.
+ *
+ * Calls kgd2kfd_set_sram_ecc_flag() for the device's KFD, invokes the
+ * message's pasid_fn callback when one is set, and, if the message
+ * carries reset flags, flushes page_retirement_dwork, ORs those flags
+ * into gpu_reset_flags and calls amdgpu_ras_reset_gpu().
+ *
+ * Return: always 0.
+ */
+static int amdgpu_ras_poison_consumption_handler(struct amdgpu_device *adev,
+		struct ras_poison_msg *poison_msg)
+{
+	struct amdgpu_ras *ras = amdgpu_ras_get_context(adev);
+	/* Captured up front, before the callback below runs. */
+	uint32_t reset_flags = poison_msg->reset;
+	uint16_t pasid = poison_msg->pasid;
+
+	kgd2kfd_set_sram_ecc_flag(adev->kfd.dev);
+
+	if (poison_msg->pasid_fn)
+		poison_msg->pasid_fn(adev, pasid, poison_msg->data);
+
+	if (!reset_flags)
+		return 0;
+
+	/* Flush the page retirement work before requesting the reset. */
+	flush_delayed_work(&ras->page_retirement_dwork);
+
+	ras->gpu_reset_flags |= reset_flags;
+	amdgpu_ras_reset_gpu(adev);
+
+	return 0;
+}
+