drivers/gpu/drm/amd/amdgpu/amdgpu_mn.c

   1 /*
   2  * Copyright 2014 Advanced Micro Devices, Inc.
   3  * All Rights Reserved.
   4  *
   5  * Permission is hereby granted, free of charge, to any person obtaining a
   6  * copy of this software and associated documentation files (the
   7  * "Software"), to deal in the Software without restriction, including
   8  * without limitation the rights to use, copy, modify, merge, publish,
   9  * distribute, sub license, and/or sell copies of the Software, and to
  10  * permit persons to whom the Software is furnished to do so, subject to
  11  * the following conditions:
  12  *
  13  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
  14  * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
  15  * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL
  16  * THE COPYRIGHT HOLDERS, AUTHORS AND/OR ITS SUPPLIERS BE LIABLE FOR ANY CLAIM,
  17  * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR
  18  * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE
  19  * USE OR OTHER DEALINGS IN THE SOFTWARE.
  20  *
  21  * The above copyright notice and this permission notice (including the
  22  * next paragraph) shall be included in all copies or substantial portions
  23  * of the Software.
  24  *
  25  */
  26 /*
  27  * Authors:
  28  *    Christian König <[email protected]>
  29  */
  30
/**
 * DOC: MMU Notifier
 *
 * For coherent userptr handling the driver registers an MMU notifier, which
 * informs it about updates to the page tables of a process.
 *
 * When somebody tries to invalidate the page tables we block the update until
 * all operations on the pages in question are completed, then those pages are
 * marked as accessed and also dirty if it wasn't a read only access.
 *
 * New command submissions using the userptrs in question are delayed until all
 * page table invalidations are completed and we once more see a coherent
 * process address space.
 */
  45
  46 #include <linux/firmware.h>
  47 #include <linux/module.h>
  48 #include <linux/mmu_notifier.h>
  49 #include <linux/interval_tree.h>
  50 #include <drm/drmP.h>
  51 #include <drm/drm.h>
  52
  53 #include "amdgpu.h"
  54 #include "amdgpu_amdkfd.h"
  55
/**
 * struct amdgpu_mn
 *
 * @adev: amdgpu device pointer
 * @mm: process address space
 * @mn: MMU notifier structure
 * @type: type of notifier context; selects the callbacks in amdgpu_mn_ops and
 *        is part of the hash key
 * @work: destruction work item
 * @node: hash table node in adev->mn_hash, keyed by @mm and @type
 * @lock: rw semaphore protecting the notifier nodes
 * @objects: interval tree containing amdgpu_mn_nodes
 * @read_lock: mutex for recursive locking of @lock
 * @recursion: depth of recursion
 *
 * Data for each amdgpu device and process address space.
 */
struct amdgpu_mn {
        /* constant after initialisation */
        struct amdgpu_device    *adev;
        struct mm_struct        *mm;
        struct mmu_notifier     mn;
        enum amdgpu_mn_type     type;

        /* only used on destruction */
        struct work_struct      work;

        /* protected by adev->mn_lock */
        struct hlist_node       node;

        /* objects protected by lock */
        struct rw_semaphore     lock;
        struct rb_root_cached   objects;
        /* serialises the first-reader check in amdgpu_mn_read_lock() */
        struct mutex            read_lock;
        /* number of nested read-side sections currently open */
        atomic_t                recursion;
};
  90
/**
 * struct amdgpu_mn_node
 *
 * @it: interval node defining start-last of the affected address range
 * @bos: list of all BOs in the affected address range, linked through the
 *       BO's mn_list member
 *
 * Manages all BOs which are affected by a certain range of address space.
 */
struct amdgpu_mn_node {
        struct interval_tree_node       it;
        struct list_head                bos;
};
 103
 104 /**
 105  * amdgpu_mn_destroy - destroy the MMU notifier
 106  *
 107  * @work: previously sheduled work item
 108  *
 109  * Lazy destroys the notifier from a work item
 110  */
 111 static void amdgpu_mn_destroy(struct work_struct *work)
 112 {
 113         struct amdgpu_mn *amn = container_of(work, struct amdgpu_mn, work);
 114         struct amdgpu_device *adev = amn->adev;
 115         struct amdgpu_mn_node *node, *next_node;
 116         struct amdgpu_bo *bo, *next_bo;
 117
 118         mutex_lock(&adev->mn_lock);
 119         down_write(&amn->lock);
 120         hash_del(&amn->node);
 121         rbtree_postorder_for_each_entry_safe(node, next_node,
 122                                              &amn->objects.rb_root, it.rb) {
 123                 list_for_each_entry_safe(bo, next_bo, &node->bos, mn_list) {
 124                         bo->mn = NULL;
 125                         list_del_init(&bo->mn_list);
 126                 }
 127                 kfree(node);
 128         }
 129         up_write(&amn->lock);
 130         mutex_unlock(&adev->mn_lock);
 131         mmu_notifier_unregister_no_release(&amn->mn, amn->mm);
 132         kfree(amn);
 133 }
 134
 135 /**
 136  * amdgpu_mn_release - callback to notify about mm destruction
 137  *
 138  * @mn: our notifier
 139  * @mm: the mm this callback is about
 140  *
 141  * Shedule a work item to lazy destroy our notifier.
 142  */
 143 static void amdgpu_mn_release(struct mmu_notifier *mn,
 144                               struct mm_struct *mm)
 145 {
 146         struct amdgpu_mn *amn = container_of(mn, struct amdgpu_mn, mn);
 147
 148         INIT_WORK(&amn->work, amdgpu_mn_destroy);
 149         schedule_work(&amn->work);
 150 }
 151
 152
 153 /**
 154  * amdgpu_mn_lock - take the write side lock for this notifier
 155  *
 156  * @mn: our notifier
 157  */
 158 void amdgpu_mn_lock(struct amdgpu_mn *mn)
 159 {
 160         if (mn)
 161                 down_write(&mn->lock);
 162 }
 163
 164 /**
 165  * amdgpu_mn_unlock - drop the write side lock for this notifier
 166  *
 167  * @mn: our notifier
 168  */
 169 void amdgpu_mn_unlock(struct amdgpu_mn *mn)
 170 {
 171         if (mn)
 172                 up_write(&mn->lock);
 173 }
 174
 175 /**
 176  * amdgpu_mn_read_lock - take the read side lock for this notifier
 177  *
 178  * @amn: our notifier
 179  */
 180 static void amdgpu_mn_read_lock(struct amdgpu_mn *amn)
 181 {
 182         mutex_lock(&amn->read_lock);
 183         if (atomic_inc_return(&amn->recursion) == 1)
 184                 down_read_non_owner(&amn->lock);
 185         mutex_unlock(&amn->read_lock);
 186 }
 187
 188 /**
 189  * amdgpu_mn_read_unlock - drop the read side lock for this notifier
 190  *
 191  * @amn: our notifier
 192  */
 193 static void amdgpu_mn_read_unlock(struct amdgpu_mn *amn)
 194 {
 195         if (atomic_dec_return(&amn->recursion) == 0)
 196                 up_read_non_owner(&amn->lock);
 197 }
 198
 199 /**
 200  * amdgpu_mn_invalidate_node - unmap all BOs of a node
 201  *
 202  * @node: the node with the BOs to unmap
 203  * @start: start of address range affected
 204  * @end: end of address range affected
 205  *
 206  * Block for operations on BOs to finish and mark pages as accessed and
 207  * potentially dirty.
 208  */
 209 static void amdgpu_mn_invalidate_node(struct amdgpu_mn_node *node,
 210                                       unsigned long start,
 211                                       unsigned long end)
 212 {
 213         struct amdgpu_bo *bo;
 214         long r;
 215
 216         list_for_each_entry(bo, &node->bos, mn_list) {
 217
 218                 if (!amdgpu_ttm_tt_affect_userptr(bo->tbo.ttm, start, end))
 219                         continue;
 220
 221                 r = reservation_object_wait_timeout_rcu(bo->tbo.resv,
 222                         true, false, MAX_SCHEDULE_TIMEOUT);
 223                 if (r <= 0)
 224                         DRM_ERROR("(%ld) failed to wait for user bo\n", r);
 225
 226                 amdgpu_ttm_tt_mark_user_pages(bo->tbo.ttm);
 227         }
 228 }
 229
 230 /**
 231  * amdgpu_mn_invalidate_range_start_gfx - callback to notify about mm change
 232  *
 233  * @mn: our notifier
 234  * @mm: the mm this callback is about
 235  * @start: start of updated range
 236  * @end: end of updated range
 237  *
 238  * Block for operations on BOs to finish and mark pages as accessed and
 239  * potentially dirty.
 240  */
 241 static void amdgpu_mn_invalidate_range_start_gfx(struct mmu_notifier *mn,
 242                                                  struct mm_struct *mm,
 243                                                  unsigned long start,
 244                                                  unsigned long end)
 245 {
 246         struct amdgpu_mn *amn = container_of(mn, struct amdgpu_mn, mn);
 247         struct interval_tree_node *it;
 248
 249         /* notification is exclusive, but interval is inclusive */
 250         end -= 1;
 251
 252         amdgpu_mn_read_lock(amn);
 253
 254         it = interval_tree_iter_first(&amn->objects, start, end);
 255         while (it) {
 256                 struct amdgpu_mn_node *node;
 257
 258                 node = container_of(it, struct amdgpu_mn_node, it);
 259                 it = interval_tree_iter_next(it, start, end);
 260
 261                 amdgpu_mn_invalidate_node(node, start, end);
 262         }
 263 }
 264
 265 /**
 266  * amdgpu_mn_invalidate_range_start_hsa - callback to notify about mm change
 267  *
 268  * @mn: our notifier
 269  * @mn: the mm this callback is about
 270  * @start: start of updated range
 271  * @end: end of updated range
 272  *
 273  * We temporarily evict all BOs between start and end. This
 274  * necessitates evicting all user-mode queues of the process. The BOs
 275  * are restorted in amdgpu_mn_invalidate_range_end_hsa.
 276  */
 277 static void amdgpu_mn_invalidate_range_start_hsa(struct mmu_notifier *mn,
 278                                                  struct mm_struct *mm,
 279                                                  unsigned long start,
 280                                                  unsigned long end)
 281 {
 282         struct amdgpu_mn *amn = container_of(mn, struct amdgpu_mn, mn);
 283         struct interval_tree_node *it;
 284
 285         /* notification is exclusive, but interval is inclusive */
 286         end -= 1;
 287
 288         amdgpu_mn_read_lock(amn);
 289
 290         it = interval_tree_iter_first(&amn->objects, start, end);
 291         while (it) {
 292                 struct amdgpu_mn_node *node;
 293                 struct amdgpu_bo *bo;
 294
 295                 node = container_of(it, struct amdgpu_mn_node, it);
 296                 it = interval_tree_iter_next(it, start, end);
 297
 298                 list_for_each_entry(bo, &node->bos, mn_list) {
 299                         struct kgd_mem *mem = bo->kfd_bo;
 300
 301                         if (amdgpu_ttm_tt_affect_userptr(bo->tbo.ttm,
 302                                                          start, end))
 303                                 amdgpu_amdkfd_evict_userptr(mem, mm);
 304                 }
 305         }
 306 }
 307
 308 /**
 309  * amdgpu_mn_invalidate_range_end - callback to notify about mm change
 310  *
 311  * @mn: our notifier
 312  * @mm: the mm this callback is about
 313  * @start: start of updated range
 314  * @end: end of updated range
 315  *
 316  * Release the lock again to allow new command submissions.
 317  */
 318 static void amdgpu_mn_invalidate_range_end(struct mmu_notifier *mn,
 319                                            struct mm_struct *mm,
 320                                            unsigned long start,
 321                                            unsigned long end)
 322 {
 323         struct amdgpu_mn *amn = container_of(mn, struct amdgpu_mn, mn);
 324
 325         amdgpu_mn_read_unlock(amn);
 326 }
 327
 328 static const struct mmu_notifier_ops amdgpu_mn_ops[] = {
 329         [AMDGPU_MN_TYPE_GFX] = {
 330                 .release = amdgpu_mn_release,
 331                 .invalidate_range_start = amdgpu_mn_invalidate_range_start_gfx,
 332                 .invalidate_range_end = amdgpu_mn_invalidate_range_end,
 333         },
 334         [AMDGPU_MN_TYPE_HSA] = {
 335                 .release = amdgpu_mn_release,
 336                 .invalidate_range_start = amdgpu_mn_invalidate_range_start_hsa,
 337                 .invalidate_range_end = amdgpu_mn_invalidate_range_end,
 338         },
 339 };
 340
 341 /* Low bits of any reasonable mm pointer will be unused due to struct
 342  * alignment. Use these bits to make a unique key from the mm pointer
 343  * and notifier type.
 344  */
 345 #define AMDGPU_MN_KEY(mm, type) ((unsigned long)(mm) + (type))
 346
 347 /**
 348  * amdgpu_mn_get - create notifier context
 349  *
 350  * @adev: amdgpu device pointer
 351  * @type: type of MMU notifier context
 352  *
 353  * Creates a notifier context for current->mm.
 354  */
 355 struct amdgpu_mn *amdgpu_mn_get(struct amdgpu_device *adev,
 356                                 enum amdgpu_mn_type type)
 357 {
 358         struct mm_struct *mm = current->mm;
 359         struct amdgpu_mn *amn;
 360         unsigned long key = AMDGPU_MN_KEY(mm, type);
 361         int r;
 362
 363         mutex_lock(&adev->mn_lock);
 364         if (down_write_killable(&mm->mmap_sem)) {
 365                 mutex_unlock(&adev->mn_lock);
 366                 return ERR_PTR(-EINTR);
 367         }
 368
 369         hash_for_each_possible(adev->mn_hash, amn, node, key)
 370                 if (AMDGPU_MN_KEY(amn->mm, amn->type) == key)
 371                         goto release_locks;
 372
 373         amn = kzalloc(sizeof(*amn), GFP_KERNEL);
 374         if (!amn) {
 375                 amn = ERR_PTR(-ENOMEM);
 376                 goto release_locks;
 377         }
 378
 379         amn->adev = adev;
 380         amn->mm = mm;
 381         init_rwsem(&amn->lock);
 382         amn->type = type;
 383         amn->mn.ops = &amdgpu_mn_ops[type];
 384         amn->objects = RB_ROOT_CACHED;
 385         mutex_init(&amn->read_lock);
 386         atomic_set(&amn->recursion, 0);
 387
 388         r = __mmu_notifier_register(&amn->mn, mm);
 389         if (r)
 390                 goto free_amn;
 391
 392         hash_add(adev->mn_hash, &amn->node, AMDGPU_MN_KEY(mm, type));
 393
 394 release_locks:
 395         up_write(&mm->mmap_sem);
 396         mutex_unlock(&adev->mn_lock);
 397
 398         return amn;
 399
 400 free_amn:
 401         up_write(&mm->mmap_sem);
 402         mutex_unlock(&adev->mn_lock);
 403         kfree(amn);
 404
 405         return ERR_PTR(r);
 406 }
 407
 408 /**
 409  * amdgpu_mn_register - register a BO for notifier updates
 410  *
 411  * @bo: amdgpu buffer object
 412  * @addr: userptr addr we should monitor
 413  *
 414  * Registers an MMU notifier for the given BO at the specified address.
 415  * Returns 0 on success, -ERRNO if anything goes wrong.
 416  */
 417 int amdgpu_mn_register(struct amdgpu_bo *bo, unsigned long addr)
 418 {
 419         unsigned long end = addr + amdgpu_bo_size(bo) - 1;
 420         struct amdgpu_device *adev = amdgpu_ttm_adev(bo->tbo.bdev);
 421         enum amdgpu_mn_type type =
 422                 bo->kfd_bo ? AMDGPU_MN_TYPE_HSA : AMDGPU_MN_TYPE_GFX;
 423         struct amdgpu_mn *amn;
 424         struct amdgpu_mn_node *node = NULL, *new_node;
 425         struct list_head bos;
 426         struct interval_tree_node *it;
 427
 428         amn = amdgpu_mn_get(adev, type);
 429         if (IS_ERR(amn))
 430                 return PTR_ERR(amn);
 431
 432         new_node = kmalloc(sizeof(*new_node), GFP_KERNEL);
 433         if (!new_node)
 434                 return -ENOMEM;
 435
 436         INIT_LIST_HEAD(&bos);
 437
 438         down_write(&amn->lock);
 439
 440         while ((it = interval_tree_iter_first(&amn->objects, addr, end))) {
 441                 kfree(node);
 442                 node = container_of(it, struct amdgpu_mn_node, it);
 443                 interval_tree_remove(&node->it, &amn->objects);
 444                 addr = min(it->start, addr);
 445                 end = max(it->last, end);
 446                 list_splice(&node->bos, &bos);
 447         }
 448
 449         if (!node)
 450                 node = new_node;
 451         else
 452                 kfree(new_node);
 453
 454         bo->mn = amn;
 455
 456         node->it.start = addr;
 457         node->it.last = end;
 458         INIT_LIST_HEAD(&node->bos);
 459         list_splice(&bos, &node->bos);
 460         list_add(&bo->mn_list, &node->bos);
 461
 462         interval_tree_insert(&node->it, &amn->objects);
 463
 464         up_write(&amn->lock);
 465
 466         return 0;
 467 }
 468
 469 /**
 470  * amdgpu_mn_unregister - unregister a BO for notifier updates
 471  *
 472  * @bo: amdgpu buffer object
 473  *
 474  * Remove any registration of MMU notifier updates from the buffer object.
 475  */
 476 void amdgpu_mn_unregister(struct amdgpu_bo *bo)
 477 {
 478         struct amdgpu_device *adev = amdgpu_ttm_adev(bo->tbo.bdev);
 479         struct amdgpu_mn *amn;
 480         struct list_head *head;
 481
 482         mutex_lock(&adev->mn_lock);
 483
 484         amn = bo->mn;
 485         if (amn == NULL) {
 486                 mutex_unlock(&adev->mn_lock);
 487                 return;
 488         }
 489
 490         down_write(&amn->lock);
 491
 492         /* save the next list entry for later */
 493         head = bo->mn_list.next;
 494
 495         bo->mn = NULL;
 496         list_del_init(&bo->mn_list);
 497
 498         if (list_empty(head)) {
 499                 struct amdgpu_mn_node *node;
 500
 501                 node = container_of(head, struct amdgpu_mn_node, bos);
 502                 interval_tree_remove(&node->it, &amn->objects);
 503                 kfree(node);
 504         }
 505
 506         up_write(&amn->lock);
 507         mutex_unlock(&adev->mn_lock);
 508 }
 509