drivers/gpu/drm/amd/amdgpu/amdgpu_mn.c

   1 /*
   2  * Copyright 2014 Advanced Micro Devices, Inc.
   3  * All Rights Reserved.
   4  *
   5  * Permission is hereby granted, free of charge, to any person obtaining a
   6  * copy of this software and associated documentation files (the
   7  * "Software"), to deal in the Software without restriction, including
   8  * without limitation the rights to use, copy, modify, merge, publish,
   9  * distribute, sub license, and/or sell copies of the Software, and to
  10  * permit persons to whom the Software is furnished to do so, subject to
  11  * the following conditions:
  12  *
  13  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
  14  * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
  15  * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL
  16  * THE COPYRIGHT HOLDERS, AUTHORS AND/OR ITS SUPPLIERS BE LIABLE FOR ANY CLAIM,
  17  * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR
  18  * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE
  19  * USE OR OTHER DEALINGS IN THE SOFTWARE.
  20  *
  21  * The above copyright notice and this permission notice (including the
  22  * next paragraph) shall be included in all copies or substantial portions
  23  * of the Software.
  24  *
  25  */
  26 /*
  27  * Authors:
  28  *    Christian König <[email protected]>
  29  */
  30
  31 /**
  32  * DOC: MMU Notifier
  33  *
  34  * For coherent userptr handling the driver registers an MMU notifier to be
  35  * informed about updates to the page tables of a process.
  36  *
  37  * When somebody tries to invalidate the page tables we block the update until
  38  * all operations on the pages in question are completed, then those pages are
  39  * marked as accessed and also dirty if it wasn't a read only access.
  40  *
  41  * New command submissions using the userptrs in question are delayed until all
  42  * page table invalidations are completed and we once more see a coherent process
  43  * address space.
  44  */
  45
  46 #include <linux/firmware.h>
  47 #include <linux/module.h>
  48 #include <linux/mmu_notifier.h>
  49 #include <linux/interval_tree.h>
  50 #include <drm/drmP.h>
  51 #include <drm/drm.h>
  52
  53 #include "amdgpu.h"
  54 #include "amdgpu_amdkfd.h"
  55
  56 /**
  57  * struct amdgpu_mn
  58  *
  59  * @adev: amdgpu device pointer
  60  * @mm: process address space
  61  * @mn: MMU notifier structure
  62  * @type: type of MMU notifier
  63  * @work: destruction work item
  64  * @node: hash table node to find structure by adev and mn
  65  * @lock: rw semaphore protecting the notifier nodes
  66  * @objects: interval tree containing amdgpu_mn_nodes
  67  * @read_lock: mutex for recursive locking of @lock
  68  * @recursion: depth of recursion
  69  *
  70  * Data for each amdgpu device and process address space.
  71  */
  72 struct amdgpu_mn {
  73         /* constant after initialisation */
  74         struct amdgpu_device    *adev;
  75         struct mm_struct        *mm;
  76         struct mmu_notifier     mn;
  77         enum amdgpu_mn_type     type;
  78
  79         /* only used on destruction */
  80         struct work_struct      work;
  81
  82         /* protected by adev->mn_lock */
  83         struct hlist_node       node;
  84
  85         /* objects protected by lock */
  86         struct rw_semaphore     lock;
  87         struct rb_root_cached   objects;
  88         struct mutex            read_lock;
  89         atomic_t                recursion;
  90 };
  91
  92 /**
  93  * struct amdgpu_mn_node
  94  *
  95  * @it: interval node defining start-last of the affected address range
  96  * @bos: list of all BOs in the affected address range
  97  *
  98  * Manages all BOs which are affected of a certain range of address space.
  99  */
 100 struct amdgpu_mn_node {
 101         struct interval_tree_node       it;
 102         struct list_head                bos;
 103 };
 104
 105 /**
 106  * amdgpu_mn_destroy - destroy the MMU notifier
 107  *
 108  * @work: previously sheduled work item
 109  *
 110  * Lazy destroys the notifier from a work item
 111  */
 112 static void amdgpu_mn_destroy(struct work_struct *work)
 113 {
 114         struct amdgpu_mn *amn = container_of(work, struct amdgpu_mn, work);
 115         struct amdgpu_device *adev = amn->adev;
 116         struct amdgpu_mn_node *node, *next_node;
 117         struct amdgpu_bo *bo, *next_bo;
 118
 119         mutex_lock(&adev->mn_lock);
 120         down_write(&amn->lock);
 121         hash_del(&amn->node);
 122         rbtree_postorder_for_each_entry_safe(node, next_node,
 123                                              &amn->objects.rb_root, it.rb) {
 124                 list_for_each_entry_safe(bo, next_bo, &node->bos, mn_list) {
 125                         bo->mn = NULL;
 126                         list_del_init(&bo->mn_list);
 127                 }
 128                 kfree(node);
 129         }
 130         up_write(&amn->lock);
 131         mutex_unlock(&adev->mn_lock);
 132         mmu_notifier_unregister_no_release(&amn->mn, amn->mm);
 133         kfree(amn);
 134 }
 135
 136 /**
 137  * amdgpu_mn_release - callback to notify about mm destruction
 138  *
 139  * @mn: our notifier
 140  * @mm: the mm this callback is about
 141  *
 142  * Shedule a work item to lazy destroy our notifier.
 143  */
 144 static void amdgpu_mn_release(struct mmu_notifier *mn,
 145                               struct mm_struct *mm)
 146 {
 147         struct amdgpu_mn *amn = container_of(mn, struct amdgpu_mn, mn);
 148
 149         INIT_WORK(&amn->work, amdgpu_mn_destroy);
 150         schedule_work(&amn->work);
 151 }
 152
 153
 154 /**
 155  * amdgpu_mn_lock - take the write side lock for this notifier
 156  *
 157  * @mn: our notifier
 158  */
 159 void amdgpu_mn_lock(struct amdgpu_mn *mn)
 160 {
 161         if (mn)
 162                 down_write(&mn->lock);
 163 }
 164
 165 /**
 166  * amdgpu_mn_unlock - drop the write side lock for this notifier
 167  *
 168  * @mn: our notifier
 169  */
 170 void amdgpu_mn_unlock(struct amdgpu_mn *mn)
 171 {
 172         if (mn)
 173                 up_write(&mn->lock);
 174 }
 175
 176 /**
 177  * amdgpu_mn_read_lock - take the read side lock for this notifier
 178  *
 179  * @amn: our notifier
 180  */
 181 static void amdgpu_mn_read_lock(struct amdgpu_mn *amn)
 182 {
 183         mutex_lock(&amn->read_lock);
 184         if (atomic_inc_return(&amn->recursion) == 1)
 185                 down_read_non_owner(&amn->lock);
 186         mutex_unlock(&amn->read_lock);
 187 }
 188
 189 /**
 190  * amdgpu_mn_read_unlock - drop the read side lock for this notifier
 191  *
 192  * @amn: our notifier
 193  */
 194 static void amdgpu_mn_read_unlock(struct amdgpu_mn *amn)
 195 {
 196         if (atomic_dec_return(&amn->recursion) == 0)
 197                 up_read_non_owner(&amn->lock);
 198 }
 199
 200 /**
 201  * amdgpu_mn_invalidate_node - unmap all BOs of a node
 202  *
 203  * @node: the node with the BOs to unmap
 204  * @start: start of address range affected
 205  * @end: end of address range affected
 206  *
 207  * Block for operations on BOs to finish and mark pages as accessed and
 208  * potentially dirty.
 209  */
 210 static void amdgpu_mn_invalidate_node(struct amdgpu_mn_node *node,
 211                                       unsigned long start,
 212                                       unsigned long end)
 213 {
 214         struct amdgpu_bo *bo;
 215         long r;
 216
 217         list_for_each_entry(bo, &node->bos, mn_list) {
 218
 219                 if (!amdgpu_ttm_tt_affect_userptr(bo->tbo.ttm, start, end))
 220                         continue;
 221
 222                 r = reservation_object_wait_timeout_rcu(bo->tbo.resv,
 223                         true, false, MAX_SCHEDULE_TIMEOUT);
 224                 if (r <= 0)
 225                         DRM_ERROR("(%ld) failed to wait for user bo\n", r);
 226
 227                 amdgpu_ttm_tt_mark_user_pages(bo->tbo.ttm);
 228         }
 229 }
 230
 231 /**
 232  * amdgpu_mn_invalidate_range_start_gfx - callback to notify about mm change
 233  *
 234  * @mn: our notifier
 235  * @mm: the mm this callback is about
 236  * @start: start of updated range
 237  * @end: end of updated range
 238  *
 239  * Block for operations on BOs to finish and mark pages as accessed and
 240  * potentially dirty.
 241  */
 242 static void amdgpu_mn_invalidate_range_start_gfx(struct mmu_notifier *mn,
 243                                                  struct mm_struct *mm,
 244                                                  unsigned long start,
 245                                                  unsigned long end)
 246 {
 247         struct amdgpu_mn *amn = container_of(mn, struct amdgpu_mn, mn);
 248         struct interval_tree_node *it;
 249
 250         /* notification is exclusive, but interval is inclusive */
 251         end -= 1;
 252
 253         amdgpu_mn_read_lock(amn);
 254
 255         it = interval_tree_iter_first(&amn->objects, start, end);
 256         while (it) {
 257                 struct amdgpu_mn_node *node;
 258
 259                 node = container_of(it, struct amdgpu_mn_node, it);
 260                 it = interval_tree_iter_next(it, start, end);
 261
 262                 amdgpu_mn_invalidate_node(node, start, end);
 263         }
 264 }
 265
 266 /**
 267  * amdgpu_mn_invalidate_range_start_hsa - callback to notify about mm change
 268  *
 269  * @mn: our notifier
 270  * @mm: the mm this callback is about
 271  * @start: start of updated range
 272  * @end: end of updated range
 273  *
 274  * We temporarily evict all BOs between start and end. This
 275  * necessitates evicting all user-mode queues of the process. The BOs
 276  * are restorted in amdgpu_mn_invalidate_range_end_hsa.
 277  */
 278 static void amdgpu_mn_invalidate_range_start_hsa(struct mmu_notifier *mn,
 279                                                  struct mm_struct *mm,
 280                                                  unsigned long start,
 281                                                  unsigned long end)
 282 {
 283         struct amdgpu_mn *amn = container_of(mn, struct amdgpu_mn, mn);
 284         struct interval_tree_node *it;
 285
 286         /* notification is exclusive, but interval is inclusive */
 287         end -= 1;
 288
 289         amdgpu_mn_read_lock(amn);
 290
 291         it = interval_tree_iter_first(&amn->objects, start, end);
 292         while (it) {
 293                 struct amdgpu_mn_node *node;
 294                 struct amdgpu_bo *bo;
 295
 296                 node = container_of(it, struct amdgpu_mn_node, it);
 297                 it = interval_tree_iter_next(it, start, end);
 298
 299                 list_for_each_entry(bo, &node->bos, mn_list) {
 300                         struct kgd_mem *mem = bo->kfd_bo;
 301
 302                         if (amdgpu_ttm_tt_affect_userptr(bo->tbo.ttm,
 303                                                          start, end))
 304                                 amdgpu_amdkfd_evict_userptr(mem, mm);
 305                 }
 306         }
 307 }
 308
 309 /**
 310  * amdgpu_mn_invalidate_range_end - callback to notify about mm change
 311  *
 312  * @mn: our notifier
 313  * @mm: the mm this callback is about
 314  * @start: start of updated range
 315  * @end: end of updated range
 316  *
 317  * Release the lock again to allow new command submissions.
 318  */
 319 static void amdgpu_mn_invalidate_range_end(struct mmu_notifier *mn,
 320                                            struct mm_struct *mm,
 321                                            unsigned long start,
 322                                            unsigned long end)
 323 {
 324         struct amdgpu_mn *amn = container_of(mn, struct amdgpu_mn, mn);
 325
 326         amdgpu_mn_read_unlock(amn);
 327 }
 328
 329 static const struct mmu_notifier_ops amdgpu_mn_ops[] = {
 330         [AMDGPU_MN_TYPE_GFX] = {
 331                 .release = amdgpu_mn_release,
 332                 .invalidate_range_start = amdgpu_mn_invalidate_range_start_gfx,
 333                 .invalidate_range_end = amdgpu_mn_invalidate_range_end,
 334         },
 335         [AMDGPU_MN_TYPE_HSA] = {
 336                 .release = amdgpu_mn_release,
 337                 .invalidate_range_start = amdgpu_mn_invalidate_range_start_hsa,
 338                 .invalidate_range_end = amdgpu_mn_invalidate_range_end,
 339         },
 340 };
 341
 /*
  * Build the hash key for a notifier context from its mm pointer and its
  * notifier type. Structure alignment leaves the low bits of any
  * reasonable mm pointer unused, so adding the type there keeps the key
  * unique per (mm, type) pair.
  */
 #define AMDGPU_MN_KEY(mm, type) ((unsigned long)(mm) + (type))
 347
 348 /**
 349  * amdgpu_mn_get - create notifier context
 350  *
 351  * @adev: amdgpu device pointer
 352  * @type: type of MMU notifier context
 353  *
 354  * Creates a notifier context for current->mm.
 355  */
 356 struct amdgpu_mn *amdgpu_mn_get(struct amdgpu_device *adev,
 357                                 enum amdgpu_mn_type type)
 358 {
 359         struct mm_struct *mm = current->mm;
 360         struct amdgpu_mn *amn;
 361         unsigned long key = AMDGPU_MN_KEY(mm, type);
 362         int r;
 363
 364         mutex_lock(&adev->mn_lock);
 365         if (down_write_killable(&mm->mmap_sem)) {
 366                 mutex_unlock(&adev->mn_lock);
 367                 return ERR_PTR(-EINTR);
 368         }
 369
 370         hash_for_each_possible(adev->mn_hash, amn, node, key)
 371                 if (AMDGPU_MN_KEY(amn->mm, amn->type) == key)
 372                         goto release_locks;
 373
 374         amn = kzalloc(sizeof(*amn), GFP_KERNEL);
 375         if (!amn) {
 376                 amn = ERR_PTR(-ENOMEM);
 377                 goto release_locks;
 378         }
 379
 380         amn->adev = adev;
 381         amn->mm = mm;
 382         init_rwsem(&amn->lock);
 383         amn->type = type;
 384         amn->mn.ops = &amdgpu_mn_ops[type];
 385         amn->objects = RB_ROOT_CACHED;
 386         mutex_init(&amn->read_lock);
 387         atomic_set(&amn->recursion, 0);
 388
 389         r = __mmu_notifier_register(&amn->mn, mm);
 390         if (r)
 391                 goto free_amn;
 392
 393         hash_add(adev->mn_hash, &amn->node, AMDGPU_MN_KEY(mm, type));
 394
 395 release_locks:
 396         up_write(&mm->mmap_sem);
 397         mutex_unlock(&adev->mn_lock);
 398
 399         return amn;
 400
 401 free_amn:
 402         up_write(&mm->mmap_sem);
 403         mutex_unlock(&adev->mn_lock);
 404         kfree(amn);
 405
 406         return ERR_PTR(r);
 407 }
 408
 409 /**
 410  * amdgpu_mn_register - register a BO for notifier updates
 411  *
 412  * @bo: amdgpu buffer object
 413  * @addr: userptr addr we should monitor
 414  *
 415  * Registers an MMU notifier for the given BO at the specified address.
 416  * Returns 0 on success, -ERRNO if anything goes wrong.
 417  */
 418 int amdgpu_mn_register(struct amdgpu_bo *bo, unsigned long addr)
 419 {
 420         unsigned long end = addr + amdgpu_bo_size(bo) - 1;
 421         struct amdgpu_device *adev = amdgpu_ttm_adev(bo->tbo.bdev);
 422         enum amdgpu_mn_type type =
 423                 bo->kfd_bo ? AMDGPU_MN_TYPE_HSA : AMDGPU_MN_TYPE_GFX;
 424         struct amdgpu_mn *amn;
 425         struct amdgpu_mn_node *node = NULL, *new_node;
 426         struct list_head bos;
 427         struct interval_tree_node *it;
 428
 429         amn = amdgpu_mn_get(adev, type);
 430         if (IS_ERR(amn))
 431                 return PTR_ERR(amn);
 432
 433         new_node = kmalloc(sizeof(*new_node), GFP_KERNEL);
 434         if (!new_node)
 435                 return -ENOMEM;
 436
 437         INIT_LIST_HEAD(&bos);
 438
 439         down_write(&amn->lock);
 440
 441         while ((it = interval_tree_iter_first(&amn->objects, addr, end))) {
 442                 kfree(node);
 443                 node = container_of(it, struct amdgpu_mn_node, it);
 444                 interval_tree_remove(&node->it, &amn->objects);
 445                 addr = min(it->start, addr);
 446                 end = max(it->last, end);
 447                 list_splice(&node->bos, &bos);
 448         }
 449
 450         if (!node)
 451                 node = new_node;
 452         else
 453                 kfree(new_node);
 454
 455         bo->mn = amn;
 456
 457         node->it.start = addr;
 458         node->it.last = end;
 459         INIT_LIST_HEAD(&node->bos);
 460         list_splice(&bos, &node->bos);
 461         list_add(&bo->mn_list, &node->bos);
 462
 463         interval_tree_insert(&node->it, &amn->objects);
 464
 465         up_write(&amn->lock);
 466
 467         return 0;
 468 }
 469
 470 /**
 471  * amdgpu_mn_unregister - unregister a BO for notifier updates
 472  *
 473  * @bo: amdgpu buffer object
 474  *
 475  * Remove any registration of MMU notifier updates from the buffer object.
 476  */
 477 void amdgpu_mn_unregister(struct amdgpu_bo *bo)
 478 {
 479         struct amdgpu_device *adev = amdgpu_ttm_adev(bo->tbo.bdev);
 480         struct amdgpu_mn *amn;
 481         struct list_head *head;
 482
 483         mutex_lock(&adev->mn_lock);
 484
 485         amn = bo->mn;
 486         if (amn == NULL) {
 487                 mutex_unlock(&adev->mn_lock);
 488                 return;
 489         }
 490
 491         down_write(&amn->lock);
 492
 493         /* save the next list entry for later */
 494         head = bo->mn_list.next;
 495
 496         bo->mn = NULL;
 497         list_del_init(&bo->mn_list);
 498
 499         if (list_empty(head)) {
 500                 struct amdgpu_mn_node *node;
 501
 502                 node = container_of(head, struct amdgpu_mn_node, bos);
 503                 interval_tree_remove(&node->it, &amn->objects);
 504                 kfree(node);
 505         }
 506
 507         up_write(&amn->lock);
 508         mutex_unlock(&adev->mn_lock);
 509 }
 510