hugetlb: do early cow when page pinned on src mm
author		Peter Xu <[email protected]>
		Sat, 13 Mar 2021 05:07:33 +0000 (21:07 -0800)
committer	Linus Torvalds <[email protected]>
		Sat, 13 Mar 2021 19:27:30 +0000 (11:27 -0800)
This is the last missing piece of the COW-during-fork effort for handling
pinned pages found at fork() time.  See commit 70e806e4e645 ("mm: Do early
cow for pinned pages during fork() for ptes", 2020-09-27) for more
information; we do the same thing here, except for hugetlb mappings rather
than ptes.

Note that after Jason's recent work in commit 57efa1fe5957 ("mm/gup:
prevent gup_fast from racing with COW during fork", 2020-12-15), which is
safer and easier to understand, the whole of copy_page_range() is now safe
against gup-fast, so we no longer need the wr-protect trick proposed in
70e806e4e645.
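
The change is a classic drop-locks-and-recheck scheme: snapshot the src
pte under the lock, drop both pgtable locks so that the huge page
allocation and copy can sleep, then retake the locks and only install the
copy into the child if the src pte is still unchanged; otherwise discard
the copy and retry.  As a minimal sketch of the same pattern in plain C
with pthreads (mutexes standing in for the pgtable spinlocks; every name
below is illustrative, not a kernel API):

#include <pthread.h>
#include <stdlib.h>
#include <string.h>

struct slot {
	pthread_mutex_t lock;	/* stands in for the pgtable lock */
	unsigned long entry;	/* stands in for the huge pte */
	void *data;		/* stands in for the page contents */
	size_t size;
};

/*
 * Duplicate s->data without holding the lock across the (sleepable)
 * allocation and copy, committing the result only if the entry did not
 * change in the meantime -- the moral equivalent of the pte_same()
 * recheck in the diff below.
 */
static void *dup_slot_data(struct slot *s)
{
	unsigned long old;
	void *copy;

again:
	pthread_mutex_lock(&s->lock);
	old = s->entry;			/* snapshot under the lock */
	pthread_mutex_unlock(&s->lock);	/* drop it: we may sleep below */

	copy = malloc(s->size);
	if (!copy)
		return NULL;
	memcpy(copy, s->data, s->size);	/* may race with writers */

	pthread_mutex_lock(&s->lock);
	if (s->entry != old) {		/* entry changed: copy is stale */
		pthread_mutex_unlock(&s->lock);
		free(copy);
		goto again;
	}
	pthread_mutex_unlock(&s->lock);
	return copy;
}

In the hugetlb path below, the same shape wraps alloc_huge_page() and
copy_user_huge_page(): a stale copy is caught by the pte_same() recheck
and freed before it can ever be installed.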

Link: https://lkml.kernel.org/r/[email protected]
Signed-off-by: Peter Xu <[email protected]>
Reviewed-by: Mike Kravetz <[email protected]>
Reviewed-by: Jason Gunthorpe <[email protected]>
Cc: Alexey Dobriyan <[email protected]>
Cc: Andrea Arcangeli <[email protected]>
Cc: Christoph Hellwig <[email protected]>
Cc: Daniel Vetter <[email protected]>
Cc: David Airlie <[email protected]>
Cc: David Gibson <[email protected]>
Cc: Gal Pressman <[email protected]>
Cc: Jan Kara <[email protected]>
Cc: Jann Horn <[email protected]>
Cc: Kirill Shutemov <[email protected]>
Cc: Kirill Tkhai <[email protected]>
Cc: Matthew Wilcox <[email protected]>
Cc: Miaohe Lin <[email protected]>
Cc: Mike Rapoport <[email protected]>
Cc: Roland Scheidegger <[email protected]>
Cc: VMware Graphics <[email protected]>
Cc: Wei Zhang <[email protected]>
Signed-off-by: Andrew Morton <[email protected]>
Signed-off-by: Linus Torvalds <[email protected]>
mm/hugetlb.c

index 7786267da2fe3d1d9723ac1078f96f92884cfa29..5b1ab1f427c57cd1a66c29d9fca147d02bf229d4 100644
@@ -3728,6 +3728,18 @@ static bool is_hugetlb_entry_hwpoisoned(pte_t pte)
                return false;
 }
 
+static void
+hugetlb_install_page(struct vm_area_struct *vma, pte_t *ptep, unsigned long addr,
+                    struct page *new_page)
+{
+       __SetPageUptodate(new_page);
+       set_huge_pte_at(vma->vm_mm, addr, ptep, make_huge_pte(vma, new_page, 1));
+       hugepage_add_new_anon_rmap(new_page, vma, addr);
+       hugetlb_count_add(pages_per_huge_page(hstate_vma(vma)), vma->vm_mm);
+       ClearHPageRestoreReserve(new_page);
+       SetHPageMigratable(new_page);
+}
+
 int copy_hugetlb_page_range(struct mm_struct *dst, struct mm_struct *src,
                            struct vm_area_struct *vma)
 {
@@ -3737,6 +3749,7 @@ int copy_hugetlb_page_range(struct mm_struct *dst, struct mm_struct *src,
        bool cow = is_cow_mapping(vma->vm_flags);
        struct hstate *h = hstate_vma(vma);
        unsigned long sz = huge_page_size(h);
+       unsigned long npages = pages_per_huge_page(h);
        struct address_space *mapping = vma->vm_file->f_mapping;
        struct mmu_notifier_range range;
        int ret = 0;
@@ -3785,6 +3798,7 @@ int copy_hugetlb_page_range(struct mm_struct *dst, struct mm_struct *src,
                spin_lock_nested(src_ptl, SINGLE_DEPTH_NESTING);
                entry = huge_ptep_get(src_pte);
                dst_entry = huge_ptep_get(dst_pte);
+again:
                if (huge_pte_none(entry) || !huge_pte_none(dst_entry)) {
                        /*
                         * Skip if src entry none.  Also, skip in the
@@ -3808,6 +3822,52 @@ int copy_hugetlb_page_range(struct mm_struct *dst, struct mm_struct *src,
                        }
                        set_huge_swap_pte_at(dst, addr, dst_pte, entry, sz);
                } else {
+                       entry = huge_ptep_get(src_pte);
+                       ptepage = pte_page(entry);
+                       get_page(ptepage);
+
+                       /*
+                        * This is a rare case where we see pinned hugetlb
+                        * pages while they're prone to COW.  We need to do the
+                        * COW earlier during fork.
+                        *
+                        * When pre-allocating the page or copying data, we
+                        * need to be without the pgtable locks since we could
+                        * sleep during the process.
+                        */
+                       if (unlikely(page_needs_cow_for_dma(vma, ptepage))) {
+                               pte_t src_pte_old = entry;
+                               struct page *new;
+
+                               spin_unlock(src_ptl);
+                               spin_unlock(dst_ptl);
+                               /* Do not use reserve as it's private owned */
+                               new = alloc_huge_page(vma, addr, 1);
+                               if (IS_ERR(new)) {
+                                       put_page(ptepage);
+                                       ret = PTR_ERR(new);
+                                       break;
+                               }
+                               copy_user_huge_page(new, ptepage, addr, vma,
+                                                   npages);
+                               put_page(ptepage);
+
+                               /* Install the new huge page if src pte stable */
+                               dst_ptl = huge_pte_lock(h, dst, dst_pte);
+                               src_ptl = huge_pte_lockptr(h, src, src_pte);
+                               spin_lock_nested(src_ptl, SINGLE_DEPTH_NESTING);
+                               entry = huge_ptep_get(src_pte);
+                               if (!pte_same(src_pte_old, entry)) {
+                                       put_page(new);
+                                       /* dst_entry won't change as in child */
+                                       goto again;
+                               }
+                               hugetlb_install_page(vma, dst_pte, addr, new);
+                               spin_unlock(src_ptl);
+                               spin_unlock(dst_ptl);
+                               continue;
+                       }
+
                        if (cow) {
                                /*
                                 * No need to notify as we are downgrading page
@@ -3818,12 +3878,10 @@ int copy_hugetlb_page_range(struct mm_struct *dst, struct mm_struct *src,
                                 */
                                huge_ptep_set_wrprotect(src, addr, src_pte);
                        }
-                       entry = huge_ptep_get(src_pte);
-                       ptepage = pte_page(entry);
-                       get_page(ptepage);
+
                        page_dup_rmap(ptepage, true);
                        set_huge_pte_at(dst, addr, dst_pte, entry);
-                       hugetlb_count_add(pages_per_huge_page(h), dst);
+                       hugetlb_count_add(npages, dst);
                }
                spin_unlock(src_ptl);
                spin_unlock(dst_ptl);
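
For reference, the gate for this slow path, page_needs_cow_for_dma(), was
introduced earlier in this series; a rough sketch of its logic (a
paraphrase, not the verbatim source at this commit):

/*
 * Rough sketch of the helper gating the early-COW path; the real
 * definition lives in the mm headers at this point in the series.
 */
static inline bool page_needs_cow_for_dma(struct vm_area_struct *vma,
					  struct page *page)
{
	/* Only private (COW) mappings can need an early copy at all. */
	if (!is_cow_mapping(vma->vm_flags))
		return false;

	/* Fast exit: this mm has never pinned any page. */
	if (!atomic_read(&vma->vm_mm->has_pinned))
		return false;

	/* Refcount-based heuristic; false positives are acceptable. */
	return page_maybe_dma_pinned(page);
}

The check is deliberately a heuristic: page_maybe_dma_pinned() may report
false positives on heavily-referenced pages, which merely cost an extra
copy, but it is designed never to miss a real FOLL_PIN pin.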