include/linux/hugetlb.h

   1 /* SPDX-License-Identifier: GPL-2.0 */
   2 #ifndef _LINUX_HUGETLB_H
   3 #define _LINUX_HUGETLB_H
   4
   5 #include <linux/mm.h>
   6 #include <linux/mm_types.h>
   7 #include <linux/mmdebug.h>
   8 #include <linux/fs.h>
   9 #include <linux/hugetlb_inline.h>
  10 #include <linux/cgroup.h>
  11 #include <linux/page_ref.h>
  12 #include <linux/list.h>
  13 #include <linux/kref.h>
  14 #include <linux/pgtable.h>
  15 #include <linux/gfp.h>
  16 #include <linux/userfaultfd_k.h>
  17
  18 struct ctl_table;
  19 struct user_struct;
  20 struct mmu_gather;
  21 struct node;
  22
  23 void free_huge_folio(struct folio *folio);
  24
  25 #ifdef CONFIG_HUGETLB_PAGE
  26
  27 #include <linux/pagemap.h>
  28 #include <linux/shm.h>
  29 #include <asm/tlbflush.h>
  30
  31 /*
   32  * For a HugeTLB page, there is more metadata to save in the struct page. But
   33  * the head struct page cannot meet our needs, so we have to abuse other tail
   34  * struct pages to store the metadata.
  35  */
  36 #define __NR_USED_SUBPAGE 3
  37
  38 struct hugepage_subpool {
  39         spinlock_t lock;
  40         long count;
  41         long max_hpages;        /* Maximum huge pages or -1 if no maximum. */
  42         long used_hpages;       /* Used count against maximum, includes */
  43                                 /* both allocated and reserved pages. */
  44         struct hstate *hstate;
  45         long min_hpages;        /* Minimum huge pages or -1 if no minimum. */
  46         long rsv_hpages;        /* Pages reserved against global pool to */
  47                                 /* satisfy minimum size. */
  48 };
  49
  50 struct resv_map {
  51         struct kref refs;
  52         spinlock_t lock;
  53         struct list_head regions;
  54         long adds_in_progress;
  55         struct list_head region_cache;
  56         long region_cache_count;
  57         struct rw_semaphore rw_sema;
  58 #ifdef CONFIG_CGROUP_HUGETLB
  59         /*
  60          * On private mappings, the counter to uncharge reservations is stored
  61          * here. If these fields are 0, then either the mapping is shared, or
  62          * cgroup accounting is disabled for this resv_map.
  63          */
  64         struct page_counter *reservation_counter;
  65         unsigned long pages_per_hpage;
  66         struct cgroup_subsys_state *css;
  67 #endif
  68 };
  69
  70 /*
  71  * Region tracking -- allows tracking of reservations and instantiated pages
  72  *                    across the pages in a mapping.
  73  *
  74  * The region data structures are embedded into a resv_map and protected
  75  * by a resv_map's lock.  The set of regions within the resv_map represent
  76  * reservations for huge pages, or huge pages that have already been
  77  * instantiated within the map.  The from and to elements are huge page
  78  * indices into the associated mapping.  from indicates the starting index
   79  * of the region.  to represents the first index past the end of the region.
  80  *
  81  * For example, a file region structure with from == 0 and to == 4 represents
  82  * four huge pages in a mapping.  It is important to note that the to element
  83  * represents the first element past the end of the region. This is used in
  84  * arithmetic as 4(to) - 0(from) = 4 huge pages in the region.
  85  *
  86  * Interval notation of the form [from, to) will be used to indicate that
  87  * the endpoint from is inclusive and to is exclusive.
  88  */
  89 struct file_region {
  90         struct list_head link;
  91         long from;
  92         long to;
  93 #ifdef CONFIG_CGROUP_HUGETLB
  94         /*
  95          * On shared mappings, each reserved region appears as a struct
  96          * file_region in resv_map. These fields hold the info needed to
  97          * uncharge each reservation.
  98          */
  99         struct page_counter *reservation_counter;
 100         struct cgroup_subsys_state *css;
 101 #endif
 102 };
 103
 104 struct hugetlb_vma_lock {
 105         struct kref refs;
 106         struct rw_semaphore rw_sema;
 107         struct vm_area_struct *vma;
 108 };
 109
 110 extern struct resv_map *resv_map_alloc(void);
 111 void resv_map_release(struct kref *ref);
 112
 113 extern spinlock_t hugetlb_lock;
 114 extern int hugetlb_max_hstate __read_mostly;
 115 #define for_each_hstate(h) \
 116         for ((h) = hstates; (h) < &hstates[hugetlb_max_hstate]; (h)++)
 117
 118 struct hugepage_subpool *hugepage_new_subpool(struct hstate *h, long max_hpages,
 119                                                 long min_hpages);
 120 void hugepage_put_subpool(struct hugepage_subpool *spool);
 121
 122 void hugetlb_dup_vma_private(struct vm_area_struct *vma);
 123 void clear_vma_resv_huge_pages(struct vm_area_struct *vma);
 124 int move_hugetlb_page_tables(struct vm_area_struct *vma,
 125                              struct vm_area_struct *new_vma,
 126                              unsigned long old_addr, unsigned long new_addr,
 127                              unsigned long len);
 128 int copy_hugetlb_page_range(struct mm_struct *, struct mm_struct *,
 129                             struct vm_area_struct *, struct vm_area_struct *);
 130 struct page *hugetlb_follow_page_mask(struct vm_area_struct *vma,
 131                                       unsigned long address, unsigned int flags,
 132                                       unsigned int *page_mask);
 133 void unmap_hugepage_range(struct vm_area_struct *,
 134                           unsigned long, unsigned long, struct page *,
 135                           zap_flags_t);
 136 void __unmap_hugepage_range(struct mmu_gather *tlb,
 137                           struct vm_area_struct *vma,
 138                           unsigned long start, unsigned long end,
 139                           struct page *ref_page, zap_flags_t zap_flags);
 140 void hugetlb_report_meminfo(struct seq_file *);
 141 int hugetlb_report_node_meminfo(char *buf, int len, int nid);
 142 void hugetlb_show_meminfo_node(int nid);
 143 unsigned long hugetlb_total_pages(void);
 144 vm_fault_t hugetlb_fault(struct mm_struct *mm, struct vm_area_struct *vma,
 145                         unsigned long address, unsigned int flags);
 146 #ifdef CONFIG_USERFAULTFD
 147 int hugetlb_mfill_atomic_pte(pte_t *dst_pte,
 148                              struct vm_area_struct *dst_vma,
 149                              unsigned long dst_addr,
 150                              unsigned long src_addr,
 151                              uffd_flags_t flags,
 152                              struct folio **foliop);
 153 #endif /* CONFIG_USERFAULTFD */
 154 bool hugetlb_reserve_pages(struct inode *inode, long from, long to,
 155                                                 struct vm_area_struct *vma,
 156                                                 vm_flags_t vm_flags);
 157 long hugetlb_unreserve_pages(struct inode *inode, long start, long end,
 158                                                 long freed);
 159 bool isolate_hugetlb(struct folio *folio, struct list_head *list);
 160 int get_hwpoison_hugetlb_folio(struct folio *folio, bool *hugetlb, bool unpoison);
 161 int get_huge_page_for_hwpoison(unsigned long pfn, int flags,
 162                                 bool *migratable_cleared);
 163 void folio_putback_active_hugetlb(struct folio *folio);
 164 void move_hugetlb_state(struct folio *old_folio, struct folio *new_folio, int reason);
 165 void hugetlb_fix_reserve_counts(struct inode *inode);
 166 extern struct mutex *hugetlb_fault_mutex_table;
 167 u32 hugetlb_fault_mutex_hash(struct address_space *mapping, pgoff_t idx);
 168
 169 pte_t *huge_pmd_share(struct mm_struct *mm, struct vm_area_struct *vma,
 170                       unsigned long addr, pud_t *pud);
 171 bool hugetlbfs_pagecache_present(struct hstate *h,
 172                                  struct vm_area_struct *vma,
 173                                  unsigned long address);
 174
 175 struct address_space *hugetlb_folio_mapping_lock_write(struct folio *folio);
 176
 177 extern int sysctl_hugetlb_shm_group;
 178 extern struct list_head huge_boot_pages[MAX_NUMNODES];
 179
 180 /* arch callbacks */
 181
 182 #ifndef CONFIG_HIGHPTE
 183 /*
 184  * pte_offset_huge() and pte_alloc_huge() are helpers for those architectures
 185  * which may go down to the lowest PTE level in their huge_pte_offset() and
 186  * huge_pte_alloc(): to avoid reliance on pte_offset_map() without pte_unmap().
 187  */
 188 static inline pte_t *pte_offset_huge(pmd_t *pmd, unsigned long address)
 189 {
 190         return pte_offset_kernel(pmd, address);
 191 }
 192 static inline pte_t *pte_alloc_huge(struct mm_struct *mm, pmd_t *pmd,
 193                                     unsigned long address)
 194 {
 195         return pte_alloc(mm, pmd) ? NULL : pte_offset_huge(pmd, address);
 196 }
 197 #endif
 198
 199 pte_t *huge_pte_alloc(struct mm_struct *mm, struct vm_area_struct *vma,
 200                         unsigned long addr, unsigned long sz);
 201 /*
 202  * huge_pte_offset(): Walk the hugetlb pgtable until the last level PTE.
 203  * Returns the pte_t* if found, or NULL if the address is not mapped.
 204  *
 205  * IMPORTANT: we should normally not directly call this function, instead
 206  * this is only a common interface to implement arch-specific
 207  * walker. Please use hugetlb_walk() instead, because that will attempt to
 208  * verify the locking for you.
 209  *
 210  * Since this function will walk all the pgtable pages (including not only
 211  * high-level pgtable page, but also PUD entry that can be unshared
 212  * concurrently for VM_SHARED), the caller of this function should be
  213  * responsible for its thread safety.  One can follow this rule:
 214  *
 215  *  (1) For private mappings: pmd unsharing is not possible, so holding the
 216  *      mmap_lock for either read or write is sufficient. Most callers
 217  *      already hold the mmap_lock, so normally, no special action is
 218  *      required.
 219  *
 220  *  (2) For shared mappings: pmd unsharing is possible (so the PUD-ranged
 221  *      pgtable page can go away from under us!  It can be done by a pmd
 222  *      unshare with a follow up munmap() on the other process), then we
 223  *      need either:
 224  *
 225  *     (2.1) hugetlb vma lock read or write held, to make sure pmd unshare
 226  *           won't happen upon the range (it also makes sure the pte_t we
 227  *           read is the right and stable one), or,
 228  *
 229  *     (2.2) hugetlb mapping i_mmap_rwsem lock held read or write, to make
 230  *           sure even if unshare happened the racy unmap() will wait until
 231  *           i_mmap_rwsem is released.
 232  *
 233  * Option (2.1) is the safest, which guarantees pte stability from pmd
 234  * sharing pov, until the vma lock released.  Option (2.2) doesn't protect
 235  * a concurrent pmd unshare, but it makes sure the pgtable page is safe to
 236  * access.
 237  */
 238 pte_t *huge_pte_offset(struct mm_struct *mm,
 239                        unsigned long addr, unsigned long sz);
 240 unsigned long hugetlb_mask_last_page(struct hstate *h);
 241 int huge_pmd_unshare(struct mm_struct *mm, struct vm_area_struct *vma,
 242                                 unsigned long addr, pte_t *ptep);
 243 void adjust_range_if_pmd_sharing_possible(struct vm_area_struct *vma,
 244                                 unsigned long *start, unsigned long *end);
 245
 246 extern void __hugetlb_zap_begin(struct vm_area_struct *vma,
 247                                 unsigned long *begin, unsigned long *end);
 248 extern void __hugetlb_zap_end(struct vm_area_struct *vma,
 249                               struct zap_details *details);
 250
 251 static inline void hugetlb_zap_begin(struct vm_area_struct *vma,
 252                                      unsigned long *start, unsigned long *end)
 253 {
 254         if (is_vm_hugetlb_page(vma))
 255                 __hugetlb_zap_begin(vma, start, end);
 256 }
 257
 258 static inline void hugetlb_zap_end(struct vm_area_struct *vma,
 259                                    struct zap_details *details)
 260 {
 261         if (is_vm_hugetlb_page(vma))
 262                 __hugetlb_zap_end(vma, details);
 263 }
 264
 265 void hugetlb_vma_lock_read(struct vm_area_struct *vma);
 266 void hugetlb_vma_unlock_read(struct vm_area_struct *vma);
 267 void hugetlb_vma_lock_write(struct vm_area_struct *vma);
 268 void hugetlb_vma_unlock_write(struct vm_area_struct *vma);
 269 int hugetlb_vma_trylock_write(struct vm_area_struct *vma);
 270 void hugetlb_vma_assert_locked(struct vm_area_struct *vma);
 271 void hugetlb_vma_lock_release(struct kref *kref);
 272 long hugetlb_change_protection(struct vm_area_struct *vma,
 273                 unsigned long address, unsigned long end, pgprot_t newprot,
 274                 unsigned long cp_flags);
 275 bool is_hugetlb_entry_migration(pte_t pte);
 276 bool is_hugetlb_entry_hwpoisoned(pte_t pte);
 277 void hugetlb_unshare_all_pmds(struct vm_area_struct *vma);
 278
 279 #else /* !CONFIG_HUGETLB_PAGE */
 280
 281 static inline void hugetlb_dup_vma_private(struct vm_area_struct *vma)
 282 {
 283 }
 284
 285 static inline void clear_vma_resv_huge_pages(struct vm_area_struct *vma)
 286 {
 287 }
 288
 289 static inline unsigned long hugetlb_total_pages(void)
 290 {
 291         return 0;
 292 }
 293
 294 static inline struct address_space *hugetlb_folio_mapping_lock_write(
 295                                                         struct folio *folio)
 296 {
 297         return NULL;
 298 }
 299
 300 static inline int huge_pmd_unshare(struct mm_struct *mm,
 301                                         struct vm_area_struct *vma,
 302                                         unsigned long addr, pte_t *ptep)
 303 {
 304         return 0;
 305 }
 306
 307 static inline void adjust_range_if_pmd_sharing_possible(
 308                                 struct vm_area_struct *vma,
 309                                 unsigned long *start, unsigned long *end)
 310 {
 311 }
 312
 313 static inline void hugetlb_zap_begin(
 314                                 struct vm_area_struct *vma,
 315                                 unsigned long *start, unsigned long *end)
 316 {
 317 }
 318
 319 static inline void hugetlb_zap_end(
 320                                 struct vm_area_struct *vma,
 321                                 struct zap_details *details)
 322 {
 323 }
 324
 325 static inline int copy_hugetlb_page_range(struct mm_struct *dst,
 326                                           struct mm_struct *src,
 327                                           struct vm_area_struct *dst_vma,
 328                                           struct vm_area_struct *src_vma)
 329 {
 330         BUG();
 331         return 0;
 332 }
 333
 334 static inline int move_hugetlb_page_tables(struct vm_area_struct *vma,
 335                                            struct vm_area_struct *new_vma,
 336                                            unsigned long old_addr,
 337                                            unsigned long new_addr,
 338                                            unsigned long len)
 339 {
 340         BUG();
 341         return 0;
 342 }
 343
 344 static inline void hugetlb_report_meminfo(struct seq_file *m)
 345 {
 346 }
 347
 348 static inline int hugetlb_report_node_meminfo(char *buf, int len, int nid)
 349 {
 350         return 0;
 351 }
 352
 353 static inline void hugetlb_show_meminfo_node(int nid)
 354 {
 355 }
 356
 357 static inline int prepare_hugepage_range(struct file *file,
 358                                 unsigned long addr, unsigned long len)
 359 {
 360         return -EINVAL;
 361 }
 362
 363 static inline void hugetlb_vma_lock_read(struct vm_area_struct *vma)
 364 {
 365 }
 366
 367 static inline void hugetlb_vma_unlock_read(struct vm_area_struct *vma)
 368 {
 369 }
 370
 371 static inline void hugetlb_vma_lock_write(struct vm_area_struct *vma)
 372 {
 373 }
 374
 375 static inline void hugetlb_vma_unlock_write(struct vm_area_struct *vma)
 376 {
 377 }
 378
 379 static inline int hugetlb_vma_trylock_write(struct vm_area_struct *vma)
 380 {
 381         return 1;
 382 }
 383
 384 static inline void hugetlb_vma_assert_locked(struct vm_area_struct *vma)
 385 {
 386 }
 387
 388 static inline int is_hugepage_only_range(struct mm_struct *mm,
 389                                         unsigned long addr, unsigned long len)
 390 {
 391         return 0;
 392 }
 393
 394 static inline void hugetlb_free_pgd_range(struct mmu_gather *tlb,
 395                                 unsigned long addr, unsigned long end,
 396                                 unsigned long floor, unsigned long ceiling)
 397 {
 398         BUG();
 399 }
 400
 401 #ifdef CONFIG_USERFAULTFD
 402 static inline int hugetlb_mfill_atomic_pte(pte_t *dst_pte,
 403                                            struct vm_area_struct *dst_vma,
 404                                            unsigned long dst_addr,
 405                                            unsigned long src_addr,
 406                                            uffd_flags_t flags,
 407                                            struct folio **foliop)
 408 {
 409         BUG();
 410         return 0;
 411 }
 412 #endif /* CONFIG_USERFAULTFD */
 413
 414 static inline pte_t *huge_pte_offset(struct mm_struct *mm, unsigned long addr,
 415                                         unsigned long sz)
 416 {
 417         return NULL;
 418 }
 419
 420 static inline bool isolate_hugetlb(struct folio *folio, struct list_head *list)
 421 {
 422         return false;
 423 }
 424
 425 static inline int get_hwpoison_hugetlb_folio(struct folio *folio, bool *hugetlb, bool unpoison)
 426 {
 427         return 0;
 428 }
 429
 430 static inline int get_huge_page_for_hwpoison(unsigned long pfn, int flags,
 431                                         bool *migratable_cleared)
 432 {
 433         return 0;
 434 }
 435
 436 static inline void folio_putback_active_hugetlb(struct folio *folio)
 437 {
 438 }
 439
 440 static inline void move_hugetlb_state(struct folio *old_folio,
 441                                         struct folio *new_folio, int reason)
 442 {
 443 }
 444
 445 static inline long hugetlb_change_protection(
 446                         struct vm_area_struct *vma, unsigned long address,
 447                         unsigned long end, pgprot_t newprot,
 448                         unsigned long cp_flags)
 449 {
 450         return 0;
 451 }
 452
 453 static inline void __unmap_hugepage_range(struct mmu_gather *tlb,
 454                         struct vm_area_struct *vma, unsigned long start,
 455                         unsigned long end, struct page *ref_page,
 456                         zap_flags_t zap_flags)
 457 {
 458         BUG();
 459 }
 460
 461 static inline vm_fault_t hugetlb_fault(struct mm_struct *mm,
 462                         struct vm_area_struct *vma, unsigned long address,
 463                         unsigned int flags)
 464 {
 465         BUG();
 466         return 0;
 467 }
 468
 469 static inline void hugetlb_unshare_all_pmds(struct vm_area_struct *vma) { }
 470
 471 #endif /* !CONFIG_HUGETLB_PAGE */
 472
 473 #ifndef pgd_write
 474 static inline int pgd_write(pgd_t pgd)
 475 {
 476         BUG();
 477         return 0;
 478 }
 479 #endif
 480
 481 #define HUGETLB_ANON_FILE "anon_hugepage"
 482
 483 enum {
 484         /*
 485          * The file will be used as an shm file so shmfs accounting rules
 486          * apply
 487          */
 488         HUGETLB_SHMFS_INODE     = 1,
 489         /*
 490          * The file is being created on the internal vfs mount and shmfs
 491          * accounting rules do not apply
 492          */
 493         HUGETLB_ANONHUGE_INODE  = 2,
 494 };
 495
 496 #ifdef CONFIG_HUGETLBFS
 497 struct hugetlbfs_sb_info {
 498         long    max_inodes;   /* inodes allowed */
 499         long    free_inodes;  /* inodes free */
 500         spinlock_t      stat_lock;
 501         struct hstate *hstate;
 502         struct hugepage_subpool *spool;
 503         kuid_t  uid;
 504         kgid_t  gid;
 505         umode_t mode;
 506 };
 507
 508 static inline struct hugetlbfs_sb_info *HUGETLBFS_SB(struct super_block *sb)
 509 {
 510         return sb->s_fs_info;
 511 }
 512
 513 struct hugetlbfs_inode_info {
 514         struct inode vfs_inode;
 515         unsigned int seals;
 516 };
 517
 518 static inline struct hugetlbfs_inode_info *HUGETLBFS_I(struct inode *inode)
 519 {
 520         return container_of(inode, struct hugetlbfs_inode_info, vfs_inode);
 521 }
 522
 523 extern const struct vm_operations_struct hugetlb_vm_ops;
 524 struct file *hugetlb_file_setup(const char *name, size_t size, vm_flags_t acct,
 525                                 int creat_flags, int page_size_log);
 526
 527 static inline bool is_file_hugepages(const struct file *file)
 528 {
 529         return file->f_op->fop_flags & FOP_HUGE_PAGES;
 530 }
 531
 532 static inline struct hstate *hstate_inode(struct inode *i)
 533 {
 534         return HUGETLBFS_SB(i->i_sb)->hstate;
 535 }
 536 #else /* !CONFIG_HUGETLBFS */
 537
 538 #define is_file_hugepages(file)                 false
 539 static inline struct file *
 540 hugetlb_file_setup(const char *name, size_t size, vm_flags_t acctflag,
 541                 int creat_flags, int page_size_log)
 542 {
 543         return ERR_PTR(-ENOSYS);
 544 }
 545
 546 static inline struct hstate *hstate_inode(struct inode *i)
 547 {
 548         return NULL;
 549 }
 550 #endif /* !CONFIG_HUGETLBFS */
 551
 552 #ifdef HAVE_ARCH_HUGETLB_UNMAPPED_AREA
 553 unsigned long hugetlb_get_unmapped_area(struct file *file, unsigned long addr,
 554                                         unsigned long len, unsigned long pgoff,
 555                                         unsigned long flags);
 556 #endif /* HAVE_ARCH_HUGETLB_UNMAPPED_AREA */
 557
 558 unsigned long
 559 generic_hugetlb_get_unmapped_area(struct file *file, unsigned long addr,
 560                                   unsigned long len, unsigned long pgoff,
 561                                   unsigned long flags);
 562
 563 /*
  564  * hugetlb page specific state flags.  These flags are located in page.private
 565  * of the hugetlb head page.  Functions created via the below macros should be
 566  * used to manipulate these flags.
 567  *
 568  * HPG_restore_reserve - Set when a hugetlb page consumes a reservation at
 569  *      allocation time.  Cleared when page is fully instantiated.  Free
 570  *      routine checks flag to restore a reservation on error paths.
 571  *      Synchronization:  Examined or modified by code that knows it has
 572  *      the only reference to page.  i.e. After allocation but before use
 573  *      or when the page is being freed.
 574  * HPG_migratable  - Set after a newly allocated page is added to the page
 575  *      cache and/or page tables.  Indicates the page is a candidate for
 576  *      migration.
 577  *      Synchronization:  Initially set after new page allocation with no
 578  *      locking.  When examined and modified during migration processing
 579  *      (isolate, migrate, putback) the hugetlb_lock is held.
 580  * HPG_temporary - Set on a page that is temporarily allocated from the buddy
 581  *      allocator.  Typically used for migration target pages when no pages
 582  *      are available in the pool.  The hugetlb free page path will
 583  *      immediately free pages with this flag set to the buddy allocator.
 584  *      Synchronization: Can be set after huge page allocation from buddy when
 585  *      code knows it has only reference.  All other examinations and
 586  *      modifications require hugetlb_lock.
 587  * HPG_freed - Set when page is on the free lists.
 588  *      Synchronization: hugetlb_lock held for examination and modification.
 589  * HPG_vmemmap_optimized - Set when the vmemmap pages of the page are freed.
 590  * HPG_raw_hwp_unreliable - Set when the hugetlb page has a hwpoison sub-page
 591  *     that is not tracked by raw_hwp_page list.
 592  */
 593 enum hugetlb_page_flags {
 594         HPG_restore_reserve = 0,
 595         HPG_migratable,
 596         HPG_temporary,
 597         HPG_freed,
 598         HPG_vmemmap_optimized,
 599         HPG_raw_hwp_unreliable,
 600         __NR_HPAGEFLAGS,
 601 };
 602
 603 /*
 604  * Macros to create test, set and clear function definitions for
 605  * hugetlb specific page flags.
 606  */
 607 #ifdef CONFIG_HUGETLB_PAGE
 608 #define TESTHPAGEFLAG(uname, flname)                            \
 609 static __always_inline                                          \
 610 bool folio_test_hugetlb_##flname(struct folio *folio)           \
 611         {       void *private = &folio->private;                \
 612                 return test_bit(HPG_##flname, private);         \
 613         }
 614
 615 #define SETHPAGEFLAG(uname, flname)                             \
 616 static __always_inline                                          \
 617 void folio_set_hugetlb_##flname(struct folio *folio)            \
 618         {       void *private = &folio->private;                \
 619                 set_bit(HPG_##flname, private);                 \
 620         }
 621
 622 #define CLEARHPAGEFLAG(uname, flname)                           \
 623 static __always_inline                                          \
 624 void folio_clear_hugetlb_##flname(struct folio *folio)          \
 625         {       void *private = &folio->private;                \
 626                 clear_bit(HPG_##flname, private);               \
 627         }
 628 #else
 629 #define TESTHPAGEFLAG(uname, flname)                            \
 630 static inline bool                                              \
 631 folio_test_hugetlb_##flname(struct folio *folio)                \
 632         { return 0; }
 633
 634 #define SETHPAGEFLAG(uname, flname)                             \
 635 static inline void                                              \
 636 folio_set_hugetlb_##flname(struct folio *folio)                 \
 637         { }
 638
 639 #define CLEARHPAGEFLAG(uname, flname)                           \
 640 static inline void                                              \
 641 folio_clear_hugetlb_##flname(struct folio *folio)               \
 642         { }
 643 #endif
 644
 645 #define HPAGEFLAG(uname, flname)                                \
 646         TESTHPAGEFLAG(uname, flname)                            \
 647         SETHPAGEFLAG(uname, flname)                             \
 648         CLEARHPAGEFLAG(uname, flname)                           \
 649
 650 /*
 651  * Create functions associated with hugetlb page flags
 652  */
 653 HPAGEFLAG(RestoreReserve, restore_reserve)
 654 HPAGEFLAG(Migratable, migratable)
 655 HPAGEFLAG(Temporary, temporary)
 656 HPAGEFLAG(Freed, freed)
 657 HPAGEFLAG(VmemmapOptimized, vmemmap_optimized)
 658 HPAGEFLAG(RawHwpUnreliable, raw_hwp_unreliable)
 659
 660 #ifdef CONFIG_HUGETLB_PAGE
 661
 662 #define HSTATE_NAME_LEN 32
 663 /* Defines one hugetlb page size */
 664 struct hstate {
 665         struct mutex resize_lock;
 666         struct lock_class_key resize_key;
 667         int next_nid_to_alloc;
 668         int next_nid_to_free;
 669         unsigned int order;
 670         unsigned int demote_order;
 671         unsigned long mask;
 672         unsigned long max_huge_pages;
 673         unsigned long nr_huge_pages;
 674         unsigned long free_huge_pages;
 675         unsigned long resv_huge_pages;
 676         unsigned long surplus_huge_pages;
 677         unsigned long nr_overcommit_huge_pages;
 678         struct list_head hugepage_activelist;
 679         struct list_head hugepage_freelists[MAX_NUMNODES];
 680         unsigned int max_huge_pages_node[MAX_NUMNODES];
 681         unsigned int nr_huge_pages_node[MAX_NUMNODES];
 682         unsigned int free_huge_pages_node[MAX_NUMNODES];
 683         unsigned int surplus_huge_pages_node[MAX_NUMNODES];
 684         char name[HSTATE_NAME_LEN];
 685 };
 686
 687 struct huge_bootmem_page {
 688         struct list_head list;
 689         struct hstate *hstate;
 690 };
 691
 692 int isolate_or_dissolve_huge_page(struct page *page, struct list_head *list);
 693 struct folio *alloc_hugetlb_folio(struct vm_area_struct *vma,
 694                                 unsigned long addr, int avoid_reserve);
 695 struct folio *alloc_hugetlb_folio_nodemask(struct hstate *h, int preferred_nid,
 696                                 nodemask_t *nmask, gfp_t gfp_mask,
 697                                 bool allow_alloc_fallback);
 698 int hugetlb_add_to_page_cache(struct folio *folio, struct address_space *mapping,
 699                         pgoff_t idx);
 700 void restore_reserve_on_error(struct hstate *h, struct vm_area_struct *vma,
 701                                 unsigned long address, struct folio *folio);
 702
 703 /* arch callback */
 704 int __init __alloc_bootmem_huge_page(struct hstate *h, int nid);
 705 int __init alloc_bootmem_huge_page(struct hstate *h, int nid);
 706 bool __init hugetlb_node_alloc_supported(void);
 707
 708 void __init hugetlb_add_hstate(unsigned order);
 709 bool __init arch_hugetlb_valid_size(unsigned long size);
 710 struct hstate *size_to_hstate(unsigned long size);
 711
 712 #ifndef HUGE_MAX_HSTATE
 713 #define HUGE_MAX_HSTATE 1
 714 #endif
 715
 716 extern struct hstate hstates[HUGE_MAX_HSTATE];
 717 extern unsigned int default_hstate_idx;
 718
 719 #define default_hstate (hstates[default_hstate_idx])
 720
 721 static inline struct hugepage_subpool *hugetlb_folio_subpool(struct folio *folio)
 722 {
 723         return folio->_hugetlb_subpool;
 724 }
 725
 726 static inline void hugetlb_set_folio_subpool(struct folio *folio,
 727                                         struct hugepage_subpool *subpool)
 728 {
 729         folio->_hugetlb_subpool = subpool;
 730 }
 731
 732 static inline struct hstate *hstate_file(struct file *f)
 733 {
 734         return hstate_inode(file_inode(f));
 735 }
 736
 737 static inline struct hstate *hstate_sizelog(int page_size_log)
 738 {
 739         if (!page_size_log)
 740                 return &default_hstate;
 741
 742         if (page_size_log < BITS_PER_LONG)
 743                 return size_to_hstate(1UL << page_size_log);
 744
 745         return NULL;
 746 }
 747
 748 static inline struct hstate *hstate_vma(struct vm_area_struct *vma)
 749 {
 750         return hstate_file(vma->vm_file);
 751 }
 752
 753 static inline unsigned long huge_page_size(const struct hstate *h)
 754 {
 755         return (unsigned long)PAGE_SIZE << h->order;
 756 }
 757
 758 extern unsigned long vma_kernel_pagesize(struct vm_area_struct *vma);
 759
 760 extern unsigned long vma_mmu_pagesize(struct vm_area_struct *vma);
 761
 762 static inline unsigned long huge_page_mask(struct hstate *h)
 763 {
 764         return h->mask;
 765 }
 766
 767 static inline unsigned int huge_page_order(struct hstate *h)
 768 {
 769         return h->order;
 770 }
 771
 772 static inline unsigned huge_page_shift(struct hstate *h)
 773 {
 774         return h->order + PAGE_SHIFT;
 775 }
 776
 777 static inline bool hstate_is_gigantic(struct hstate *h)
 778 {
 779         return huge_page_order(h) > MAX_PAGE_ORDER;
 780 }
 781
 782 static inline unsigned int pages_per_huge_page(const struct hstate *h)
 783 {
 784         return 1 << h->order;
 785 }
 786
 787 static inline unsigned int blocks_per_huge_page(struct hstate *h)
 788 {
 789         return huge_page_size(h) / 512;
 790 }
 791
 792 static inline struct folio *filemap_lock_hugetlb_folio(struct hstate *h,
 793                                 struct address_space *mapping, pgoff_t idx)
 794 {
 795         return filemap_lock_folio(mapping, idx << huge_page_order(h));
 796 }
 797
 798 #include <asm/hugetlb.h>
 799
 800 #ifndef is_hugepage_only_range
 801 static inline int is_hugepage_only_range(struct mm_struct *mm,
 802                                         unsigned long addr, unsigned long len)
 803 {
 804         return 0;
 805 }
 806 #define is_hugepage_only_range is_hugepage_only_range
 807 #endif
 808
 809 #ifndef arch_clear_hugetlb_flags
 810 static inline void arch_clear_hugetlb_flags(struct folio *folio) { }
 811 #define arch_clear_hugetlb_flags arch_clear_hugetlb_flags
 812 #endif
 813
 814 #ifndef arch_make_huge_pte
 815 static inline pte_t arch_make_huge_pte(pte_t entry, unsigned int shift,
 816                                        vm_flags_t flags)
 817 {
 818         return pte_mkhuge(entry);
 819 }
 820 #endif
 821
 822 static inline struct hstate *folio_hstate(struct folio *folio)
 823 {
 824         VM_BUG_ON_FOLIO(!folio_test_hugetlb(folio), folio);
 825         return size_to_hstate(folio_size(folio));
 826 }
 827
 828 static inline unsigned hstate_index_to_shift(unsigned index)
 829 {
 830         return hstates[index].order + PAGE_SHIFT;
 831 }
 832
 833 static inline int hstate_index(struct hstate *h)
 834 {
 835         return h - hstates;
 836 }
 837
 838 int dissolve_free_hugetlb_folio(struct folio *folio);
 839 int dissolve_free_hugetlb_folios(unsigned long start_pfn,
 840                                     unsigned long end_pfn);
 841
 842 #ifdef CONFIG_MEMORY_FAILURE
 843 extern void folio_clear_hugetlb_hwpoison(struct folio *folio);
 844 #else
 845 static inline void folio_clear_hugetlb_hwpoison(struct folio *folio)
 846 {
 847 }
 848 #endif
 849
 850 #ifdef CONFIG_ARCH_ENABLE_HUGEPAGE_MIGRATION
 851 #ifndef arch_hugetlb_migration_supported
 852 static inline bool arch_hugetlb_migration_supported(struct hstate *h)
 853 {
 854         if ((huge_page_shift(h) == PMD_SHIFT) ||
 855                 (huge_page_shift(h) == PUD_SHIFT) ||
 856                         (huge_page_shift(h) == PGDIR_SHIFT))
 857                 return true;
 858         else
 859                 return false;
 860 }
 861 #endif
 862 #else
 863 static inline bool arch_hugetlb_migration_supported(struct hstate *h)
 864 {
 865         return false;
 866 }
 867 #endif
 868
 869 static inline bool hugepage_migration_supported(struct hstate *h)
 870 {
 871         return arch_hugetlb_migration_supported(h);
 872 }
 873
 874 /*
 875  * Movability check is different as compared to migration check.
 876  * It determines whether or not a huge page should be placed on
 877  * movable zone or not. Movability of any huge page should be
 878  * required only if huge page size is supported for migration.
 879  * There won't be any reason for the huge page to be movable if
 880  * it is not migratable to start with. Also the size of the huge
 881  * page should be large enough to be placed under a movable zone
 882  * and still feasible enough to be migratable. Just the presence
 883  * in movable zone does not make the migration feasible.
 884  *
 885  * So even though large huge page sizes like the gigantic ones
 886  * are migratable they should not be movable because its not
 887  * feasible to migrate them from movable zone.
 888  */
 889 static inline bool hugepage_movable_supported(struct hstate *h)
 890 {
 891         if (!hugepage_migration_supported(h))
 892                 return false;
 893
 894         if (hstate_is_gigantic(h))
 895                 return false;
 896         return true;
 897 }
 898
 899 /* Movability of hugepages depends on migration support. */
 900 static inline gfp_t htlb_alloc_mask(struct hstate *h)
 901 {
 902         if (hugepage_movable_supported(h))
 903                 return GFP_HIGHUSER_MOVABLE;
 904         else
 905                 return GFP_HIGHUSER;
 906 }
 907
 908 static inline gfp_t htlb_modify_alloc_mask(struct hstate *h, gfp_t gfp_mask)
 909 {
 910         gfp_t modified_mask = htlb_alloc_mask(h);
 911
 912         /* Some callers might want to enforce node */
 913         modified_mask |= (gfp_mask & __GFP_THISNODE);
 914
 915         modified_mask |= (gfp_mask & __GFP_NOWARN);
 916
 917         return modified_mask;
 918 }
 919
 920 static inline bool htlb_allow_alloc_fallback(int reason)
 921 {
 922         bool allowed_fallback = false;
 923
 924         /*
 925          * Note: the memory offline, memory failure and migration syscalls will
 926          * be allowed to fallback to other nodes due to lack of a better chioce,
 927          * that might break the per-node hugetlb pool. While other cases will
 928          * set the __GFP_THISNODE to avoid breaking the per-node hugetlb pool.
 929          */
 930         switch (reason) {
 931         case MR_MEMORY_HOTPLUG:
 932         case MR_MEMORY_FAILURE:
 933         case MR_SYSCALL:
 934         case MR_MEMPOLICY_MBIND:
 935                 allowed_fallback = true;
 936                 break;
 937         default:
 938                 break;
 939         }
 940
 941         return allowed_fallback;
 942 }
 943
 944 static inline spinlock_t *huge_pte_lockptr(struct hstate *h,
 945                                            struct mm_struct *mm, pte_t *pte)
 946 {
 947         if (huge_page_size(h) == PMD_SIZE)
 948                 return pmd_lockptr(mm, (pmd_t *) pte);
 949         VM_BUG_ON(huge_page_size(h) == PAGE_SIZE);
 950         return &mm->page_table_lock;
 951 }
 952
 953 #ifndef hugepages_supported
 954 /*
 955  * Some platform decide whether they support huge pages at boot
 956  * time. Some of them, such as powerpc, set HPAGE_SHIFT to 0
 957  * when there is no such support
 958  */
 959 #define hugepages_supported() (HPAGE_SHIFT != 0)
 960 #endif
 961
 962 void hugetlb_report_usage(struct seq_file *m, struct mm_struct *mm);
 963
 964 static inline void hugetlb_count_init(struct mm_struct *mm)
 965 {
 966         atomic_long_set(&mm->hugetlb_usage, 0);
 967 }
 968
 969 static inline void hugetlb_count_add(long l, struct mm_struct *mm)
 970 {
 971         atomic_long_add(l, &mm->hugetlb_usage);
 972 }
 973
 974 static inline void hugetlb_count_sub(long l, struct mm_struct *mm)
 975 {
 976         atomic_long_sub(l, &mm->hugetlb_usage);
 977 }
 978
 979 #ifndef huge_ptep_modify_prot_start
 980 #define huge_ptep_modify_prot_start huge_ptep_modify_prot_start
 981 static inline pte_t huge_ptep_modify_prot_start(struct vm_area_struct *vma,
 982                                                 unsigned long addr, pte_t *ptep)
 983 {
 984         return huge_ptep_get_and_clear(vma->vm_mm, addr, ptep);
 985 }
 986 #endif
 987
 988 #ifndef huge_ptep_modify_prot_commit
 989 #define huge_ptep_modify_prot_commit huge_ptep_modify_prot_commit
 990 static inline void huge_ptep_modify_prot_commit(struct vm_area_struct *vma,
 991                                                 unsigned long addr, pte_t *ptep,
 992                                                 pte_t old_pte, pte_t pte)
 993 {
 994         unsigned long psize = huge_page_size(hstate_vma(vma));
 995
 996         set_huge_pte_at(vma->vm_mm, addr, ptep, pte, psize);
 997 }
 998 #endif
 999
1000 #ifdef CONFIG_NUMA
1001 void hugetlb_register_node(struct node *node);
1002 void hugetlb_unregister_node(struct node *node);
1003 #endif
1004
1005 /*
1006  * Check if a given raw @page in a hugepage is HWPOISON.
1007  */
1008 bool is_raw_hwpoison_page_in_hugepage(struct page *page);
1009
1010 #else   /* CONFIG_HUGETLB_PAGE */
/* Empty placeholder type for builds without CONFIG_HUGETLB_PAGE. */
struct hstate {};
1012
/* !CONFIG_HUGETLB_PAGE stub: always returns NULL. */
static inline struct hugepage_subpool *hugetlb_folio_subpool(struct folio *folio)
{
	return NULL;
}
1017
1018 static inline struct folio *filemap_lock_hugetlb_folio(struct hstate *h,
1019                                 struct address_space *mapping, pgoff_t idx)
1020 {
1021         return NULL;
1022 }
1023
/* !CONFIG_HUGETLB_PAGE stub: always fails with -ENOMEM. */
static inline int isolate_or_dissolve_huge_page(struct page *page,
						struct list_head *list)
{
	return -ENOMEM;
}
1029
/* !CONFIG_HUGETLB_PAGE stub: nothing is allocated; always returns NULL. */
static inline struct folio *alloc_hugetlb_folio(struct vm_area_struct *vma,
					   unsigned long addr,
					   int avoid_reserve)
{
	return NULL;
}
1036
1037 static inline struct folio *
1038 alloc_hugetlb_folio_nodemask(struct hstate *h, int preferred_nid,
1039                         nodemask_t *nmask, gfp_t gfp_mask,
1040                         bool allow_alloc_fallback)
1041 {
1042         return NULL;
1043 }
1044
/* !CONFIG_HUGETLB_PAGE stub: always returns 0. */
static inline int __alloc_bootmem_huge_page(struct hstate *h)
{
	return 0;
}
1049
/* !CONFIG_HUGETLB_PAGE stub: there is no hstate for any file; returns NULL. */
static inline struct hstate *hstate_file(struct file *f)
{
	return NULL;
}
1054
/* !CONFIG_HUGETLB_PAGE stub: returns NULL for every @page_size_log. */
static inline struct hstate *hstate_sizelog(int page_size_log)
{
	return NULL;
}
1059
/* !CONFIG_HUGETLB_PAGE stub: returns NULL without looking at @vma. */
static inline struct hstate *hstate_vma(struct vm_area_struct *vma)
{
	return NULL;
}
1064
/* !CONFIG_HUGETLB_PAGE stub: returns NULL without looking at @folio. */
static inline struct hstate *folio_hstate(struct folio *folio)
{
	return NULL;
}
1069
/* !CONFIG_HUGETLB_PAGE stub: no size maps to an hstate; returns NULL. */
static inline struct hstate *size_to_hstate(unsigned long size)
{
	return NULL;
}
1074
1075 static inline unsigned long huge_page_size(struct hstate *h)
1076 {
1077         return PAGE_SIZE;
1078 }
1079
1080 static inline unsigned long huge_page_mask(struct hstate *h)
1081 {
1082         return PAGE_MASK;
1083 }
1084
1085 static inline unsigned long vma_kernel_pagesize(struct vm_area_struct *vma)
1086 {
1087         return PAGE_SIZE;
1088 }
1089
1090 static inline unsigned long vma_mmu_pagesize(struct vm_area_struct *vma)
1091 {
1092         return PAGE_SIZE;
1093 }
1094
/* !CONFIG_HUGETLB_PAGE stub: the order is always 0. */
static inline unsigned int huge_page_order(struct hstate *h)
{
	return 0;
}
1099
1100 static inline unsigned int huge_page_shift(struct hstate *h)
1101 {
1102         return PAGE_SHIFT;
1103 }
1104
/* !CONFIG_HUGETLB_PAGE stub: no hstate is ever gigantic. */
static inline bool hstate_is_gigantic(struct hstate *h)
{
	return false;
}
1109
/*
 * !CONFIG_HUGETLB_PAGE stub: a "huge page" is a single base page, so
 * the count is always 1.  @h is not dereferenced.
 *
 * The parameter is const-qualified to match the CONFIG_HUGETLB_PAGE
 * version, so callers holding a const struct hstate pointer build the
 * same way in both configurations.
 */
static inline unsigned int pages_per_huge_page(const struct hstate *h)
{
	return 1;
}
1114
/* !CONFIG_HUGETLB_PAGE stub: returns 0 for every @index. */
static inline unsigned hstate_index_to_shift(unsigned index)
{
	return 0;
}
1119
/* !CONFIG_HUGETLB_PAGE stub: the index is always 0. */
static inline int hstate_index(struct hstate *h)
{
	return 0;
}
1124
/* !CONFIG_HUGETLB_PAGE stub: nothing to dissolve; always returns 0. */
static inline int dissolve_free_hugetlb_folio(struct folio *folio)
{
	return 0;
}
1129
/*
 * !CONFIG_HUGETLB_PAGE stub: nothing to dissolve in any pfn range;
 * always returns 0.
 */
static inline int dissolve_free_hugetlb_folios(unsigned long start_pfn,
					   unsigned long end_pfn)
{
	return 0;
}
1135
/* !CONFIG_HUGETLB_PAGE stub: migration is never supported. */
static inline bool hugepage_migration_supported(struct hstate *h)
{
	return false;
}
1140
/* !CONFIG_HUGETLB_PAGE stub: no hstate is ever movable. */
static inline bool hugepage_movable_supported(struct hstate *h)
{
	return false;
}
1145
1146 static inline gfp_t htlb_alloc_mask(struct hstate *h)
1147 {
1148         return 0;
1149 }
1150
1151 static inline gfp_t htlb_modify_alloc_mask(struct hstate *h, gfp_t gfp_mask)
1152 {
1153         return 0;
1154 }
1155
/* !CONFIG_HUGETLB_PAGE stub: fallback is never allowed, whatever @reason. */
static inline bool htlb_allow_alloc_fallback(int reason)
{
	return false;
}
1160
1161 static inline spinlock_t *huge_pte_lockptr(struct hstate *h,
1162                                            struct mm_struct *mm, pte_t *pte)
1163 {
1164         return &mm->page_table_lock;
1165 }
1166
/* !CONFIG_HUGETLB_PAGE stub: does nothing. */
static inline void hugetlb_count_init(struct mm_struct *mm)
{
}

/* !CONFIG_HUGETLB_PAGE stub: does nothing. */
static inline void hugetlb_report_usage(struct seq_file *f, struct mm_struct *m)
{
}

/* !CONFIG_HUGETLB_PAGE stub: does nothing. */
static inline void hugetlb_count_sub(long l, struct mm_struct *mm)
{
}
1178
1179 static inline pte_t huge_ptep_clear_flush(struct vm_area_struct *vma,
1180                                           unsigned long addr, pte_t *ptep)
1181 {
1182 #ifdef CONFIG_MMU
1183         return ptep_get(ptep);
1184 #else
1185         return *ptep;
1186 #endif
1187 }
1188
1189 static inline void set_huge_pte_at(struct mm_struct *mm, unsigned long addr,
1190                                    pte_t *ptep, pte_t pte, unsigned long sz)
1191 {
1192 }
1193
/* !CONFIG_HUGETLB_PAGE stub: does nothing. */
static inline void hugetlb_register_node(struct node *node)
{
}

/* !CONFIG_HUGETLB_PAGE stub: does nothing. */
static inline void hugetlb_unregister_node(struct node *node)
{
}
1201
/* !CONFIG_HUGETLB_PAGE stub: always reports false. */
static inline bool hugetlbfs_pagecache_present(struct hstate *h,
					       struct vm_area_struct *vma,
					       unsigned long address)
{
	return false;
}
1207 #endif  /* CONFIG_HUGETLB_PAGE */
1208
1209 static inline spinlock_t *huge_pte_lock(struct hstate *h,
1210                                         struct mm_struct *mm, pte_t *pte)
1211 {
1212         spinlock_t *ptl;
1213
1214         ptl = huge_pte_lockptr(h, mm, pte);
1215         spin_lock(ptl);
1216         return ptl;
1217 }
1218
1219 #if defined(CONFIG_HUGETLB_PAGE) && defined(CONFIG_CMA)
1220 extern void __init hugetlb_cma_reserve(int order);
1221 #else
1222 static inline __init void hugetlb_cma_reserve(int order)
1223 {
1224 }
1225 #endif
1226
1227 #ifdef CONFIG_ARCH_WANT_HUGE_PMD_SHARE
1228 static inline bool hugetlb_pmd_shared(pte_t *pte)
1229 {
1230         return page_count(virt_to_page(pte)) > 1;
1231 }
1232 #else
1233 static inline bool hugetlb_pmd_shared(pte_t *pte)
1234 {
1235         return false;
1236 }
1237 #endif
1238
1239 bool want_pmd_share(struct vm_area_struct *vma, unsigned long addr);
1240
#ifndef __HAVE_ARCH_FLUSH_HUGETLB_TLB_RANGE
/*
 * Default: flushing a hugetlb range is a plain flush_tlb_range().
 * Architectures with special requirements for evicting HUGETLB backing
 * TLB entries can provide their own implementation instead.
 */
#define flush_hugetlb_tlb_range(vma, addr, end) flush_tlb_range(vma, addr, end)
#endif
1248
1249 static inline bool __vma_shareable_lock(struct vm_area_struct *vma)
1250 {
1251         return (vma->vm_flags & VM_MAYSHARE) && vma->vm_private_data;
1252 }
1253
1254 bool __vma_private_lock(struct vm_area_struct *vma);
1255
1256 /*
1257  * Safe version of huge_pte_offset() to check the locks.  See comments
1258  * above huge_pte_offset().
1259  */
1260 static inline pte_t *
1261 hugetlb_walk(struct vm_area_struct *vma, unsigned long addr, unsigned long sz)
1262 {
1263 #if defined(CONFIG_HUGETLB_PAGE) && \
1264         defined(CONFIG_ARCH_WANT_HUGE_PMD_SHARE) && defined(CONFIG_LOCKDEP)
1265         struct hugetlb_vma_lock *vma_lock = vma->vm_private_data;
1266
1267         /*
1268          * If pmd sharing possible, locking needed to safely walk the
1269          * hugetlb pgtables.  More information can be found at the comment
1270          * above huge_pte_offset() in the same file.
1271          *
1272          * NOTE: lockdep_is_held() is only defined with CONFIG_LOCKDEP.
1273          */
1274         if (__vma_shareable_lock(vma))
1275                 WARN_ON_ONCE(!lockdep_is_held(&vma_lock->rw_sema) &&
1276                              !lockdep_is_held(
1277                                  &vma->vm_file->f_mapping->i_mmap_rwsem));
1278 #endif
1279         return huge_pte_offset(vma->vm_mm, addr, sz);
1280 }
1281
1282 #endif /* _LINUX_HUGETLB_H */