splitting it would free up some memory. Pages on split queue are
going to be split under memory pressure.
+thp_underused_split_page
+ is incremented when a huge page on the split queue was split
+ because it was underused. A THP is underused if the number of
+ zero pages in the THP is above a certain threshold
+ (/sys/kernel/mm/transparent_hugepage/khugepaged/max_ptes_none).
+
thp_split_pmd
is incremented every time a PMD split into table of PTEs.
This can happen, for instance, when application calls mprotect() or
update_mmu_cache_pmd(vma, vmf->address, vmf->pmd);
add_mm_counter(vma->vm_mm, MM_ANONPAGES, HPAGE_PMD_NR);
mm_inc_nr_ptes(vma->vm_mm);
+ deferred_split_folio(folio, false);
spin_unlock(vmf->ptl);
count_vm_event(THP_FAULT_ALLOC);
count_mthp_stat(HPAGE_PMD_ORDER, MTHP_STAT_ANON_FAULT_ALLOC);
return READ_ONCE(ds_queue->split_queue_len);
}
+static bool thp_underused(struct folio *folio)
+{
+ int num_zero_pages = 0, num_filled_pages = 0;
+ void *kaddr;
+ int i;
+
+ if (khugepaged_max_ptes_none == HPAGE_PMD_NR - 1)
+ return false;
+
+ for (i = 0; i < folio_nr_pages(folio); i++) {
+ kaddr = kmap_local_folio(folio, i * PAGE_SIZE);
+ if (!memchr_inv(kaddr, 0, PAGE_SIZE)) {
+ num_zero_pages++;
+ if (num_zero_pages > khugepaged_max_ptes_none) {
+ kunmap_local(kaddr);
+ return true;
+ }
+ } else {
+ /*
+ * Another path for early exit once the number
+ * of non-zero filled pages exceeds threshold.
+ */
+ num_filled_pages++;
+ if (num_filled_pages >= HPAGE_PMD_NR - khugepaged_max_ptes_none) {
+ kunmap_local(kaddr);
+ return false;
+ }
+ }
+ kunmap_local(kaddr);
+ }
+ return false;
+}
+
static unsigned long deferred_split_scan(struct shrinker *shrink,
struct shrink_control *sc)
{
spin_unlock_irqrestore(&ds_queue->split_queue_lock, flags);
list_for_each_entry_safe(folio, next, &list, _deferred_list) {
+ bool did_split = false;
+ bool underused = false;
+
+ if (!folio_test_partially_mapped(folio)) {
+ underused = thp_underused(folio);
+ if (!underused)
+ goto next;
+ }
if (!folio_trylock(folio))
goto next;
- /* split_huge_page() removes page from list on success */
- if (!split_folio(folio))
+ if (!split_folio(folio)) {
+ did_split = true;
+ if (underused)
+ count_vm_event(THP_UNDERUSED_SPLIT_PAGE);
split++;
+ }
folio_unlock(folio);
next:
+ /*
+ * split_folio() removes folio from list on success.
+ * Only add back to the queue if folio is partially mapped.
+ * If thp_underused returns false, or if split_folio fails
+ * in the case it was underused, then consider it used and
+ * don't add it back to split_queue.
+ */
+ if (!did_split && !folio_test_partially_mapped(folio)) {
+ list_del_init(&folio->_deferred_list);
+ ds_queue->split_queue_len--;
+ }
folio_put(folio);
}
*
* Note that these are only respected if collapse was initiated by khugepaged.
*/
-static unsigned int khugepaged_max_ptes_none __read_mostly;
+unsigned int khugepaged_max_ptes_none __read_mostly;
static unsigned int khugepaged_max_ptes_swap __read_mostly;
static unsigned int khugepaged_max_ptes_shared __read_mostly;
pgtable_trans_huge_deposit(mm, pmd, pgtable);
set_pmd_at(mm, address, pmd, _pmd);
update_mmu_cache_pmd(vma, address, pmd);
+ deferred_split_folio(folio, false);
spin_unlock(pmd_ptl);
folio = NULL;