X-Git-Url: https://repo.jachan.dev/linux.git/blobdiff_plain/df91c3513f9a1da5388a5d5972fe4f777fe0d516..0a647f3811d6af56405a819341ceac23e31d4572:/mm/page_alloc.c diff --git a/mm/page_alloc.c b/mm/page_alloc.c index c3edb624fccf..8125263be60f 100644 --- a/mm/page_alloc.c +++ b/mm/page_alloc.c @@ -65,6 +65,9 @@ #include #include "internal.h" +/* prevent >1 _updater_ of zone percpu pageset ->high and ->batch fields */ +static DEFINE_MUTEX(pcp_batch_high_lock); + #ifdef CONFIG_USE_PERCPU_NUMA_NODE_ID DEFINE_PER_CPU(int, numa_node); EXPORT_PER_CPU_SYMBOL(numa_node); @@ -1179,10 +1182,12 @@ void drain_zone_pages(struct zone *zone, struct per_cpu_pages *pcp) { unsigned long flags; int to_drain; + unsigned long batch; local_irq_save(flags); - if (pcp->count >= pcp->batch) - to_drain = pcp->batch; + batch = ACCESS_ONCE(pcp->batch); + if (pcp->count >= batch) + to_drain = batch; else to_drain = pcp->count; if (to_drain > 0) { @@ -1350,8 +1355,9 @@ void free_hot_cold_page(struct page *page, int cold) list_add(&page->lru, &pcp->lists[migratetype]); pcp->count++; if (pcp->count >= pcp->high) { - free_pcppages_bulk(zone, pcp->batch, pcp); - pcp->count -= pcp->batch; + unsigned long batch = ACCESS_ONCE(pcp->batch); + free_pcppages_bulk(zone, batch, pcp); + pcp->count -= batch; } out: @@ -4032,6 +4038,39 @@ static int __meminit zone_batchsize(struct zone *zone) #endif } +/* + * pcp->high and pcp->batch values are related and dependent on one another: + * ->batch must never be higher then ->high. + * The following function updates them in a safe manner without read side + * locking. + * + * Any new users of pcp->batch and pcp->high should ensure they can cope with + * those fields changing asynchronously (acording the the above rule). + * + * mutex_is_locked(&pcp_batch_high_lock) required when calling this function + * outside of boot time (or some other assurance that no concurrent updaters + * exist). + */ +static void pageset_update(struct per_cpu_pages *pcp, unsigned long high, + unsigned long batch) +{ + /* start with a fail safe value for batch */ + pcp->batch = 1; + smp_wmb(); + + /* Update high, then batch, in order */ + pcp->high = high; + smp_wmb(); + + pcp->batch = batch; +} + +/* a companion to setup_pagelist_highmark() */ +static void pageset_set_batch(struct per_cpu_pageset *p, unsigned long batch) +{ + pageset_update(&p->pcp, 6 * batch, max(1UL, 1 * batch)); +} + static void setup_pageset(struct per_cpu_pageset *p, unsigned long batch) { struct per_cpu_pages *pcp; @@ -4041,8 +4080,7 @@ static void setup_pageset(struct per_cpu_pageset *p, unsigned long batch) pcp = &p->pcp; pcp->count = 0; - pcp->high = 6 * batch; - pcp->batch = max(1UL, 1 * batch); + pageset_set_batch(p, batch); for (migratetype = 0; migratetype < MIGRATE_PCPTYPES; migratetype++) INIT_LIST_HEAD(&pcp->lists[migratetype]); } @@ -4051,17 +4089,14 @@ static void setup_pageset(struct per_cpu_pageset *p, unsigned long batch) * setup_pagelist_highmark() sets the high water mark for hot per_cpu_pagelist * to the value high for the pageset p. */ - static void setup_pagelist_highmark(struct per_cpu_pageset *p, unsigned long high) { - struct per_cpu_pages *pcp; + unsigned long batch = max(1UL, high / 4); + if ((high / 4) > (PAGE_SHIFT * 8)) + batch = PAGE_SHIFT * 8; - pcp = &p->pcp; - pcp->high = high; - pcp->batch = max(1UL, high/4); - if ((high/4) > (PAGE_SHIFT * 8)) - pcp->batch = PAGE_SHIFT * 8; + pageset_update(&p->pcp, high, batch); } static void __meminit setup_zone_pageset(struct zone *zone) @@ -5551,6 +5586,8 @@ int percpu_pagelist_fraction_sysctl_handler(ctl_table *table, int write, ret = proc_dointvec_minmax(table, write, buffer, length, ppos); if (!write || (ret < 0)) return ret; + + mutex_lock(&pcp_batch_high_lock); for_each_populated_zone(zone) { for_each_possible_cpu(cpu) { unsigned long high; @@ -5559,6 +5596,7 @@ int percpu_pagelist_fraction_sysctl_handler(ctl_table *table, int write, per_cpu_ptr(zone->pageset, cpu), high); } } + mutex_unlock(&pcp_batch_high_lock); return 0; } @@ -6047,32 +6085,19 @@ void free_contig_range(unsigned long pfn, unsigned nr_pages) #endif #ifdef CONFIG_MEMORY_HOTPLUG -static int __meminit __zone_pcp_update(void *data) -{ - struct zone *zone = data; - int cpu; - unsigned long batch = zone_batchsize(zone), flags; - - for_each_possible_cpu(cpu) { - struct per_cpu_pageset *pset; - struct per_cpu_pages *pcp; - - pset = per_cpu_ptr(zone->pageset, cpu); - pcp = &pset->pcp; - - local_irq_save(flags); - if (pcp->count > 0) - free_pcppages_bulk(zone, pcp->count, pcp); - drain_zonestat(zone, pset); - setup_pageset(pset, batch); - local_irq_restore(flags); - } - return 0; -} - +/* + * The zone indicated has a new number of managed_pages; batch sizes and percpu + * page high values need to be recalulated. + */ void __meminit zone_pcp_update(struct zone *zone) { - stop_machine(__zone_pcp_update, zone, NULL); + unsigned cpu; + unsigned long batch; + mutex_lock(&pcp_batch_high_lock); + batch = zone_batchsize(zone); + for_each_possible_cpu(cpu) + pageset_set_batch(per_cpu_ptr(zone->pageset, cpu), batch); + mutex_unlock(&pcp_batch_high_lock); } #endif