/*
* For kswapd, balance_pgdat() will work across all this node's zones until
* they are all at high_wmark_pages(zone).
*
* Returns the highest zone idx kswapd was reclaiming at
*
* There is special handling here for zones which are full of pinned pages.
* This can happen if the pages are all mlocked, or if they are all used by
* device drivers (say, ZONE_DMA). Or if they are all in use by hugetlb.
* What we do is to detect the case where all pages in the zone have been
* scanned twice and there has been zero successful reclaim. Mark the zone as
* dead and from now on, only perform a short scan. Basically we're polling
* the zone for when the problem goes away.
*
* kswapd scans the zones in the highmem->normal->dma direction. It skips
* zones which have free_pages > high_wmark_pages(zone), but once a zone is
* found to have free_pages <= high_wmark_pages(zone), we scan that zone and the
* lower zones regardless of the number of free pages in the lower zones. This
* interoperates with the page allocator fallback scheme to ensure that aging
* of pages is balanced across the zones.
*/
static int balance_pgdat(pg_data_t *pgdat, int order, int classzone_idx)
{
	struct scan_control sc = {
		.gfp_mask = GFP_KERNEL,
		.order = order,
		.priority = DEF_PRIORITY,
		.may_writepage = !laptop_mode,
		.may_unmap = 1,
		.may_swap = 1,
	};
	int end_zone = 0;	/* Inclusive. 0 = ZONE_DMA */
	int zid;

	count_vm_event(PAGEOUTRUN);

	do {
		/* Cleared below if any zone reports a sufficient scan rate. */
		bool raise_priority = true;

		sc.nr_reclaimed = 0;

		/*
		 * Pass 1: walk highmem->dma looking for the highest zone
		 * which needs scanning. That index becomes end_zone.
		 */
		for (zid = pgdat->nr_zones - 1; zid >= 0; zid--) {
			struct zone *zone = &pgdat->node_zones[zid];

			/*
			 * Skip empty zones, and once the priority has been
			 * raised also skip zones that are not reclaimable.
			 */
			if (!populated_zone(zone) ||
			    (sc.priority != DEF_PRIORITY &&
			     !zone_reclaimable(zone)))
				continue;

			/*
			 * Do some background aging of the anon list, to give
			 * pages a chance to be referenced before reclaiming.
			 */
			age_active_anon(zone, &sc);

			/*
			 * Stop at this zone when either:
			 *  - the number of buffer_heads exceeds the allowed
			 *    level and this is a highmem zone, so kswapd is
			 *    forced to reclaim from it to relieve lowmem
			 *    pressure; or
			 *  - the zone is not balanced for this order.
			 */
			if ((buffer_heads_over_limit && is_highmem_idx(zid)) ||
			    !zone_balanced(zone, order, false, 0, 0)) {
				end_zone = zid;
				break;
			}

			/* Balanced: clear the dirty and congested flags. */
			clear_bit(ZONE_CONGESTED, &zone->flags);
			clear_bit(ZONE_DIRTY, &zone->flags);
		}

		/* Pass 1 ran off the bottom: no zone asked for reclaim. */
		if (zid < 0)
			break;

		/*
		 * If we're having trouble reclaiming, start doing writepage
		 * even in laptop mode.
		 */
		if (sc.priority < DEF_PRIORITY - 2)
			sc.may_writepage = 1;

		/*
		 * Pass 2: scan in the dma->highmem direction, stopping at
		 * the last zone which needs scanning (end_zone, inclusive).
		 *
		 * We do this because the page allocator works in the opposite
		 * direction. This prevents the page allocator from allocating
		 * pages behind kswapd's direction of progress, which would
		 * cause too much scanning of the lower zones.
		 */
		for (zid = 0; zid <= end_zone; zid++) {
			struct zone *zone = &pgdat->node_zones[zid];
			unsigned long nr_soft_scanned;
			unsigned long soft_reclaimed;

			if (!populated_zone(zone) ||
			    (sc.priority != DEF_PRIORITY &&
			     !zone_reclaimable(zone)))
				continue;

			sc.nr_scanned = 0;
			nr_soft_scanned = 0;

			/* Call soft limit reclaim before shrinking the zone. */
			soft_reclaimed = mem_cgroup_soft_limit_reclaim(zone,
							order, sc.gfp_mask,
							&nr_soft_scanned);
			sc.nr_reclaimed += soft_reclaimed;

			/*
			 * There should be no need to raise the scanning
			 * priority if enough pages are already being scanned
			 * that the high watermark would be met at 100%
			 * efficiency; a true return here cancels the raise.
			 */
			if (kswapd_shrink_zone(zone, end_zone, &sc))
				raise_priority = false;
		}

		/*
		 * If the low watermark is met there is no need for processes
		 * to be throttled on pfmemalloc_wait as they should now be
		 * able to safely make forward progress. Wake them.
		 */
		if (waitqueue_active(&pgdat->pfmemalloc_wait) &&
		    pfmemalloc_watermark_ok(pgdat))
			wake_up_all(&pgdat->pfmemalloc_wait);

		/* Check if kswapd should be suspending. */
		if (try_to_freeze() || kthread_should_stop())
			break;

		/*
		 * Raise priority if scanning rate is too low or there was no
		 * progress in reclaiming pages.
		 */
		if (raise_priority || !sc.nr_reclaimed)
			sc.priority--;
	} while (sc.priority >= 1 &&
		 !pgdat_balanced(pgdat, order, classzone_idx));

	/*
	 * Return the highest zone idx we were reclaiming at so
	 * prepare_kswapd_sleep() makes the same decisions as here.
	 */
	return end_zone;
}