http://blog.sina.com.cn/s/blog_4d66a3cb0100prfe.html
附上lowmemorykiller.c代码
/* drivers/misc/lowmemorykiller.c
*
* The lowmemorykiller driver lets user-space specify a set of memory thresholds
* where processes with a range of oom_score_adj values will get killed. Specify
* the minimum oom_score_adj values in
* /sys/module/lowmemorykiller/parameters/adj and the number of free pages in
* /sys/module/lowmemorykiller/parameters/minfree. Both files take a comma
* separated list of numbers in ascending order.
*
* For example, write "0,8" to /sys/module/lowmemorykiller/parameters/adj and
* "1024,4096" to /sys/module/lowmemorykiller/parameters/minfree to kill
* processes with a oom_score_adj value of 8 or higher when the free memory
* drops below 4096 pages and kill processes with a oom_score_adj value of 0 or
* higher when the free memory drops below 1024 pages.
*
* The driver considers memory used for caches to be free, but if a large
* percentage of the cached memory is locked this can be very inaccurate
* and processes may not get killed until the normal oom killer is triggered.
*
* Copyright (C) 2007-2008 Google, Inc.
*
* This software is licensed under the terms of the GNU General Public
* License version 2, as published by the Free Software Foundation, and
* may be copied, distributed, and modified under those terms.
*
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
*
*/
#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt
#include <linux/module.h>
#include <linux/kernel.h>
#include <linux/mm.h>
#include <linux/oom.h>
#include <linux/sched.h>
#include <linux/rcupdate.h>
#include <linux/notifier.h>
#include <linux/mutex.h>
#include <linux/delay.h>
#include <linux/swap.h>
#include <linux/fs.h>
#include <linux/cpuset.h>
/*
 * _ZONE is the zone index tune_lmk_param() uses when it consults
 * lowmem_reserve[] and the watermark of the preferred zone:
 * ZONE_HIGHMEM when CONFIG_HIGHMEM is defined, ZONE_NORMAL otherwise.
 */
#ifdef CONFIG_HIGHMEM
#define _ZONE ZONE_HIGHMEM
#else
#define _ZONE ZONE_NORMAL
#endif
/*
 * Verbosity threshold for lowmem_print(): a message is emitted when this
 * value is greater than or equal to the message's level.
 */
static uint32_t lowmem_debug_level = 1;
/*
 * oom_score_adj thresholds, paired index-by-index with lowmem_minfree[].
 * Room for 6 entries; only the first lowmem_adj_size are consulted.
 */
static short lowmem_adj[6] = {
0,
1,
6,
12,
};
/* Number of valid entries in lowmem_adj[]. */
static int lowmem_adj_size = 4;
/*
 * Free-page thresholds, in pages, paired with lowmem_adj[]. lowmem_shrink()
 * picks the first entry that both its free and file-cache estimates are
 * below. The MB figures presumably assume 4 kB pages - TODO confirm.
 */
static int lowmem_minfree[6] = {
3 * 512, /* 6MB */
2 * 1024, /* 8MB */
4 * 1024, /* 16MB */
16 * 1024, /* 64MB */
};
/* Number of valid entries in lowmem_minfree[]. */
static int lowmem_minfree_size = 4;
/*
 * When non-zero, the kswapd path of tune_lmk_param() lets
 * tune_lmk_zone_param() adjust other_file as well as other_free.
 */
static int lmk_fast_run = 1;
/*
 * Set to jiffies + HZ after a kill. Until that time passes, lowmem_shrink()
 * backs off when it finds a task flagged TIF_MEMDIE.
 */
static unsigned long lowmem_deathpending_timeout;
/*
 * lowmem_print - emit a pr_info() message if the debug level allows it
 * @level: minimum value of lowmem_debug_level at which the message is shown
 * @...:   printk-style format string followed by its arguments
 */
#define lowmem_print(level, ...)				\
	do {							\
		if ((level) <= lowmem_debug_level)		\
			pr_info(__VA_ARGS__);			\
	} while (0)
/*
 * test_task_flag - check whether any thread of a process has a flag set
 * @p:    task whose threads are walked, starting with @p itself
 * @flag: thread flag handed to test_tsk_thread_flag()
 *
 * Every thread is inspected while holding its own task_lock().
 *
 * Return: 1 as soon as one thread has @flag set, 0 if none does.
 */
static int test_task_flag(struct task_struct *p, int flag)
{
	struct task_struct *thread = p;
	int flag_set;

	do {
		task_lock(thread);
		flag_set = test_tsk_thread_flag(thread, flag);
		task_unlock(thread);

		if (flag_set)
			return 1;
	} while_each_thread(p, thread);

	return 0;
}
/* Taken by lowmem_shrink() for the duration of a call with nr_to_scan > 0. */
static DEFINE_MUTEX(scan_mutex);
/*
 * can_use_cma_pages - tell whether an allocation may be served from CMA
 * @gfp_mask: allocation flags of the request
 *
 * The request can use CMA pages when its own migrate type is CMA, or when
 * a CMA type appears in that migrate type's fallback list before the
 * MIGRATE_RESERVE entry that ends the walk.
 *
 * Return: 1 if CMA pages are usable for @gfp_mask, 0 otherwise.
 */
int can_use_cma_pages(gfp_t gfp_mask)
{
	int mtype = allocflags_to_migratetype(gfp_mask);
	int *fallbacks = get_migratetype_fallbacks(mtype);
	int idx;

	if (is_migrate_cma(mtype))
		return 1;

	/* The walk stops at the first CMA entry or at MIGRATE_RESERVE. */
	for (idx = 0; ; idx++) {
		int candidate = fallbacks[idx];

		if (is_migrate_cma(candidate))
			return 1;
		if (candidate == MIGRATE_RESERVE)
			return 0;
	}
}
/*
 * tune_lmk_zone_param - discount pages the current allocation cannot use
 * @zonelist:      zonelist to walk
 * @classzone_idx: index of the zone the allocation prefers
 * @other_free:    free-page estimate to adjust in place; may be NULL
 * @other_file:    file-page estimate to adjust in place; may be NULL
 * @use_cma_pages: non-zero when the allocation may use CMA pages
 *
 * For every zone in @zonelist:
 *  - ZONE_MOVABLE: when CMA is not usable, its free CMA pages are removed
 *    from @other_free; nothing else is done for that zone.
 *  - zones above @classzone_idx: their free pages are removed from
 *    @other_free and their file pages (minus shmem) from @other_file.
 *  - zones below @classzone_idx: if the zone passes a zero-mark watermark
 *    check, its lowmem_reserve for @classzone_idx is removed from
 *    @other_free (plus free CMA pages, capped at the zone's free pages,
 *    when CMA is not usable); otherwise all of its free pages are removed.
 *  - the zone equal to @classzone_idx is left untouched.
 *
 * Every write through @other_free is NULL-guarded, matching the guard the
 * higher-zone branch already applied to both pointers.
 */
void tune_lmk_zone_param(struct zonelist *zonelist, int classzone_idx,
			 int *other_free, int *other_file,
			 int use_cma_pages)
{
	struct zone *zone;
	struct zoneref *zoneref;
	int zone_idx;

	for_each_zone_zonelist(zone, zoneref, zonelist, MAX_NR_ZONES) {
		zone_idx = zonelist_zone_idx(zoneref);

		if (zone_idx == ZONE_MOVABLE) {
			if (!use_cma_pages && other_free != NULL)
				*other_free -=
					zone_page_state(zone, NR_FREE_CMA_PAGES);
			continue;
		}

		if (zone_idx > classzone_idx) {
			if (other_free != NULL)
				*other_free -= zone_page_state(zone,
							       NR_FREE_PAGES);
			if (other_file != NULL)
				*other_file -= zone_page_state(zone,
							       NR_FILE_PAGES)
					- zone_page_state(zone, NR_SHMEM);
		} else if (zone_idx < classzone_idx) {
			/* Only other_free is adjusted for lower zones. */
			if (other_free == NULL)
				continue;

			if (zone_watermark_ok(zone, 0, 0, classzone_idx, 0)) {
				if (!use_cma_pages) {
					*other_free -= min(
					  zone->lowmem_reserve[classzone_idx] +
					  zone_page_state(
					    zone, NR_FREE_CMA_PAGES),
					  zone_page_state(
					    zone, NR_FREE_PAGES));
				} else {
					*other_free -=
					  zone->lowmem_reserve[classzone_idx];
				}
			} else {
				*other_free -=
					zone_page_state(zone, NR_FREE_PAGES);
			}
		}
	}
}
#ifdef CONFIG_HIGHMEM
/*
 * adjust_gfp_mask - widen kswapd's gfp mask to include highmem
 * @gfp_mask: mask to update in place
 *
 * Does nothing unless the caller is kswapd. For kswapd, __GFP_HIGHMEM is
 * added when the mask already targets ZONE_HIGHMEM, or when it targets
 * ZONE_NORMAL and the preferred zone passes zone_watermark_ok_safe() at
 * its high watermark.
 */
void adjust_gfp_mask(gfp_t *gfp_mask)
{
	struct zone *first_zone;
	struct zonelist *zl;
	enum zone_type top_idx;

	if (!current_is_kswapd())
		return;

	zl = node_zonelist(0, *gfp_mask);
	top_idx = gfp_zone(*gfp_mask);
	first_zones_zonelist(zl, top_idx, NULL, &first_zone);

	/* The watermark is only consulted for ZONE_NORMAL requests. */
	if (top_idx == ZONE_HIGHMEM ||
	    (top_idx == ZONE_NORMAL &&
	     zone_watermark_ok_safe(first_zone, 0,
				    high_wmark_pages(first_zone), 0, 0)))
		*gfp_mask |= __GFP_HIGHMEM;
}
#else
/* Without CONFIG_HIGHMEM there is nothing to adjust. */
void adjust_gfp_mask(gfp_t *unused)
{
}
#endif
void tune_lmk_param(int *other_free, int *other_file, struct shrink_control *sc)
{
gfp_t gfp_mask;
struct zone *preferred_zone;
struct zonelist *zonelist;
enum zone_type high_zoneidx, classzone_idx;
unsigned long balance_gap;
int use_cma_pages;
gfp_mask = sc->gfp_mask;
adjust_gfp_mask(&gfp_mask);
zonelist = node_zonelist(0, gfp_mask);
high_zoneidx = gfp_zone(gfp_mask);
first_zones_zonelist(zonelist, high_zoneidx, NULL, &preferred_zone);
classzone_idx = zone_idx(preferred_zone);
use_cma_pages = can_use_cma_pages(gfp_mask);
balance_gap = min(low_wmark_pages(preferred_zone),
(preferred_zone->present_pages +
KSWAPD_ZONE_BALANCE_GAP_RATIO-1) /
KSWAPD_ZONE_BALANCE_GAP_RATIO);
if (likely(current_is_kswapd() && zone_watermark_ok(preferred_zone, 0,
high_wmark_pages(preferred_zone) + SWAP_CLUSTER_MAX +
balance_gap, 0, 0))) {
if (lmk_fast_run)
tune_lmk_zone_param(zonelist, classzone_idx, other_free,
other_file, use_cma_pages);
else
tune_lmk_zone_param(zonelist, classzone_idx, other_free,
NULL, use_cma_pages);
if (zone_watermark_ok(preferred_zone, 0, 0, _ZONE, 0)) {
if (!use_cma_pages) {
*other_free -= min(
preferred_zone->lowmem_reserve[_ZONE]
+ zone_page_state(
preferred_zone, NR_FREE_CMA_PAGES),
zone_page_state(
preferred_zone, NR_FREE_PAGES));
} else {
*other_free -=
preferred_zone->lowmem_reserve[_ZONE];
}
} else {
*other_free -= zone_page_state(preferred_zone,
NR_FREE_PAGES);
}
lowmem_print(4, "lowmem_shrink of kswapd tunning for highmem "
"ofree %d, %d\n", *other_free, *other_file);
} else {
tune_lmk_zone_param(zonelist, classzone_idx, other_free,
other_file, use_cma_pages);
if (!use_cma_pages) {
*other_free -=
zone_page_state(preferred_zone, NR_FREE_CMA_PAGES);
}
lowmem_print(4, "lowmem_shrink tunning for others ofree %d, "
"%d\n", *other_free, *other_file);
}
}
/*
 * lowmem_shrink - shrinker callback that kills one process when memory is low
 * @s:  the shrinker; not referenced by this function
 * @sc: shrink request; nr_to_scan and gfp_mask are read
 *
 * Estimates the free pages (other_free) and file-cache pages (other_file),
 * lets tune_lmk_param() adjust both, then walks lowmem_minfree[] for the
 * first entry that both estimates are below; the lowmem_adj[] entry at the
 * same index becomes min_score_adj. When sc->nr_to_scan is 0 or no entry
 * matched, only the page total is returned. Otherwise every process is
 * examined and the one with the highest oom_score_adj that is at least
 * min_score_adj (largest RSS among equals) is sent SIGKILL and flagged
 * TIF_MEMDIE.
 *
 * Return: 0 if waiting for scan_mutex was interrupted, or if a task flagged
 * TIF_MEMDIE was found before lowmem_deathpending_timeout passed; otherwise
 * the sum of active and inactive anon and file pages, reduced by the
 * victim's RSS when a process was killed.
 */
static int lowmem_shrink(struct shrinker *s, struct shrink_control *sc)
{
struct task_struct *tsk;
struct task_struct *selected = NULL;
int rem = 0;
int tasksize;
int i;
/* One above OOM_SCORE_ADJ_MAX serves as the "no threshold matched" marker. */
short min_score_adj = OOM_SCORE_ADJ_MAX + 1;
int minfree = 0;
int selected_tasksize = 0;
short selected_oom_score_adj;
int array_size = ARRAY_SIZE(lowmem_adj);
int other_free;
int other_file;
unsigned long nr_to_scan = sc->nr_to_scan;
/* Only calls that ask for a scan take scan_mutex; nr_to_scan == 0 runs unlocked. */
if (nr_to_scan > 0) {
if (mutex_lock_interruptible(&scan_mutex) < 0)
return 0;
}
other_free = global_page_state(NR_FREE_PAGES);
/* File pages excluding shmem and swap cache, clamped at 0. */
if (global_page_state(NR_SHMEM) + total_swapcache_pages() <
global_page_state(NR_FILE_PAGES))
other_file = global_page_state(NR_FILE_PAGES) -
global_page_state(NR_SHMEM) -
total_swapcache_pages();
else
other_file = 0;
tune_lmk_param(&other_free, &other_file, sc);
/* Use no more table entries than either array currently provides. */
if (lowmem_adj_size < array_size)
array_size = lowmem_adj_size;
if (lowmem_minfree_size < array_size)
array_size = lowmem_minfree_size;
/* First threshold that both estimates are below selects min_score_adj. */
for (i = 0; i < array_size; i++) {
minfree = lowmem_minfree[i];
if (other_free < minfree && other_file < minfree) {
min_score_adj = lowmem_adj[i];
break;
}
}
if (nr_to_scan > 0)
lowmem_print(3, "lowmem_shrink %lu, %x, ofree %d %d, ma %hd\n",
nr_to_scan, sc->gfp_mask, other_free,
other_file, min_score_adj);
/* Value reported back to the caller: total pages on the four LRU lists. */
rem = global_page_state(NR_ACTIVE_ANON) +
global_page_state(NR_ACTIVE_FILE) +
global_page_state(NR_INACTIVE_ANON) +
global_page_state(NR_INACTIVE_FILE);
/* Nothing to kill: either a pure query or no threshold was crossed. */
if (nr_to_scan <= 0 || min_score_adj == OOM_SCORE_ADJ_MAX + 1) {
lowmem_print(5, "lowmem_shrink %lu, %x, return %d\n",
nr_to_scan, sc->gfp_mask, rem);
if (nr_to_scan > 0)
mutex_unlock(&scan_mutex);
return rem;
}
selected_oom_score_adj = min_score_adj;
rcu_read_lock();
for_each_process(tsk) {
struct task_struct *p;
short oom_score_adj;
/* Kernel threads are never candidates. */
if (tsk->flags & PF_KTHREAD)
continue;
/* if task no longer has any memory ignore it */
if (test_task_flag(tsk, TIF_MM_RELEASED))
continue;
/* An earlier victim is still flagged and the timeout has not passed: back off. */
if (time_before_eq(jiffies, lowmem_deathpending_timeout)) {
if (test_task_flag(tsk, TIF_MEMDIE)) {
rcu_read_unlock();
/* give the system time to free up the memory */
msleep_interruptible(20);
/* nr_to_scan > 0 is guaranteed past the early return above, so the mutex is held. */
mutex_unlock(&scan_mutex);
return 0;
}
}
/* On success p is returned with task_lock() held; NULL means skip this process. */
p = find_lock_task_mm(tsk);
if (!p)
continue;
oom_score_adj = p->signal->oom_score_adj;
if (oom_score_adj < min_score_adj) {
task_unlock(p);
continue;
}
tasksize = get_mm_rss(p->mm);
task_unlock(p);
if (tasksize <= 0)
continue;
/* Prefer the higher oom_score_adj; on a tie prefer the larger RSS. */
if (selected) {
if (oom_score_adj < selected_oom_score_adj)
continue;
if (oom_score_adj == selected_oom_score_adj &&
tasksize <= selected_tasksize)
continue;
}
selected = p;
selected_tasksize = tasksize;
selected_oom_score_adj = oom_score_adj;
lowmem_print(3, "select '%s' (%d), adj %hd, size %d, to kill\n",
p->comm, p->pid, oom_score_adj, tasksize);
}
if (selected) {
lowmem_print(1, "Killing '%s' (%d), adj %hd,\n" \
" to free %ldkB on behalf of '%s' (%d) because\n" \
" cache %ldkB is below limit %ldkB for oom_score_adj %hd\n" \
" Free memory is %ldkB above reserved.\n" \
" Free CMA is %ldkB\n" \
" Total reserve is %ldkB\n" \
" Total free pages is %ldkB\n" \
" Total file cache is %ldkB\n" \
" GFP mask is 0x%x\n",
selected->comm, selected->pid,
selected_oom_score_adj,
selected_tasksize * (long)(PAGE_SIZE / 1024),
current->comm, current->pid,
other_file * (long)(PAGE_SIZE / 1024),
minfree * (long)(PAGE_SIZE / 1024),
min_score_adj,
other_free * (long)(PAGE_SIZE / 1024),
global_page_state(NR_FREE_CMA_PAGES) *
(long)(PAGE_SIZE / 1024),
totalreserve_pages * (long)(PAGE_SIZE / 1024),
global_page_state(NR_FREE_PAGES) *
(long)(PAGE_SIZE / 1024),
global_page_state(NR_FILE_PAGES) *
(long)(PAGE_SIZE / 1024),
sc->gfp_mask);
/* Extra memory and task dumps when an adj-0 process is killed at debug level 2+. */
if (lowmem_debug_level >= 2 && selected_oom_score_adj == 0) {
show_mem(SHOW_MEM_FILTER_NODES);
dump_tasks(NULL, NULL);
}
/* Open a window of HZ jiffies in which later calls back off on a TIF_MEMDIE task. */
lowmem_deathpending_timeout = jiffies + HZ;
send_sig(SIGKILL, selected, 0);
set_tsk_thread_flag(selected, TIF_MEMDIE);
rem -= selected_tasksize;
rcu_read_unlock();
/* give the system time to free up the memory */
msleep_interruptible(20);
} else
rcu_read_unlock();
lowmem_print(4, "lowmem_shrink %lu, %x, return %d\n",
nr_to_scan, sc->gfp_mask, rem);
mutex_unlock(&scan_mutex);
return rem;
}
/*
 * Shrinker descriptor registered by lowmem_init(): lowmem_shrink() is the
 * callback and seeks is 16 times DEFAULT_SEEKS. The seeks field is also
 * exposed as the "cost" module parameter.
 */
static struct shrinker lowmem_shrinker = {
.shrink = lowmem_shrink,
.seeks = DEFAULT_SEEKS * 16
};
/* Module init: registers lowmem_shrinker and always returns 0. */
static int __init lowmem_init(void)
{
register_shrinker(&lowmem_shrinker);
return 0;
}
/* Module exit: unregisters lowmem_shrinker. */
static void __exit lowmem_exit(void)
{
unregister_shrinker(&lowmem_shrinker);
}
#ifdef CONFIG_ANDROID_LOW_MEMORY_KILLER_AUTODETECT_OOM_ADJ_VALUES
/*
 * lowmem_oom_adj_to_oom_score_adj - convert an oom_adj value
 * @oom_adj: value to convert
 *
 * Return: OOM_SCORE_ADJ_MAX when @oom_adj equals OOM_ADJUST_MAX; otherwise
 * @oom_adj multiplied by OOM_SCORE_ADJ_MAX and then divided by -OOM_DISABLE
 * using integer arithmetic.
 */
static short lowmem_oom_adj_to_oom_score_adj(short oom_adj)
{
	if (oom_adj != OOM_ADJUST_MAX)
		return (oom_adj * OOM_SCORE_ADJ_MAX) / -OOM_DISABLE;

	return OOM_SCORE_ADJ_MAX;
}
static void lowmem_autodetect_oom_adj_values(void)
{
int i;
short oom_adj;
short oom_score_adj;
int array_size = ARRAY_SIZE(lowmem_adj);
if (lowmem_adj_size < array_size)
array_size = lowmem_adj_size;
if (array_size <= 0)
return;
oom_adj = lowmem_adj[array_size - 1];
if (oom_adj > OOM_ADJUST_MAX)
return;
oom_score_adj = lowmem_oom_adj_to_oom_score_adj(oom_adj);
if (oom_score_adj <= OOM_ADJUST_MAX)
return;
lowmem_print(1, "lowmem_shrink: convert oom_adj to oom_score_adj:\n");
for (i = 0; i < array_size; i++) {
oom_adj = lowmem_adj[i];
oom_score_adj = lowmem_oom_adj_to_oom_score_adj(oom_adj);
lowmem_adj[i] = oom_score_adj;
lowmem_print(1, "oom_adj %d => oom_score_adj %d\n",
oom_adj, oom_score_adj);
}
}
/*
 * lowmem_adj_array_set - "set" handler for the adj module parameter
 * @val: string written by the user
 * @kp:  kernel parameter being set
 *
 * Delegates parsing to param_array_ops.set() and then runs the oom_adj
 * autodetection on lowmem_adj[], whatever the parse result was.
 *
 * Return: the value returned by param_array_ops.set().
 */
static int lowmem_adj_array_set(const char *val, const struct kernel_param *kp)
{
	const int status = param_array_ops.set(val, kp);

	/* HACK: Autodetect oom_adj values in lowmem_adj array */
	lowmem_autodetect_oom_adj_values();

	return status;
}
/* "get" handler for the adj parameter: forwards to param_array_ops.get(). */
static int lowmem_adj_array_get(char *buffer, const struct kernel_param *kp)
{
return param_array_ops.get(buffer, kp);
}
/* "free" handler for the adj parameter: forwards to param_array_ops.free(). */
static void lowmem_adj_array_free(void *arg)
{
param_array_ops.free(arg);
}
/*
 * Parameter operations for "adj": the generic array handlers, with the set
 * path wrapped so that oom_adj autodetection runs after every write.
 */
static struct kernel_param_ops lowmem_adj_array_ops = {
.set = lowmem_adj_array_set,
.get = lowmem_adj_array_get,
.free = lowmem_adj_array_free,
};
static const struct kparam_array __param_arr_adj = {
.max = ARRAY_SIZE(lowmem_adj),
.num = &lowmem_adj_size,
.ops = ¶m_ops_short,
.elemsize = sizeof(lowmem_adj[0]),
.elem = lowmem_adj,
};
#endif
/* "cost": exposes lowmem_shrinker.seeks. */
module_param_named(cost, lowmem_shrinker.seeks, int, S_IRUGO | S_IWUSR);
/*
 * "adj": the lowmem_adj[] thresholds. With autodetection enabled the
 * parameter is registered by hand so writes go through
 * lowmem_adj_array_ops; otherwise the stock array parameter is used.
 */
#ifdef CONFIG_ANDROID_LOW_MEMORY_KILLER_AUTODETECT_OOM_ADJ_VALUES
__module_param_call(MODULE_PARAM_PREFIX, adj,
&lowmem_adj_array_ops,
.arr = &__param_arr_adj,
S_IRUGO | S_IWUSR, -1);
__MODULE_PARM_TYPE(adj, "array of short");
#else
module_param_array_named(adj, lowmem_adj, short, &lowmem_adj_size,
S_IRUGO | S_IWUSR);
#endif
/*
 * "minfree": the lowmem_minfree[] thresholds.
 * NOTE(review): lowmem_minfree is declared as int but registered here with
 * element type uint - confirm this mismatch is intended.
 */
module_param_array_named(minfree, lowmem_minfree, uint, &lowmem_minfree_size,
S_IRUGO | S_IWUSR);
/* "debug_level": verbosity threshold used by lowmem_print(). */
module_param_named(debug_level, lowmem_debug_level, uint, S_IRUGO | S_IWUSR);
/* "lmk_fast_run": toggles other_file tuning on the kswapd path. */
module_param_named(lmk_fast_run, lmk_fast_run, int, S_IRUGO | S_IWUSR);
module_init(lowmem_init);
module_exit(lowmem_exit);
MODULE_LICENSE("GPL");
Android中,进程的生命周期都是由系统控制的,即使用户关掉了程序,进程依然是存在于内存之中。这样设计的目的是为了下次能快速启动。当然,随着系统运行时间的增长,内存会越来越少。Android Kernel 会在内存紧张、进行内存回收的时候执行检查,杀死一些进程,释放掉内存。
首先定义shrinker结构体,lowmem_shrink为回调函数的指针,当有内存分页回收的时候,这个函数将会被调用。
合理配置这张表,对于小内存设备有非常重要的作用。
lowmem_adj中各项数值代表阈值的警戒级数,lowmem_minfree代表对应级数的剩余内存。也就是说,当系统的剩余内存小于6MB的时候,警戒级数为0;当系统剩余内存小于8MB而大于6MB的时候,警戒级数为1;当剩余内存小于16MB而大于8MB的时候,警戒级数为6;当剩余内存小于64MB而大于16MB的时候,警戒级数为12。Low memory killer 的规则就是根据当前系统的剩余内存多少来获取当前的警戒级数,如果进程的oom_adj不小于警戒级数并且最大,进程将会被杀死(相同oom_adj的,则杀死占用内存较多的)。oom_adj越小,代表进程越重要。一些前台的进程,oom_adj会比较小,而后台的服务,oom_adj会比较大,所以当内存不足的时候,Low memory killer 必然先杀掉后台服务而不是前台的进程。
OK,现在我们来看具体代码,也就是lowmem_shrink这个回调函数:
首先通过global_page_state获取当前剩余内存大小,然后根据剩余内存和内存阈值表查找当前的内存警戒数min_adj。接着遍历所有进程,找到oom_adj大于min_adj并且oom_adj最大的进程:
进程的oom_adj 小于警戒阈值,则无视。
获取这个进程所占用的内存大小tasksize,如果oom_adj相同而tasksize不大于我们当前选出进程的内存,则无视。
如果大于则选中这个进程:
经过for_each的遍历,selected 就是我们选出要释放掉的bad进程,它具有下面两个条件:
- oom_adj不小于当前警戒阈值并且最大
- 在同样大小的oom_adj中,占用内存最多
最后,我们释放掉这个进程的内存,通过send_sig(SIGKILL, selected, 0)来向进程发送一个不可以忽略或阻塞的SIGKILL信号。
阈值表可以通过/sys/module/lowmemorykiller/parameters/adj和/sys/module/lowmemorykiller/parameters/minfree进行配置,例如在init.rc中:
# Write value must be consistent with the above properties.
write /sys/module/lowmemorykiller/parameters/adj 0,1,2,7,14,15
write /proc/sys/vm/overcommit_memory 1
write /sys/module/lowmemorykiller/parameters/minfree 1536,2048,4096,5120,5632,6144
class_start default
进程oom_adj同样可以进行设置,通过write /proc/<PID>/oom_adj
# Set init its forked children's oom_adj.
write /proc/1/oom_adj -16
其他未赋值的都在static块中进行了初始化,是通过system/rootdir/init.rc进行配置的:
# Define the oom_adj values for the classes of processes that can be
# killed by the kernel.
# Define the memory thresholds at which the above process classes will
# be killed.