这段时间,断断续续地忙了一阵,因为父亲的去世也不情愿地休息了20来天,一点也没有办法。回来后重新开始学习android的启动流程。对android系统级别的学习,阅读代码成为了唯一的办法,不像应用程序开发来得那么明了快捷。之前花了好多时间才对android的binder驱动做了一定的了解,最近几天从android的启动画面,分析到了init这个由内核最先启动的用户空间进程。参考内容包括老罗的《Android之旅》和邓凡平老师的《深入理解Android》卷,本人只是对学习做一定的总结,帮助自己进一步理解。
一 . init.c中的main函数(路径:system/core/init/init.c)
先给出main的源码,然后对个别关键函数进行分析
- int main(int argc, char **argv) /* init entry point: early filesystem setup, rc-file parsing, queuing of the boot actions, then an endless execute/poll loop */
- {
- int fd_count = 0; /* number of ufds[] entries in use */
- struct pollfd ufds[4];
- char *tmpdev;
- char* debuggable;
- char tmp[32];
- int property_set_fd_init = 0; /* each *_fd_init flag records whether that fd has already been added to ufds[] */
- int signal_fd_init = 0;
- int keychord_fd_init = 0;
- if (!strcmp(basename(argv[0]), "ueventd")) /* invoked under the name "ueventd": hand over to ueventd_main */
- return ueventd_main(argc, argv);
- /* clear the umask */
- umask(0);
- /* Get the basic filesystem setup we need put
- * together in the initramdisk on / and then we'll
- * let the rc file figure out the rest.
- */
- mkdir("/dev", 0755);
- mkdir("/proc", 0755);
- mkdir("/sys", 0755);
- mount("tmpfs", "/dev", "tmpfs", 0, "mode=0755");
- mkdir("/dev/pts", 0755);
- mkdir("/dev/socket", 0755);
- mount("devpts", "/dev/pts", "devpts", 0, NULL);
- mount("proc", "/proc", "proc", 0, NULL);
- mount("sysfs", "/sys", "sysfs", 0, NULL);
- /* We must have some place other than / to create the
- * device nodes for kmsg and null, otherwise we won't
- * be able to remount / read-only later on.
- * Now that tmpfs is mounted on /dev, we can actually
- * talk to the outside world.
- */
- open_devnull_stdio();
- log_init();
- INFO("reading config file\n");
- init_parse_config_file("/init.rc");
- /* pull the kernel commandline and ramdisk properties file in */
- import_kernel_cmdline(0);
- get_hardware_name(hardware, &revision);
- snprintf(tmp, sizeof(tmp), "/init.%s.rc", hardware);// board-specific rc file, named after the hardware string
- init_parse_config_file(tmp);
- action_for_each_trigger("early-init", action_add_queue_tail);
- queue_builtin_action(wait_for_coldboot_done_action, "wait_for_coldboot_done");
- queue_builtin_action(property_init_action, "property_init");
- queue_builtin_action(keychord_init_action, "keychord_init");
- queue_builtin_action(console_init_action, "console_init");// author's note: this action shows the second boot screen (its body is not part of this excerpt)
- queue_builtin_action(set_init_properties_action, "set_init_properties");
- /* execute all the boot actions to get us started */
- action_for_each_trigger("init", action_add_queue_tail);
- action_for_each_trigger("early-fs", action_add_queue_tail);
- action_for_each_trigger("fs", action_add_queue_tail);
- action_for_each_trigger("post-fs", action_add_queue_tail);
- queue_builtin_action(property_service_init_action, "property_service_init");
- queue_builtin_action(signal_init_action, "signal_init");
- queue_builtin_action(check_startup_action, "check_startup");
- /* execute all the boot actions to get us started */
- action_for_each_trigger("early-boot", action_add_queue_tail);
- action_for_each_trigger("boot", action_add_queue_tail);// hand every action named "boot" to action_add_queue_tail
- /* run all property triggers based on current state of the properties */
- queue_builtin_action(queue_property_triggers_action, "queue_propety_triggers");
- #if BOOTCHART
- queue_builtin_action(bootchart_init_action, "bootchart_init");
- #endif
- for(;;) {
- int nr, i, timeout = -1; /* -1 makes poll() wait indefinitely */
- execute_one_command(); // run the next pending command of the queued actions, if there is one
- restart_processes();// check whether any process needs to be restarted
- if (!property_set_fd_init && get_property_set_fd() > 0) { /* add each fd to ufds[] once, as soon as its getter returns a positive value */
- ufds[fd_count].fd = get_property_set_fd();
- ufds[fd_count].events = POLLIN;
- ufds[fd_count].revents = 0;
- fd_count++;
- property_set_fd_init = 1;
- }
- if (!signal_fd_init && get_signal_fd() > 0) {
- ufds[fd_count].fd = get_signal_fd();
- ufds[fd_count].events = POLLIN;
- ufds[fd_count].revents = 0;
- fd_count++;
- signal_fd_init = 1;
- }
- if (!keychord_fd_init && get_keychord_fd() > 0) {
- ufds[fd_count].fd = get_keychord_fd();
- ufds[fd_count].events = POLLIN;
- ufds[fd_count].revents = 0;
- fd_count++;
- keychord_fd_init = 1;
- }
- if (process_needs_restart) {
- timeout = (process_needs_restart - gettime()) * 1000; /* presumably seconds -> milliseconds for poll(); TODO confirm the unit of gettime() */
- if (timeout < 0)
- timeout = 0;
- }
- if (!action_queue_empty() || cur_action) /* work is still pending: do not block in poll() */
- timeout = 0;
- #if BOOTCHART
- if (bootchart_count > 0) {
- if (timeout < 0 || timeout > BOOTCHART_POLLING_MS)
- timeout = BOOTCHART_POLLING_MS;
- if (bootchart_step() < 0 || --bootchart_count == 0) {
- bootchart_finish();
- bootchart_count = 0;
- }
- }
- #endif
- nr = poll(ufds, fd_count, timeout);
- if (nr <= 0) /* timeout or error: go round the loop again */
- continue;
- for (i = 0; i < fd_count; i++) {
- if (ufds[i].revents == POLLIN) { /* NOTE(review): exact-equality test, so an fd reporting POLLIN together with any other flag is skipped */
- if (ufds[i].fd == get_property_set_fd())
- handle_property_set_fd();
- else if (ufds[i].fd == get_keychord_fd())
- handle_keychord();
- else if (ufds[i].fd == get_signal_fd())
- handle_signal();
- }
- }
- }
- return 0;
- }
init作为用户空间第一个启动的进程,需要完成很多的任务。分以下部分内容来分析
1. ueventd进程
if (!strcmp(basename(argv[0]), "ueventd"))
return ueventd_main(argc, argv);
basename取出argv[0]中的文件名部分,比如argv[0]为/sbin/ueventd,则basename为ueventd。android系统第一次启动的进程名为init,所以此时ueventd_main不会执行。该函数真正执行是在init启动service ueventd /sbin/ueventd之后:init fork出一个子进程,再用execve启动/sbin/ueventd。实际上/sbin/ueventd是指向init的符号链接,也就是说ueventd进程执行起来后运行的代码还是init.c中的main,只是进程名不同,因此相同的main函数走了不同的分支。ueventd_main函数的主要功能:在Linux系统中现在都使用uevent机制来管理设备的热插拔事件,给用户空间权利来完成一些设备文件节点的创建。这种机制建立在socket通信机制之上,用户空间和内核驱动进行交互,详细的机制没有去了解过,是linux2.6版本中常用的机制。比如驱动中调用device_create等时,会向用户空间报告一个uevent事件,用户空间由ueventd进程解析后去创建设备节点。
2.init.rc的解析
INFO("reading config file\n");
init_parse_config_file("/init.rc");
init.rc是一个配置文件,内部有许多的语言规则,所有语言会在init_parse_config_file中进行解析。调用流程如下:init_parse_config_file先调用read_file读入文件内容,再调用parse_config进行解析。
parse_config源码如下:
- static void parse_config(const char *fn, char *s)// tokenizes the config text and dispatches it line by line; fn: file name, s: the file's text (e.g. the contents of init.rc)
- {
- struct parse_state state;
- char *args[INIT_PARSER_MAXARGS]; /* tokens collected for the current line */
- int nargs;
- nargs = 0;
- state.filename = fn;
- state.line = 1;
- state.ptr = s;
- state.nexttoken = 0;
- state.parse_line = parse_line_no_op; /* line handler in effect until a section keyword is seen */
- for (;;) {
- switch (next_token(&state)) {
- case T_EOF: // end of the input
- state.parse_line(&state, 0, 0); /* final call to the current handler with nargs == 0 */
- return;
- case T_NEWLINE:// end of a line
- if (nargs) {
- int kw = lookup_keyword(args[0]); // map the first word to its keyword id, e.g. "service" -> K_service
- if (kw_is(kw, SECTION)) { // section keyword? (per the article, only "service" and "on" qualify)
- state.parse_line(&state, 0, 0); /* call the previous section's handler with nargs == 0 before switching */
- parse_new_section(&state, kw, nargs, args);
- } else {
- state.parse_line(&state, nargs, args);// an ordinary line below an "on" or "service" header
- }
- nargs = 0;
- }
- break;
- case T_TEXT:// a text token
- if (nargs < INIT_PARSER_MAXARGS) { /* tokens beyond INIT_PARSER_MAXARGS are dropped */
- args[nargs++] = state.text;
- }
- break;
- }
- }
- }
- int init_parse_config_file(const char *fn) /* reads the file named fn and parses it; returns 0, or -1 when read_file yields no data */
- {
- char *data;
- data = read_file(fn, 0);
- if (!data) return -1;
- parse_config(fn, data);
- DUMP();
- return 0;
- }
这个函数中可以看到在for的无限循环中,主要对init.rc的内容进行解析,以一行一行进行读取,每读完一行(遇到T_NEWLINE)时,使用lookup_keyword分析该行的第一个参数,部分代码如下:
- case 's': /* keywords beginning with 's'; the literals below omit that letter, so s presumably points past the first character */
- if (!strcmp(s, "ervice")) return K_service;
- if (!strcmp(s, "etenv")) return K_setenv;
- if (!strcmp(s, "etkey")) return K_setkey;
- if (!strcmp(s, "etprop")) return K_setprop;
- if (!strcmp(s, "etrlimit")) return K_setrlimit;
- if (!strcmp(s, "ocket")) return K_socket;
- if (!strcmp(s, "tart")) return K_start;
- if (!strcmp(s, "top")) return K_stop;
- if (!strcmp(s, "ymlink")) return K_symlink;
- if (!strcmp(s, "ysclktz")) return K_sysclktz;
该函数主要对关键字的第一个字符做case,然后再用strcmp比较其余部分,这些命令都是按init.rc的格式要求来进行的。比如常用的service和on等经过lookup_keyword后返回K_service和K_on。随后使用kw_is(kw, SECTION)判断返回的kw是不是属于SECTION类型,在init.rc中只有service和on满足该类型,这样就会对on和service所在的段进行解析,我们这里首先分析service,以init.rc中的service zygote为例:
- service zygote /system/bin/app_process -Xzygote /system/bin --zygote --start-system-server
- class main
- socket zygote stream 666
- onrestart write /sys/android_power/request_state wake
- onrestart write /sys/power/state on
- onrestart restart media
- onrestart restart netd
当解析到这段代码时,执行parse_service
- static void *parse_service(struct parse_state *state, int nargs, char **args) /* handles a "service <name> <program> [args...]" line; returns the new struct service, or 0 on any error */
- {
- struct service *svc;
- if (nargs < 3) {
- parse_error(state, "services must have a name and a program\n");
- return 0;
- }
- if (!valid_name(args[1])) {
- parse_error(state, "invalid service name '%s'\n", args[1]);
- return 0;
- }
- svc = service_find_by_name(args[1]);// look for an existing service with this name
- if (svc) {
- parse_error(state, "ignored duplicate definition of service '%s'\n", args[1]);
- return 0;
- }
- nargs -= 2; /* skip args[0] and the service name; the rest is the command line */
- svc = calloc(1, sizeof(*svc) + sizeof(char*) * nargs); /* NOTE(review): args[nargs] is written below; presumably struct service already declares one args slot -- confirm */
- if (!svc) {
- parse_error(state, "out of memory\n");
- return 0;
- }
- svc->name = args[1]; // service name (the pointer is stored, not a copy)
- svc->classname = "default"; // the class name defaults to "default"
- memcpy(svc->args, args + 2, sizeof(char*) * nargs);// args[0] of the service is the executable
- svc->args[nargs] = 0;
- svc->nargs = nargs;// number of arguments
- svc->onrestart.name = "onrestart";
- list_init(&svc->onrestart.commands);
- list_add_tail(&service_list, &svc->slist); /* append to service_list */
- return svc;
- }
在这里args[1]就是zygote,系统会先查找是否已经存在该服务,然后构建一个service svc,进行相关的填充,包括服务名,服务所属的类别名字,以及服务启动带入的参数个数(要减去service和服务名zygote),最后将这个svc加入到service_list全局链表中。随后所做的是对Service的下面几行Option进行解析,比如class,socket,onrestart等等。使用的是parse_line_service函数,如下:
- static void parse_line_service(struct parse_state *state, int nargs, char **args) /* handles one option line of a service section (excerpt: some cases are elided) */
- {
- struct service *svc = state->context; /* presumably the service returned by parse_service for this section -- confirm */
- struct command *cmd;
- int i, kw, kw_nargs;
- if (nargs == 0) {
- return;
- }
- svc->ioprio_class = IoSchedClass_NONE; /* NOTE(review): executed for every non-empty option line */
- kw = lookup_keyword(args[0]);
- switch (kw) {
- case K_capability:
- break;
- case K_class:
- if (nargs != 2) {
- parse_error(state, "class option requires a classname\n");
- } else {
- svc->classname = args[1];// e.g. "main" or "core"
- }
- break;
- case K_console:
- svc->flags |= SVC_CONSOLE;
- break;
- case K_disabled:
- svc->flags |= SVC_DISABLED;
- ......
- case K_onrestart:
- nargs--; /* skip the "onrestart" word; args[0] becomes the command name */
- args++;
- kw = lookup_keyword(args[0]);
- if (!kw_is(kw, COMMAND)) {
- parse_error(state, "invalid command '%s'\n", args[0]);
- break;
- }
- kw_nargs = kw_nargs(kw);
- if (nargs < kw_nargs) {
- parse_error(state, "%s requires %d %s\n", args[0], kw_nargs - 1,
- kw_nargs > 2 ? "arguments" : "argument");
- break;
- }
- cmd = malloc(sizeof(*cmd) + sizeof(char*) * nargs); /* NOTE(review): the result is not checked for NULL */
- cmd->func = kw_func(kw);
- cmd->nargs = nargs;
- memcpy(cmd->args, args, sizeof(char*) * nargs);
- list_add_tail(&svc->onrestart.commands, &cmd->clist); /* append to this service's onrestart command list */
- break;
- .......
- }
这里以class这个keyword为例,会将当前class所属的svc的类名进行修改,变为main类别;socket和onrestart的处理与之类似。
到此为止整个service都解析完成 ,开始下一个section的内容。但是zygote这个服务进程的启动还没有开始,将在下面分析。
下面分析on字段的内容,以on boot这个section作为例子进行分析
- on boot
- ifup lo
- hostname localhost
- domainname localdomain
- ....
- # Set this property so surfaceflinger is not started by system_init
- setprop system_init.startsurfaceflinger 0
- class_start core
- class_start main
和前面的分析类似,case中进入K_on选项,执行函数parse_action
- static void *parse_action(struct parse_state *state, int nargs, char **args) /* handles an "on <trigger>" line; returns the new struct action, or 0 when the argument count is not exactly 2 */
- {
- struct action *act;
- if (nargs < 2) {
- parse_error(state, "actions must have a trigger\n");
- return 0;
- }
- if (nargs > 2) {
- parse_error(state, "actions may not have extra parameters\n");
- return 0;
- }
- act = calloc(1, sizeof(*act)); /* NOTE(review): the result is not checked for NULL before use */
- act->name = args[1]; // name of the action, e.g. "boot" or "init"
- list_init(&act->commands);
- list_add_tail(&action_list, &act->alist); /* append to action_list */
- /* XXX add to hash */
- return act;
- }
在这里可以看到action结构体类似于service,这个action的名字为boot,最后会将这个action加入到全局链表action_list中。
随后执行parse_line_action函数,对on字段所在的option进行解析,代码如下:
- static void parse_line_action(struct parse_state* state, int nargs, char **args) // handles one command line inside an "on" section (the quoted excerpt stops before the closing brace)
- {
- struct command *cmd;
- struct action *act = state->context;// the action this line belongs to, e.g. the one for "on boot"
- int (*func)(int nargs, char **args);
- int kw, n;
- if (nargs == 0) {
- return;
- }
- kw = lookup_keyword(args[0]);// keyword id of the command name
- if (!kw_is(kw, COMMAND)) {
- parse_error(state, "invalid command '%s'\n", args[0]);
- return;
- }
- n = kw_nargs(kw); /* minimum number of tokens on the line; the error text below reports n - 1 arguments */
- if (nargs < n) {
- parse_error(state, "%s requires %d %s\n", args[0], n - 1,
- n > 2 ? "arguments" : "argument");
- return;
- }
- cmd = malloc(sizeof(*cmd) + sizeof(char*) * nargs); /* NOTE(review): the result is not checked for NULL */
- cmd->func = kw_func(kw);
- cmd->nargs = nargs; /* all tokens of the line, args[0] (the command name) included */
- memcpy(cmd->args, args, sizeof(char*) * nargs);
- list_add_tail(&act->commands, &cmd->clist); // append to this action's command list
这里以class_start main为例,该关键字的定义为 KEYWORD(class_start, COMMAND, 1, do_class_start),其中的1表示该命令至少需要1个参数。解析时会填充一个command结构体,包括这个cmd的执行函数(class_start对应的func为do_class_start),以及该行实际的参数个数nargs(命令名本身也计入,因此这里nargs=2)。同时将这个cmd添加到该action的commands链表中。本文中将会出现2个cmd。
至此,on和service两个section已经举例分析完成。
3 下面继续分析main函数中的queue_builtin_action和action_for_each_trigger。
queue_builtin_action(console_init_action, "console_init");//第二个开机画面显示函数
该函数实现将console_init这个action添加到action_queue全局链表中。
action_for_each_trigger("boot", action_add_queue_tail);//把boot这个action添加到action_queue链表中
- void action_for_each_trigger(const char *trigger, /* calls func on every action in action_list whose name equals trigger */
- void (*func)(struct action *act))
- {
- struct listnode *node;
- struct action *act;
- list_for_each(node, &action_list) {
- act = node_to_item(node, struct action, alist); /* recover the action that embeds this list node */
- if (!strcmp(act->name, trigger)) {
- func(act);
- }
- }
- }
在该函数中,首先遍历action_list链表,逐个取出action,看其名字是否与给定的trigger(这里是boot)相同。我们知道刚才在解析init.rc中的on boot时,将boot作为action的name加入到了action_list中,所以可以找到这个boot的action。成功匹配后调用action_add_queue_tail,将这个action再加入到action_queue中,等待着执行。
4 for(;;)循环中执行execute_one_command
- void execute_one_command(void) /* runs at most one command per call, advancing cur_action / cur_command */
- {
- int ret;
- if (!cur_action || !cur_command || is_last_command(cur_action, cur_command)) { /* nothing in progress, or the current action is finished: take the next queued action */
- cur_action = action_remove_queue_head();
- cur_command = NULL;
- if (!cur_action) /* no action was returned: nothing to do */
- return;
- INFO("processing action %p (%s)\n", cur_action, cur_action->name);
- cur_command = get_first_command(cur_action);
- } else {
- cur_command = get_next_command(cur_action, cur_command);
- }
- if (!cur_command)
- return;
- ret = cur_command->func(cur_command->nargs, cur_command->args);// call the command's handler, e.g. the one for class_start
- INFO("command '%s' r=%d\n", cur_command->args[0], ret);
- }
使用action_remove_queue_head获取action_queue链表中的action后,移除该节点,使用get_first_command获得在action中的命令,比如这里出现的boot和console_init这两个action。针对console_init启动console_init_action这个函数。如果是boot则会对boot这个action所具有的commands链表进行cmd的获取,class_start的func函数指针为do_class_start:
- int do_class_start(int nargs, char **args) /* handler for the class_start command; args[1] is the class name; always returns 0 */
- {
- /* Starting a class does not start services
- * which are explicitly disabled. They must
- * be started individually.
- */
- service_for_each_class(args[1], service_start_if_not_disabled);// author's note: checks whether each service's class is the class being started
- return 0;
- }
可以看到提取了命令行的第二个参数,如main,core等。在service_for_each_class中遍历service_list查找属于该类的service,如我们前面提到的zygote,查找到后执行service_start_if_not_disabled——>service_start,至此我们进入了启动service的代码
- void service_start(struct service *svc, const char *dynamic_args) /* forks and execs the service svc; dynamic_args, when non-NULL, is a space-separated list of extra arguments */
- {
- struct stat s;
- pid_t pid;
- int needs_console;
- int n; /* only used inside the #if 0 block below */
- /* starting a service removes it from the disabled
- * state and immediately takes it out of the restarting
- * state if it was in there
- */
- svc->flags &= (~(SVC_DISABLED|SVC_RESTARTING));
- svc->time_started = 0;// reset the service's start time to 0
- /* running processes require no additional work -- if
- * they're in the process of exiting, we've ensured
- * that they will immediately restart on exit, unless
- * they are ONESHOT
- */
- if (svc->flags & SVC_RUNNING) {
- return;
- }
- needs_console = (svc->flags & SVC_CONSOLE) ? 1 : 0;
- if (needs_console && (!have_console)) {
- ERROR("service '%s' requires console\n", svc->name);
- svc->flags |= SVC_DISABLED;
- return;
- }
- if (stat(svc->args[0], &s) != 0) { // stat the executable; on any failure the service is disabled
- ERROR("cannot find '%s', disabling '%s'\n", svc->args[0], svc->name);
- svc->flags |= SVC_DISABLED;
- return;
- }
- if ((!(svc->flags & SVC_ONESHOT)) && dynamic_args) {
- ERROR("service '%s' must be one-shot to use dynamic args, disabling\n",
- svc->args[0]);
- svc->flags |= SVC_DISABLED;
- return;
- }
- NOTICE("starting '%s'\n", svc->name);
- pid = fork();// create the child process
- if (pid == 0) { // child process
- struct socketinfo *si;
- struct svcenvinfo *ei;
- char tmp[32];
- int fd, sz;
- if (properties_inited()) {
- get_property_workspace(&fd, &sz);
- sprintf(tmp, "%d,%d", dup(fd), sz);
- add_environment("ANDROID_PROPERTY_WORKSPACE", tmp);
- }
- for (ei = svc->envvars; ei; ei = ei->next)
- add_environment(ei->name, ei->value);
- for (si = svc->sockets; si; si = si->next) {
- int socket_type = ( /* "stream" -> SOCK_STREAM, "dgram" -> SOCK_DGRAM, anything else -> SOCK_SEQPACKET */
- !strcmp(si->type, "stream") ? SOCK_STREAM :
- (!strcmp(si->type, "dgram") ? SOCK_DGRAM : SOCK_SEQPACKET));
- int s = create_socket(si->name, socket_type, /* note: this s shadows the outer struct stat s */
- si->perm, si->uid, si->gid);// create the socket
- if (s >= 0) {
- publish_socket(si->name, s);
- }
- }
- if (svc->ioprio_class != IoSchedClass_NONE) {
- if (android_set_ioprio(getpid(), svc->ioprio_class, svc->ioprio_pri)) {
- ERROR("Failed to set pid %d ioprio = %d,%d: %s\n",
- getpid(), svc->ioprio_class, svc->ioprio_pri, strerror(errno));
- }
- }
- if (needs_console) {
- setsid();
- open_console();
- } else {
- zap_stdio();
- }
- #if 0
- for (n = 0; svc->args[n]; n++) {
- INFO("args[%d] = '%s'\n", n, svc->args[n]);
- }
- for (n = 0; ENV[n]; n++) {
- INFO("env[%d] = '%s'\n", n, ENV[n]);
- }
- #endif
- setpgid(0, getpid());
- /* as requested, set our gid, supplemental gids, and uid */
- if (svc->gid) {
- setgid(svc->gid); /* NOTE(review): return value ignored (same for setgroups and setuid below); on failure execution continues to execve */
- }
- if (svc->nr_supp_gids) {
- setgroups(svc->nr_supp_gids, svc->supp_gids);
- }
- if (svc->uid) {
- setuid(svc->uid);
- }
- if (!dynamic_args) {
- if (execve(svc->args[0], (char**) svc->args, (char**) ENV) < 0) { // exec the service's executable
- ERROR("cannot execve('%s'): %s\n", svc->args[0], strerror(errno));// reached only when execve failed
- }
- } else {
- char *arg_ptrs[INIT_PARSER_MAXARGS+1];
- int arg_idx = svc->nargs;
- char *tmp = strdup(dynamic_args); /* shadows tmp[32] above; NOTE(review): the result is not checked for NULL */
- char *next = tmp;
- char *bword;
- /* Copy the static arguments */
- memcpy(arg_ptrs, svc->args, (svc->nargs * sizeof(char *)));
- while((bword = strsep(&next, " "))) { /* append each space-separated word of dynamic_args */
- arg_ptrs[arg_idx++] = bword;
- if (arg_idx == INIT_PARSER_MAXARGS) /* stop at capacity; arg_ptrs has one extra slot for the terminator */
- break;
- }
- arg_ptrs[arg_idx] = '\0'; /* terminates the argv array ('\0' is used here as a null pointer constant) */
- execve(svc->args[0], (char**) arg_ptrs, (char**) ENV);
- }
- _exit(127); /* only reached when execve returned, i.e. failed */
- }
- if (pid < 0) { /* fork failed */
- ERROR("failed to start '%s'\n", svc->name);
- svc->pid = 0;
- return;
- }
- svc->time_started = gettime(); /* parent: record start time, pid and running state */
- svc->pid = pid;
- svc->flags |= SVC_RUNNING;
- if (properties_inited())
- notify_service_state(svc->name, "running");
- }
分析这段代码,主要内容:
a.检查当前service如zygote的flag,即SVC_RUNNING(服务运行中),SVC_DISABLED等
b.fork一个子进程,子进程中会建立一个socket用于通信,同时使用if (execve(svc->args[0], (char**) svc->args, (char**) ENV) < 0)执行zygote对应的可执行文件,至此service zygote真正的启动。
到这里为止,对android系统的init启动有了清晰的了解,对init如何启动adbd,zygote等service有了一定的了解,以及对init.rc有了清晰的认识。init中还有部分内容等着后续几天做一定的学习。
补充:service进程的重启在restart_processes中进行,它会重启flag为SVC_RESTARTING的服务。这部分进程的重启其实在init中由handle_signal来管理,一旦出现service崩溃,poll函数会接收到相关文件描述符变化的信息,执行handle_signal中的wait_for_one_process
- static int wait_for_one_process(int block) /* reaps one exited child and handles its service (excerpt: part of the body is elided); returns -1 when nothing was reaped, else 0 */
- {
- pid_t pid;
- int status;
- struct service *svc;
- struct socketinfo *si;
- time_t now;
- struct listnode *node;
- struct command *cmd;
- while ( (pid = waitpid(-1, &status, block ? 0 : WNOHANG)) == -1 && errno == EINTR ); /* retry when interrupted; uses WNOHANG unless block is non-zero */
- if (pid <= 0) return -1;
- INFO("waitpid returned pid %d, status = %08x\n", pid, status);
- svc = service_find_by_pid(pid);
- if (!svc) {
- ERROR("untracked pid %d exited\n", pid);
- return 0;
- }
- .....
- svc->flags |= SVC_RESTARTING; /* mark the service as restarting */
- /* Execute all onrestart commands for this service. */
- list_for_each(node, &svc->onrestart.commands) {
- cmd = node_to_item(node, struct command, clist);
- cmd->func(cmd->nargs, cmd->args);
- }
- notify_service_state(svc->name, "restarting");
- return 0;
- }
该函数使用waitpid,找到退出的子进程的进程号pid,然后查找到对应的service,执行service中onrestart的各个commands,如restart media等。同时将service的flag设置为SVC_RESTARTING,这样就结合前面讲到的restart_processes重新启动该服务进程。
1017

被折叠的 条评论
为什么被折叠?



