http://blog.youkuaiyun.com/embeddedfly/article/details/6411691
我们接着看信号量的操作,在semaphore_p()函数中调用了semop(sem_id, &sem_b, 1) 函数,在sys_ipc()系统调用中则会到达
case SEMOP: return sys_semtimedop ( first, ( struct sembuf __user * ) ptr, second, NULL ) ;
可以看到转向sys_semtimedop()处执行。在进入这个函数之前,我们先看一下在应用程序中调用semop()时传递的参数,参数搞不清我们将无法理解系统调用的走向。semop()函数的主要作用是改变信号量的值,它的第一个参数是信号量的标识符,就像消息队列和共享内存中我们讲的那样,第二个参数则指向一个sembuf数据结构
/*
 * One semaphore operation. The semop system call takes an array of
 * these, one element per operation to perform.
 */
struct sembuf {
	unsigned short sem_num;	/* semaphore index in array */
	short sem_op;		/* semaphore operation */
	short sem_flg;		/* operation flags */
};
sembuf结构中的sem_num是要操作的信号量在信号量数组(信号量集)中的下标,sem_op是要对信号量的值施加的改变量,一般是P操作为-1,而在V操作中为+1,而sem_flg是信号量的一些操作标记,一般是SEM_UNDO,表示万一进程未及时归还信号量,将由这个标记告诉内核由内核代为处理。我们看到在应用程序中
struct sembuf sem_b; sem_b. sem_num = 0; sem_b. sem_op = - 1; /* P() */ sem_b. sem_flg = SEM_UNDO;
这里将sem_num设为0,也就是操作信号量数组中下标为0的那个信号量(本例的信号量数组中只有一个元素),而信号量的操作数是-1,所以是P操作,另外sem_flg设为了SEM_UNDO,表示如果进程中途退出了将由内核代他“清帐”。应用程序这边看明白了,我们就可以对照着sys_semtimedop()函数接着看了,可以看出上述参数是如何传递进来的,这个函数在ipc/sem.c的1042行处。
/*
 * sys_semtimedop - perform an array of semaphore operations, optionally
 * with a timeout.
 *
 * @semid:   semaphore set identifier
 * @tsops:   user-space array of operations
 * @nsops:   number of elements in @tsops
 * @timeout: user-space timeout, or NULL for none
 *
 * This excerpt covers argument validation and copying the user data in:
 * the operations are copied into fast_sops[] when they fit, otherwise
 * into a kmalloc()ed buffer, and a non-NULL timeout is range-checked and
 * converted to jiffies.
 */
asmlinkage long sys_semtimedop(int semid, struct sembuf __user *tsops,
			       unsigned nsops,
			       const struct timespec __user *timeout)
{
	int error = -EINVAL;
	struct sem_array *sma;
	struct sembuf fast_sops[SEMOPM_FAST];
	struct sembuf *sops = fast_sops, *sop;
	struct sem_undo *un;
	int undos = 0, alter = 0, max;
	struct sem_queue queue;
	unsigned long jiffies_left = 0;
	struct ipc_namespace *ns;

	ns = current->nsproxy->ipc_ns;

	if (nsops < 1 || semid < 0)
		return -EINVAL;
	if (nsops > ns->sc_semopm)
		return -E2BIG;
	/* Too many operations for the on-stack array: use a heap buffer. */
	if (nsops > SEMOPM_FAST) {
		sops = kmalloc(sizeof(*sops) * nsops, GFP_KERNEL);
		if (sops == NULL)
			return -ENOMEM;
	}
	if (copy_from_user(sops, tsops, nsops * sizeof(*tsops))) {
		error = -EFAULT;
		goto out_free;
	}
	if (timeout) {
		struct timespec _timeout;

		if (copy_from_user(&_timeout, timeout, sizeof(*timeout))) {
			error = -EFAULT;
			goto out_free;
		}
		if (_timeout.tv_sec < 0 || _timeout.tv_nsec < 0 ||
		    _timeout.tv_nsec >= 1000000000L) {
			error = -EINVAL;
			goto out_free;
		}
		jiffies_left = timespec_to_jiffies(&_timeout);
	}
我们看到第一个参数是信号量的标识符,第二个参数则指向我们应用程序中创建好的sembuf结构变量,第三个参数则是1(即操作的个数nsops)。这段代码中最主要的是copy_from_user(),它把应用程序传入的sembuf结构复制到内核中,这个函数涉及到嵌入式汇编,我们以后会重点讲到并详细描述。这段代码其余的部分相信只要看过前边的消息队列和共享内存的处理后也不是难事。这里我们可以看到有一个用于超时检测的参数timeout,上面传递过来的是NULL,因此我们忽略这部分代码,不过里面的代码也说明了它是对时间的相关操作。暂且跳过,毕竟我们的目标是顺着应用程序看内核。
max = 0; for ( sop = sops; sop < sops + nsops; sop+ + ) { if ( sop- > sem_num > = max ) max = sop- > sem_num; if ( sop- > sem_flg & SEM_UNDO) undos = 1; if ( sop- > sem_op ! = 0) alter = 1; } retry_undos: if ( undos) { un = find_undo( ns, semid) ; if ( IS_ERR( un) ) { error = PTR_ERR( un) ; goto out_free; } } else un = NULL ; sma = sem_lock_check( ns, semid) ; if ( IS_ERR( sma) ) { error = PTR_ERR( sma) ; goto out_free; }
接着内核循环遍历各个操作,记录其中最大的信号量下标max,并检测是否带有SEM_UNDO标记、是否有sem_op不为0的操作;如果带有SEM_UNDO标记,就进入find_undo()去查找(找不到时则分配)当前进程针对该信号量集的sem_undo结构
static struct sem_undo * find_undo( struct ipc_namespace * ns, int semid) { struct sem_array * sma; struct sem_undo_list * ulp; struct sem_undo * un, * new ; int nsems; int error ; error = get_undo_list( & ulp) ; if ( error ) return ERR_PTR( error ) ; spin_lock( & ulp- > lock) ; un = lookup_undo( ulp, semid) ; spin_unlock( & ulp- > lock) ; if ( likely( un!=NULL) ) goto out; /* no undo structure around - allocate one. */ sma = sem_lock_check( ns, semid) ; if ( IS_ERR( sma) ) return ERR_PTR( PTR_ERR( sma) ) ; nsems = sma- > sem_nsems; sem_getref_and_unlock( sma) ; new = kzalloc( sizeof ( struct sem_undo) + sizeof ( short ) * nsems, GFP_KERNEL) ; if ( ! new ) { sem_putref( sma) ; return ERR_PTR( - ENOMEM) ; } new - > semadj = ( short * ) & new [ 1] ; new - > semid = semid; spin_lock( & ulp- > lock) ; un = lookup_undo( ulp, semid) ; if ( un) { spin_unlock( & ulp- > lock) ; kfree( new ) ; sem_putref( sma) ; goto out; } sem_lock_and_putref( sma) ; if ( sma- > sem_perm. deleted) { sem_unlock( sma) ; spin_unlock( & ulp- > lock) ; kfree( new ) ; un = ERR_PTR( - EIDRM) ; goto out; } new - > proc_next = ulp- > proc_list; ulp- > proc_list = new ; new - > id_next = sma- > undo; sma- > undo = new ; sem_unlock( sma) ; un = new ; spin_unlock( & ulp- > lock) ; out: return un; }
这个函数中我们首先要接触一下sem_undo数据结构
/*
 * Undo request. Each task has a list of these; they are executed
 * automatically when the process exits.
 */
struct sem_undo {
	struct sem_undo *proc_next;	/* next entry on this process */
	struct sem_undo *id_next;	/* next entry on this semaphore set */
	int semid;			/* semaphore set identifier */
	short *semadj;			/* array of adjustments, one per semaphore */
};
我们刚才介绍过进程在中途退出时都要委托内核“清帐”,就是由这里的sem_undo结构记录的。应用程序中我们看到设置了信号量的操作标记SEM_UNDO,所以内核在这里会为进程分配一个sem_undo结构“记帐”,并且一个进程有一个sem_undo队列以方便记住所有的“帐”,即进程需要对哪些信号量有还原的义务。同时,每个信号量也有一个undo指针,指向一个sem_undo队列,它使信号量记住所有“欠帐”的进程。这二个队列分别由sem_undo结构中的proc_next和id_next指针负责链入,可以顺着这二个指针找到同一个进程的所有“帐”,以及与同一个信号量相关的所有进程。通过这段介绍,上面这段代码就非常清晰了。回到sys_semtimedop()函数中我们继续看
if ( un & & un- > semid = = - 1) { sem_unlock( sma) ; goto retry_undos; } error = - EFBIG; if ( max > = sma- > sem_nsems) goto out_unlock_free; error = - EACCES; if ( ipcperms( & sma- > sem_perm, alter ? S_IWUGO : S_IRUGO) ) goto out_unlock_free; error = security_sem_semop( sma, sops, nsops, alter) ; if ( error ) goto out_unlock_free; error = try_atomic_semop ( sma, sops, nsops, un, task_tgid_vnr( current) ) ; if ( error < = 0) { if ( alter & & error = = 0) update_queue ( sma) ; goto out_unlock_free; }
上面的代码最重要的是try_atomic_semop()函数,这个函数是对信号量操作的关键
static int try_atomic_semop ( struct sem_array * sma, struct sembuf * sops, int nsops, struct sem_undo * un, int pid) { int result, sem_op; struct sembuf * sop; struct sem * curr; for ( sop = sops; sop < sops + nsops; sop+ + ) { curr = sma- > sem_base + sop- > sem_num; sem_op = sop- > sem_op; result = curr- > semval; if ( ! sem_op & & result) goto would_block; result + = sem_op; if ( result < 0) goto would_block; if ( result > SEMVMX) goto out_of_range ; if ( sop- > sem_flg & SEM_UNDO) { int undo = un- > semadj[ sop- > sem_num] - sem_op; /* * Exceeding the undo range is an error. */ if ( undo < ( - SEMAEM - 1) | | undo > SEMAEM) goto out_of_range ; } curr- > semval = result; } sop- - ; while ( sop > = sops) { sma- > sem_base[ sop- > sem_num] . sempid = pid; if ( sop- > sem_flg & SEM_UNDO) un- > semadj[ sop- > sem_num] - = sop- > sem_op; sop- - ; } sma- > sem_otime = get_seconds( ) ; return 0; out_of_range : result = - ERANGE ; goto undo; would_block: if ( sop- > sem_flg & IPC_NOWAIT) result = - EAGAIN; else result = 1; undo: sop- - ; while ( sop > = sops) { sma- > sem_base[ sop- > sem_num] . semval - = sop- > sem_op; sop- - ; } return result; }
这个函数中实质性的重点是二句,第一句是对信号量值的计算result += sem_op;第二句是将操作后的值赋给信号量curr->semval = result;这个函数中我们还可以看到“还帐”的操作,即sma->sem_base[sop->sem_num].semval -= sop->sem_op;上面加上这里减去也就还原了。整个函数的跳转看来很复杂,只不过分了几种情况。首先是操作后信号量的值大于固定的SEMVMX的值(或者undo的调整值超出了范围),就要goto out_of_range处,设置出错码-ERANGE并执行undo还原操作。其次是操作后信号量的值变成了负值,这出现在信号量已经被其他进程抢先P操作了的情况下,此时就要goto would_block处,通常进程就会睡眠等待了,我们看到到那里设置返回值(带有IPC_NOWAIT标记时为-EAGAIN,否则为1)然后执行undo还原操作。最后一种情况是操作值sem_op为0而信号量的当前值不为0时,代码:if (!sem_op && result)也会goto would_block处。我们的应用程序执行到这里顺利取得了信号量,回到上面的函数我们继续看
/* We need to sleep on this operation, so we put the current * task into the pending queue and go to sleep. */ queue . sma = sma; queue . sops = sops; queue . nsops = nsops; queue . undo = un; queue . pid = task_tgid_vnr( current) ; queue . id = semid; queue . alter = alter; if ( alter) append_to_queue( sma , & queue ) ; else prepend_to_queue( sma , & queue ) ; queue . status = - EINTR; queue . sleeper = current; current- > state = TASK_INTERRUPTIBLE; sem_unlock( sma) ; if ( timeout) jiffies_left = schedule_timeout( jiffies_left) ; else schedule( ) ; error = queue . status; while ( unlikely( error = = IN_WAKEUP) ) { cpu_relax( ) ; error = queue . status; } if ( error ! = - EINTR) { /* fast path: update_queue already obtained all requested * resources */ goto out_free; } sma = sem_lock( ns, semid) ; if ( IS_ERR( sma) ) { BUG_ON( queue . prev ! = NULL ) ; error = - EIDRM; goto out_free; } /* * If queue.status != -EINTR we are woken up by another process */ error = queue . status; if ( error ! = - EINTR) { goto out_unlock_free; } /* * If an interrupt occurred we have to clean up the queue */ if ( timeout & & jiffies_left = = 0) error = - EAGAIN; remove_from_queue( sma, & queue ) ; goto out_unlock_free; out_unlock_free: sem_unlock( sma) ; out_free: if ( sops ! = fast_sops) kfree( sops) ; return error ; }
和以前我们说过的消息队列一样,进程睡眠之前要先挂入信号量的等待队列,这是用sem_queue结构来完成的
/*
 * Pending-operation record: there is one of these for each sleeping
 * process in the system.
 */
struct sem_queue {
	struct sem_queue *next;		/* next entry in the queue */
	struct sem_queue **prev;	/* previous entry in the queue, *(q->prev) == q */
	struct task_struct *sleeper;	/* this process */
	struct sem_undo *undo;		/* undo structure */
	int pid;			/* process id of requesting process */
	int status;			/* completion status of operation */
	struct sem_array *sma;		/* semaphore array for operations */
	int id;				/* internal sem id */
	struct sembuf *sops;		/* array of pending operations */
	int nsops;			/* number of operations */
	int alter;			/* does the operation alter the array? */
};
我们看到这里通过sem_queue与进程挂上钩、建立好联系之后就要转入进程调度,在进入调度之前current->state = TASK_INTERRUPTIBLE;使进程转入睡眠状态。那这个进程什么时候会被唤醒呢,我们想到已经得到信号量的“欠帐”进程,不管那些进程如何,内核也可以代为操作,最终都会“清帐”,也就是执行完函数try_atomic_semop()然后调用update_queue()来唤醒这些睡眠的进程,以通知他们可以来取得信号量了。
static void update_queue ( struct sem_array * sma) { int error ; struct sem_queue * q; q = sma- > sem_pending; while ( q) { error = try_atomic_semop( sma, q- > sops, q- > nsops, q- > undo, q- > pid) ; /* Does q->sleeper still need to sleep? */ if ( error < = 0) { struct sem_queue * n; remove_from_queue( sma, q) ; q- > status = IN_WAKEUP; /* * Continue scanning. The next operation * that must be checked depends on the type of the * completed operation: * - if the operation modified the array, then * restart from the head of the queue and * check for threads that might be waiting * for semaphore values to become 0. * - if the operation didn't modify the array, * then just continue. */ if ( q- > alter) n = sma- > sem_pending; else n = q- > next; wake_up_process( q- > sleeper) ; /* hands-off: q will disappear immediately after * writing q->status. */ smp_wmb( ) ; q- > status = error ; q = n; } else { q = q- > next; } } }
这个函数在while循环中首先再次调用try_atomic_semop函数,来试一下队列中的第一个进程是否能完成它对信号量的操作,如果成功了就要唤醒该进程使他运行,否则的话就试试下一个在信号量队列中等待的进程,顺着next的指针链直到穷尽所有等待进程。不过我们在这里看似好像是排在队列中最前的进程有优先权被唤醒,其实我们应该明白在前面的函数中关键的挂入函数是不同的,
/* Pick the queueing helper according to whether the operation alters the array. */
if (alter) {
	append_to_queue(sma, &queue);
} else {
	prepend_to_queue(sma, &queue);
}
这里根据alter来决定挂入队列的方式:alter不为0时调用append_to_queue(),否则调用prepend_to_queue(),从而改变了进程在队列中的先后顺序。这二个函数很简单,到这里我们就介绍了信号量从应用程序到内核的操作。其他关于信号量的操作都很简单了,只要看过前边消息队列和共享内存,自己阅读就不再是难事了,我们起到了关键的“导游地图”的作用即可。