1. Process states
TASK_RUNNING: runnable (running or waiting on a run queue)
TASK_INTERRUPTIBLE: sleeping, can be woken by a signal
TASK_UNINTERRUPTIBLE: sleeping, cannot be woken by a signal
TASK_STOPPED: execution has been stopped
TASK_TRACED: execution is being traced by a debugger
EXIT_ZOMBIE: the process has died and is waiting for its parent to reap it
EXIT_DEAD: the process has died and is being reaped
EXIT_ZOMBIE and EXIT_DEAD are special: they can be stored in task_struct->state as well as in task_struct->exit_state.
struct task_struct{
...
pid_t pid;
pid_t tgid;
}
A process is normally identified by its pid, and pid maps one-to-one to a task_struct. For a single-threaded process, tgid equals pid; for a multi-threaded process, tgid equals the pid of the thread-group leader (whose tgid of course equals its own pid). This is why getpid() generally returns the task_struct's tgid value rather than its pid value.
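Consistent with this, the kernel's getpid() implementation (kernel/sys.c in linux-4.4) returns the tgid, translated into the caller's pid namespace:
SYSCALL_DEFINE0(getpid)
{
	return task_tgid_vnr(current);	/* tgid as seen from the caller's ns */
}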
In linux-4.4, a thread_info structure sits right at the bottom of the kernel stack:
struct thread_info {
	struct task_struct *task;	/* main task structure, 8 bytes */
	__u32 flags;			/* low level flags, 4 bytes */
	__u32 status;			/* thread synchronous flags, 4 bytes */
	__u32 cpu;			/* current CPU, 4 bytes */
	mm_segment_t addr_limit;	/* 8 bytes */
	unsigned int sig_on_uaccess_error:1;	/* the two bitfields share one 4-byte word */
	unsigned int uaccess_err:1;	/* uaccess failed */
};
Setting the process state
#define _THIS_IP_ ({ __label__ __here; __here: (unsigned long)&&__here; })
The _THIS_IP_ macro evaluates to the address of a label placed at that point inside the function.
#define set_task_state(tsk, state_value) \
do { \
(tsk)->task_state_change = _THIS_IP_; \
smp_store_mb((tsk)->state, (state_value)); \
} while (0)
There is also a macro for setting the state of the current process:
#define set_current_state(state_value) \
do { \
current->task_state_change = _THIS_IP_; \
smp_store_mb(current->state, (state_value)); \
} while (0)
These are equivalent to:
(tsk)->task_state_change = address of the label at this point in the function
(tsk)->state = (state_value) (plus a memory barrier, since smp_store_mb is used)
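A typical caller uses this in the classic wait-loop pattern; a minimal sketch, where condition stands in for whatever event the code is waiting on:
for (;;) {
	set_current_state(TASK_INTERRUPTIBLE);	/* mark ourselves sleeping first */
	if (condition)				/* placeholder: the event we wait for */
		break;
	schedule();				/* give up the CPU until woken */
}
__set_current_state(TASK_RUNNING);		/* back to runnable */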
In the 4.4 kernel, thread_info and the kernel stack are defined together as a union:
#ifdef CONFIG_KASAN
#define KASAN_STACK_ORDER 1
#else
#define KASAN_STACK_ORDER 0
#endif
#define THREAD_SIZE_ORDER (2 + KASAN_STACK_ORDER)
#define THREAD_SIZE (PAGE_SIZE << THREAD_SIZE_ORDER)
union thread_union {
struct thread_info thread_info;
unsigned long stack[THREAD_SIZE/sizeof(long)];
};
Without KASAN this actually allocates a 4-page stack (PAGE_SIZE << 2 = 16384 bytes), of which 16384 - sizeof(struct thread_info) bytes are usable, since thread_info occupies the bottom of the allocation.
The current_thread_info() function returns the thread_info corresponding to the current task_struct:
static inline struct thread_info *current_thread_info(void)
{
return (struct thread_info *)(current_top_of_stack() - THREAD_SIZE);
}
static inline unsigned long current_top_of_stack(void)
{
#ifdef CONFIG_X86_64
return this_cpu_read_stable(cpu_tss.x86_tss.sp0);
#else
/* sp0 on x86_32 is special in and around vm86 mode. */
return this_cpu_read_stable(cpu_current_top_of_stack);
#endif
}
As the code shows, on x86_64 the stack top comes from cpu_tss.x86_tss.sp0:
struct x86_hw_tss {
u32 reserved1;
u64 sp0;
u64 sp1;
u64 sp2;
u64 reserved2;
u64 ist[7];
u32 reserved3;
u32 reserved4;
u16 reserved5;
u16 io_bitmap_base;
} __attribute__((packed)) ____cacheline_aligned;
Linux only uses the sp0 (esp0 on 32-bit) and io_bitmap fields of the TSS; it does not use the other fields to save registers. When a user process is interrupted and enters kernel mode, the CPU fetches sp0 (the kernel stack top pointer) from the TSS hardware state and switches to it; the other registers are then saved on the kernel stack that sp0 points to, not in the TSS.
struct tss_struct {
/*
* The hardware state:
*/
struct x86_hw_tss x86_tss;
/*
* The extra 1 is there because the CPU will access an
* additional byte beyond the end of the IO permission
* bitmap. The extra byte must be all 1 bits, and must
* be within the limit.
*/
unsigned long io_bitmap[IO_BITMAP_LONGS + 1];
/*
* Space for the temporary SYSENTER stack:
*/
unsigned long SYSENTER_stack[64];
} ____cacheline_aligned;
Hardware state: x86_tss
I/O permission bitmap: io_bitmap
Temporary SYSENTER stack: SYSENTER_stack
__visible DEFINE_PER_CPU_SHARED_ALIGNED(struct tss_struct, cpu_tss) = {
.x86_tss = {
.sp0 = TOP_OF_INIT_STACK,
#ifdef CONFIG_X86_32
.ss0 = __KERNEL_DS,
.ss1 = __KERNEL_CS,
.io_bitmap_base = INVALID_IO_BITMAP_OFFSET,
#endif
},
#ifdef CONFIG_X86_32
/*
* Note that the .io_bitmap member must be extra-big. This is because
* the CPU will access an additional byte beyond the end of the IO
* permission bitmap. The extra byte must be all 1 bits, and must
* be within the limit.
*/
.io_bitmap = { [0 ... IO_BITMAP_LONGS] = ~0 },
#endif
};
EXPORT_PER_CPU_SYMBOL(cpu_tss);
As shown above, the initial value of cpu_tss.x86_tss.sp0 comes from TOP_OF_INIT_STACK:
#define TOP_OF_INIT_STACK ((unsigned long)&init_stack + sizeof(init_stack) - \
TOP_OF_KERNEL_STACK_PADDING)
TOP_OF_KERNEL_STACK_PADDING is 0 on x86_64, so TOP_OF_INIT_STACK is simply the address just past the end of init_stack.
2. Process traversal functions
The for_each_process(p) loop, which scans the entire process list, starts from init_task and iterates until it comes back around to init_task.
Its definition below makes this clear: the task list is a circular doubly-linked list, so the loop stops once it reaches the list head again.
#define for_each_process(p) \
for (p = &init_task ; (p = next_task(p)) != &init_task ; )
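A minimal usage sketch, counting every process on the system (the walk is assumed to run under rcu_read_lock() or with tasklist_lock held):
struct task_struct *p;
int nr = 0;

rcu_read_lock();
for_each_process(p)
	nr++;
rcu_read_unlock();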
struct task_struct {
struct task_struct __rcu *real_parent; /* real parent process */
struct task_struct __rcu *parent; /* recipient of SIGCHLD, wait4() reports */
/*
* children/sibling forms the list of my natural children
*/
struct list_head children; /* list of my children */
struct list_head sibling; /* linkage in my parent's children list */
}
real_parent points to the descriptor of the process that created this process; if that parent no longer exists, it points to process 1 (init).
parent points to the current parent of this process (the recipient of SIGCHLD, reported by wait4()); in most cases it is identical to real_parent.
children is the head of a list whose elements are all the children created by this process.
sibling links this process into its parent's children list (pointers to the previous and next siblings), as sketched below.
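A short sketch of walking these lists: each child is linked into parent->children through its own sibling member (the caller is assumed to hold rcu_read_lock() or tasklist_lock):
struct task_struct *child;

list_for_each_entry(child, &parent->children, sibling)
	pr_info("child pid %d\n", child->pid);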
3. Process pid organization
3.1 pid organization structures
struct task_struct {
	/* PID/PID hash table linkage. */
	struct pid_link pids[PIDTYPE_MAX];	/* one pid_link per pid type */
};
enum pid_type
{
	PIDTYPE_PID,	/* per-process hash */
	PIDTYPE_PGID,	/* process-group hash */
	PIDTYPE_SID,	/* session hash */
	PIDTYPE_MAX
};
struct pid_link
{
struct hlist_node node;
struct pid *pid;
};
struct pid
{
	atomic_t count;		/* reference count */
	unsigned int level;	/* which ns level this pid sits at; level 0 means the global ns */
	/* lists of tasks that use this pid */
	struct hlist_head tasks[PIDTYPE_MAX];	/* one list head per pid type */
	struct rcu_head rcu;
	struct upid numbers[1];	/* one upid per pid_namespace; numbers[0] is the global namespace */
};
struct upid {
	/* Try to keep pid_chain in the same cacheline as nr for find_vpid */
	int nr;				/* the numeric pid value */
	struct pid_namespace *ns;	/* the namespace this pid value lives in */
	struct hlist_node pid_chain;	/* node in the global pid_hash table; locates this upid within pid_hash */
};
From the alloc_pid function one can see the overall logic: after a struct pid (not the numeric pid) is allocated, the pid_chain of each of its upids is hooked as a node into the global pid_hash array; the index is pid_hashfn(upid->nr, upid->ns), i.e. a hash computed from the numeric pid and the namespace address.
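For reference, that hash is defined in kernel/pid.c of linux-4.4 as:
#define pid_hashfn(nr, ns)	\
	hash_long((unsigned long)nr + (unsigned long)ns, pidhash_shift)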
3.2 Functions operating on the pid/task relationship
1. attach_pid
void attach_pid(struct task_struct *task, enum pid_type type)
{
struct pid_link *link = &task->pids[type];
hlist_add_head_rcu(&link->node, &link->pid->tasks[type]);
}
attach_pid hooks the given task onto the list of the given type; note that it does not hook into something like the global pid_hash array, but into the tasks[type] list of the struct pid that the task uses.
2. find_pid_ns
struct pid *find_pid_ns(int nr, struct pid_namespace *ns)
{
struct upid *pnr;
hlist_for_each_entry_rcu(pnr,
&pid_hash[pid_hashfn(nr, ns)], pid_chain)
if (pnr->nr == nr && pnr->ns == ns)
return container_of(pnr, struct pid,
numbers[ns->level]);
return NULL;
}
find_pid_ns hashes the given numeric pid and namespace, then searches the corresponding pid_hash bucket for a upid whose nr and ns both match, returning the struct pid that contains it.
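A minimal usage sketch, looking up pid 1 in the global namespace (init_pid_ns); find_pid_ns must run under rcu_read_lock(), and get_pid() pins the result before the lock is dropped:
struct pid *p;

rcu_read_lock();
p = find_pid_ns(1, &init_pid_ns);
if (p)
	get_pid(p);	/* take a reference while still under RCU */
rcu_read_unlock();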
3. find_vpid
static inline struct pid *task_pid(struct task_struct *task)
{
return task->pids[PIDTYPE_PID].pid;
}
task_pid simply reads the struct pid stored in the task.
static inline struct pid_namespace *ns_of_pid(struct pid *pid)
{
struct pid_namespace *ns = NULL;
if (pid)
ns = pid->numbers[pid->level].ns;
return ns;
}
ns_of_pid reads the namespace out of a struct pid (the namespace at the pid's own level).
struct pid_namespace *task_active_pid_ns(struct task_struct *tsk)
{
return ns_of_pid(task_pid(tsk));
}
task_active_pid_ns obtains the pid namespace that tsk is active in.
struct pid *find_vpid(int nr)
{
return find_pid_ns(nr, task_active_pid_ns(current));
}
EXPORT_SYMBOL_GPL(find_vpid);
find_vpid simply calls find_pid_ns to look up the struct pid; the ns argument it passes is the namespace obtained from the current task via task_active_pid_ns, and the struct pid is then returned based on that ns and nr.
4. pid_nr_ns
pid_t pid_nr_ns(struct pid *pid, struct pid_namespace *ns)
{
struct upid *upid;
pid_t nr = 0;
if (pid && ns->level <= pid->level) {
upid = &pid->numbers[ns->level];
if (upid->ns == ns)
nr = upid->nr;
}
return nr;
}
pid_nr_ns looks up nr, i.e. the numeric pid, of the given struct pid as seen from the given namespace.
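The kernel's own helpers are thin wrappers over it; e.g. pid_vnr, as found in kernel/pid.c of linux-4.4:
pid_t pid_vnr(struct pid *pid)
{
	return pid_nr_ns(pid, task_active_pid_ns(current));
}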
5. __task_pid_nr_ns
pid_t __task_pid_nr_ns(struct task_struct *task, enum pid_type type,
struct pid_namespace *ns)
{
pid_t nr = 0;
rcu_read_lock();
if (!ns)
ns = task_active_pid_ns(current);
if (likely(pid_alive(task))) {
if (type != PIDTYPE_PID)
task = task->group_leader;
nr = pid_nr_ns(rcu_dereference(task->pids[type].pid), ns);
}
rcu_read_unlock();
return nr;
}
__task_pid_nr_ns returns the nr of the given type for the given task as seen in the given namespace (if ns is NULL, the current task's active namespace is used; for types other than PIDTYPE_PID the group leader's pid is consulted).
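The familiar task helpers are built directly on it; consistent with include/linux/sched.h in linux-4.4:
static inline pid_t task_pid_nr_ns(struct task_struct *tsk,
				   struct pid_namespace *ns)
{
	return __task_pid_nr_ns(tsk, PIDTYPE_PID, ns);
}

static inline pid_t task_pid_vnr(struct task_struct *tsk)
{
	return __task_pid_nr_ns(tsk, PIDTYPE_PID, NULL);
}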
6. get_pid_task
struct task_struct *pid_task(struct pid *pid, enum pid_type type)
{
struct task_struct *result = NULL;
if (pid) {
struct hlist_node *first;
first = rcu_dereference_check(hlist_first_rcu(&pid->tasks[type]),
lockdep_tasklist_lock_is_held());
if (first)
result = hlist_entry(first, struct task_struct, pids[(type)].node);
}
return result;
}
pid_task looks up the task_struct for the given pid and type. The lookup is straightforward: pid->tasks[type] points to the node member of a pid_link, so knowing the address of the first node element, i.e. the address within pid_link, hlist_entry (a container_of(ptr, type, member)) recovers the address of the enclosing task_struct.
struct task_struct *get_pid_task(struct pid *pid, enum pid_type type)
{
struct task_struct *result;
rcu_read_lock();
result = pid_task(pid, type);
if (result)
get_task_struct(result);
rcu_read_unlock();
return result;
}
get_pid_task looks up the task for the given struct pid and type, taking a reference on the task (get_task_struct) under rcu_read_lock before returning it.
Overall, find_vpid can turn a numeric pid into a struct pid, and get_pid_task with PIDTYPE_PID can then turn that struct pid into the task_struct.
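A minimal sketch of that combination (task_from_nr is a hypothetical helper name; the returned task, if non-NULL, carries a reference that must later be dropped with put_task_struct()):
static struct task_struct *task_from_nr(int nr)
{
	struct task_struct *task;

	rcu_read_lock();
	task = get_pid_task(find_vpid(nr), PIDTYPE_PID);	/* NULL pid is handled */
	rcu_read_unlock();
	return task;	/* NULL if nr does not exist in this namespace */
}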
7. detach_pid
detach_pid is the counterpart of attach_pid: it unhooks the task from the struct pid of the given type. If afterwards all three pid->tasks lists are empty, no task is bound to this pid any more, and the struct pid is freed.
static void __change_pid(struct task_struct *task, enum pid_type type,
struct pid *new)
{
struct pid_link *link;
struct pid *pid;
int tmp;
link = &task->pids[type];
pid = link->pid;
hlist_del_rcu(&link->node);
link->pid = new;
for (tmp = PIDTYPE_MAX; --tmp >= 0; )
if (!hlist_empty(&pid->tasks[tmp]))
return;
free_pid(pid);
}
void detach_pid(struct task_struct *task, enum pid_type type)
{
__change_pid(task, type, NULL);
}
8. next_thread
static inline struct task_struct *next_thread(const struct task_struct *p)
{
return list_entry_rcu(p->thread_group.next,
struct task_struct, thread_group);
}
next_thread returns the next task_struct in the thread_group list.
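It underlies the older thread-iteration pattern still present in 4.4; a sketch using while_each_thread (the caller must hold tasklist_lock or the group's siglock):
struct task_struct *t = g;	/* g: the thread-group leader */

do {
	/* visit thread t */
} while_each_thread(g, t);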