《深入理解Linux内核（第三版）》笔记（五），第四章中断和异常

伟歌的深夜键盘

已于 2023-11-01 09:02:08 修改

阅读量432

点赞数

分类专栏：《深入理解Linux内核（第三版）》笔记文章标签： linux

于 2022-01-04 15:50:22 首次发布

本文链接：https://blog.youkuaiyun.com/weixin_42346852/article/details/122304758

版权

《深入理解Linux内核（第三版）》笔记专栏收录该内容

12 篇文章

订阅专栏

本文探讨了Linux中断和异常的区别，中断描述符的结构与初始化，中断嵌套限制，IRQ数据结构及其在驱动开发中的应用，包括init_IRQ、do_IRQ、软中断和工作队列的工作原理。通过深入剖析中断调用流程和内核栈使用，揭示了中断服务例程的执行机制和软中断的调度功能。

摘要生成于 C知道，由 DeepSeek-R1 满血版支持，前往体验 >

总述

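异常和中断的概念还是有区别的。
异常是同步的：它由当前正在执行的指令引发，因此处理时通常只要给当前进程发信号即可。
中断是异步的：它由外部设备在任意时刻产生，与当前进程大概率无关。
需要注意的是，在 x86 上两者都要通过 IDT 找到各自的处理程序（见下文 trap_init() 和 init_IRQ() 对 IDT 的初始化），区别在于触发来源和处理方式，而不在于是否需要中断向量表。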

IRQ: Interrupt ReQuest
ISR: Interrupt Service Routine

中断描述符

IDT: Interrupt Descriptor Table
和芯片架构相关，简单分析下就好。

// arch/i386/kernel/traps.c/line: 72
struct desc_struct idt_table[256] __attribute__((__section__(".data.idt")))
	= { {0, 0}, };

// include/asm-i386/processor.h/line: 27
struct desc_struct {
	unsigned long a,b;
};

// arch/i386/kernel/traps.c/line: 966
void set_intr_gate(unsigned int n, void *addr)

// arch/i386/kernel/traps.c/line: 995
void __init trap_init(void)

中断异常的嵌套

中断和异常会代表当前进程在内核态执行单独的指令序列。也就是说，名义上是当前进程占用了 CPU，但是实际上做的事情，不一定和当前进程有关（异常往往有关，中断大概率无关）。

看起来，硬件是支持中断异常嵌套的。但是，内核貌似做了一定的限制。
当 thread_info.preempt_count 不等于 0 时，内核禁止抢占功能。

IRQ 数据结构

相关数据结构比较复杂，并且完成了硬件抽象，主体部分是公共代码。
这几个数据结构，和驱动开发的关系也非常密切。

// kernel/irq/handle.c/line: 31
irq_desc_t irq_desc[NR_IRQS] __cacheline_aligned = {
	[0 ... NR_IRQS-1] = {
		.handler = &no_irq_type,
		.lock = SPIN_LOCK_UNLOCKED
	}
};

// include/linux/irq.h/line: 61
typedef struct irq_desc {
	hw_irq_controller *handler;
	void *handler_data;
	struct irqaction *action;	/* IRQ action list */
	unsigned int status;		/* IRQ status */
	unsigned int depth;		/* nested irq disables */
	unsigned int irq_count;		/* For detecting broken interrupts */
	unsigned int irqs_unhandled;
	spinlock_t lock;
} ____cacheline_aligned irq_desc_t;

// include/linux/irq.h/line: 41
struct hw_interrupt_type {
	...		// 7 个方法
}
typedef struct hw_interrupt_type  hw_irq_controller;


// include/linux/interrupt.h/line: 36
struct irqaction {
	irqreturn_t (*handler)(int, void *, struct pt_regs *);
	unsigned long flags;
	cpumask_t mask;
	const char *name;
	void *dev_id;
	struct irqaction *next;
	int irq;
	struct proc_dir_entry *dir;
};

// include/linux/irq_cpustat.h/line: 21
extern irq_cpustat_t irq_stat[];		/* defined in asm/hardirq.h */

// include/asm-i386/hardirq.h/line: 8
typedef struct {
	...
} ____cacheline_aligned irq_cpustat_t;

init_IRQ()

// arch/i386/kernel/i8259.c/line: 391
void __init init_IRQ(void)
{
	...
	set_intr_gate(vector, interrupt[i]);
	...
}

// arch/i386/kernel/traps.c/line: 966
void set_intr_gate(unsigned int n, void *addr)
{
	_set_gate(idt_table+n,14,0,addr,__KERNEL_CS);
	// idt_table 是一个全局变量，见上文分析
}
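// _set_gate 是 x86 相关的内联汇编宏，这里不展开分析
// 疑问：一个 IDT 表项里面怎么能放下一个处理函数的地址？
// 解答：每个表项是一个 8 字节的 desc_struct（两个 unsigned long：a 和 b），
// 32 位的处理函数偏移被拆成高、低各 16 位分别存放，其余的位存放段选择符（这里是 __KERNEL_CS）以及类型、DPL 等属性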

内核栈

中断处理的时间片和栈，算到当前的进程中；
如果 thread_union 大小为 8K，那么异常、中断等共用进程的内核栈；
如果大小为 4K，那么异常仍和进程共用内核栈，硬中断和软中断则各自使用独立的栈（每个 CPU 一个，见下面的 hardirq_ctx 和 softirq_ctx）。

// arch/i386/kernel/irq.c/line: 30
#ifdef CONFIG_4KSTACKS
/*
 * per-CPU IRQ handling contexts (thread information and stack)
 */
union irq_ctx {
	struct thread_info      tinfo;
	u32                     stack[THREAD_SIZE/sizeof(u32)];
};
static union irq_ctx *hardirq_ctx[NR_CPUS];
static union irq_ctx *softirq_ctx[NR_CPUS];
#endif
// hardirq_ctx 和 softirq_ctx 都是指针数组，元素是指向 union irq_ctx 的指针，每个 CPU 对应一个元素
// 数组本身不包含栈空间，需要另外开辟一块空间，然后让数组元素指向这块空间

// arch/i386/kernel/irq.c/line: 109
#ifdef CONFIG_4KSTACKS
static char softirq_stack[NR_CPUS * THREAD_SIZE]
		__attribute__((__aligned__(THREAD_SIZE)));
static char hardirq_stack[NR_CPUS * THREAD_SIZE]
		__attribute__((__aligned__(THREAD_SIZE)));

上面两个数据结构是通过下面的语句联系起来的：

// arch/i386/kernel/irq.c/line: 131
irqctx = (union irq_ctx*) &hardirq_stack[cpu*THREAD_SIZE];
...
hardirq_ctx[cpu] = irqctx;

IRQ 线的动态分配

这个很关键啦。
单片机上基本都是写死的，但是 Linux 需要也能够支持 IRQ 线的动态分配。

// kernel/irq/manage.c/line: 310
int request_irq(unsigned int irq,
		irqreturn_t (*handler)(int, void *, struct pt_regs *),
		unsigned long irqflags, const char * devname, void *dev_id)
{
	struct irqaction * action;
	...
	action = kmalloc(sizeof(struct irqaction), GFP_ATOMIC);
	...
	retval = setup_irq(irq, action);
	...
}
// 这个函数会在驱动程序中被调用，《第三版》有介绍

// kernel/irq/manage.c/line: 153
int setup_irq(unsigned int irq, struct irqaction * new)
{
	struct irq_desc *desc = irq_desc + irq;
	...
}

中断调用流程

汇编入口

/*
 * Build the entry stubs and pointer table with
 * some assembler magic.
 */
.data
ENTRY(interrupt)
.text

vector=0
ENTRY(irq_entries_start)
.rept NR_IRQS
	ALIGN
1:	pushl $vector-256
	jmp common_interrupt
.data
	.long 1b
.text
vector=vector+1
.endr

	ALIGN
common_interrupt:
	SAVE_ALL
	movl %esp,%eax
	call do_IRQ
	jmp ret_from_intr

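// 这段代码初看不太好懂，尤其是 vector 相关的数值到底放到哪里了，梳理如下：
// 1. .rept NR_IRQS 会把循环体重复展开 NR_IRQS 次，每次生成一个入口桩（标号 1:），vector 依次为 0、1、2……
// 2. 每个入口桩用 pushl $vector-256 把“编号减 256”压入内核栈，然后跳到公共的 common_interrupt；
//    减 256 得到负数，是为了和非负的系统调用号区分开
// 3. SAVE_ALL 之后 %esp 指向的就是 pt_regs，所以压栈的这个值会随 regs 一起传给 do_IRQ()，
//    do_IRQ() 再通过 regs->orig_eax & 0xff 还原出 IRQ 号
// 4. .data 段里的 .long 1b 把每个入口桩的地址依次存入 interrupt[] 数组，
//    init_IRQ() 中的 set_intr_gate(vector, interrupt[i]) 用的就是这个数组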

do_IRQ()

// arch/i386/kernel/irq.c/line: 48
fastcall unsigned int do_IRQ(struct pt_regs *regs)
{
	...
	__do_IRQ(irq, regs);
	...
}
// 这个函数和硬件相关；
// 其作用是准备好硬件环境，比如说堆栈，然后调用 __do_IRQ()
// __do_IRQ() 是硬件无关的函数

__do_IRQ()

// kernel/irq/handle.c/line: 106
fastcall unsigned int __do_IRQ(unsigned int irq, struct pt_regs *regs)
{
	irq_desc_t *desc = irq_desc + irq;
	...
	// irq_desc 是数组，元素和中断号绑定；所以自旋锁只会锁对应的中断
	// 但是，同时会锁定该中断对应的所有操作
	spin_lock(&desc->lock);
	...		// 做一些标志判断，《第三版》上有详细解析
	action_ret = handle_IRQ_event(irq, regs, action);	// 执行中断服务例程
	...
}

handle_IRQ_event()

// kernel/irq/handle.c/line: 78
fastcall int handle_IRQ_event(unsigned int irq, struct pt_regs *regs,
				struct irqaction *action)
{
	...
	do {
		ret = action->handler(irq, action->dev_id, regs);
		...
		action = action->next;
	} while (action);
	...
}
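// 疑问：函数会遍历该 IRQ 上所有的 action；可是，前面也说了，一个 IRQ 线上可能会有多个设备，
// 如果只有一个设备触发了中断，也需要遍历所有的 action 吗？
// 解答：需要。多个设备共享同一条 IRQ 线时，内核无法仅凭 IRQ 号判断是哪个设备发出的中断，
// 所以只能依次调用每个 action->handler；各个处理函数借助 dev_id 检查自己的设备，发现不是自己的中断就直接返回（IRQ_NONE）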

软中断

软中断数据结构

// kernel/softirq.c/line: 44
static struct softirq_action softirq_vec[32] __cacheline_aligned_in_smp;

// include/linux/interrupt.h/line: 117
struct softirq_action
{
	void	(*action)(struct softirq_action *);
	void	*data;
};

// include/linux/hardirq.h/line: 63
#define in_interrupt()		(irq_count())

// include/linux/hardirq.h/line: 55
#define irq_count()	(preempt_count() & (HARDIRQ_MASK | SOFTIRQ_MASK))

// include/linux/preempt.h/line: 23
#define preempt_count()	(current_thread_info()->preempt_count)

软中断开启

// kernel/softirq.c/line: 204
void open_softirq(int nr, void (*action)(struct softirq_action*), void *data)

// 按照前面的理解，软中断共 6 个；open_softirq() 分别在下面的函数中被调用，用来注册对应的软中断：
// kernel/softirq.c/line: 344
void __init softirq_init(void)	// TASKLET_SOFTIRQ, HI_SOFTIRQ
// kernel/timer.c/line: 1389
void __init init_timers(void)	// TIMER_SOFTIRQ
// net/core/dev.c/line: 3243
static int __init net_dev_init(void)	// NET_TX_SOFTIRQ, NET_RX_SOFTIRQ
// drivers/scsi/scsi.c/line: 1250
static int __init init_scsi(void)		// SCSI_SOFTIRQ

do_softirq()

如果内核栈是 4k，则使用下面的函数：

// arch/i386/kernel/irq.c/line: 155
asmlinkage void do_softirq(void)

否则使用：

// kernel/softirq.c/line: 116
asmlinkage void do_softirq(void)

两个函数的具体实现类似，《第三版》有解析。核心都会调用 __do_softirq()

__do_softirq()

// kernel/softirq.c/line: 74
asmlinkage void __do_softirq(void)
// 这是个公共函数，和硬件无关，《第三版》有解析

ksoftirqd 内核线程

// kernel/softirq.c/line: 350
static int ksoftirqd(void * __bind_cpu)
{
	...
	while (!kthread_should_stop()) {
		...
		while (local_softirq_pending()) {
			preempt_disable();
			...
			do_softirq();
			preempt_enable();
			cond_resched();
		}
		...
	}
	...
}
// 核心是对 do_softirq() 的调用

ksoftirqd 的创建流程：

init() 线程
->do_pre_smp_initcalls()
	-> spawn_ksoftirqd()
		-> cpu_callback()
			-> 创建 ksoftirqd() 内核线程

tasklet

工作队列

工作队列的数据结构

工作队列的数据结构的依赖关系有点小复杂：

// include/linux/workqueue.h/line: 12
struct workqueue_struct;	// 在 .h 里声明；在同名的 .c 里定义

struct work_struct {	// 这是一个双向链表的节点实体
	unsigned long pending;
	struct list_head entry;		// 关联相关的双向链表
	void (*func)(void *);
	void *data;
	void *wq_data;
	struct timer_list timer;
};

// kernel/workqueue.c/line: 38
struct cpu_workqueue_struct {
	...
	struct list_head worklist;		// 双向链表，链接到的结构体是: struct work_struct
	...
	struct workqueue_struct *wq;	// 被谁包含
	task_t *thread;
	...
} ____cacheline_aligned;

// kernel/workqueue.c/line: 59
struct workqueue_struct {	// 总的数据结构
	struct cpu_workqueue_struct cpu_wq[NR_CPUS];	// 每个 CPU 都预备一个
	const char *name;
	struct list_head list; 	/* Empty if single thread */
};

相关操作

// include/linux/workqueue.h/line: 53
extern struct workqueue_struct *__create_workqueue(const char *name,
						    int singlethread);
#define create_workqueue(name) __create_workqueue((name), 0)
#define create_singlethread_workqueue(name) __create_workqueue((name), 1)
// 两个宏定义，基于一个具体实现

__create_workqueue()

// kernel/workqueue.c/line: 304
struct workqueue_struct *__create_workqueue(const char *name, int singlethread)
{
	...
	struct workqueue_struct *wq;
	struct task_struct *p;
	...
	wq = kmalloc(sizeof(*wq), GFP_KERNEL);
	// 分配创建 workqueue_struct，一个结构体实体对应一个队列
	// __create_workqueue 每次被调用，只会申请一个 workqueue_struct 空间
	...
	if (singlethread) {
		INIT_LIST_HEAD(&wq->list);
		p = create_workqueue_thread(wq, 0);	// 核心代码，下文有分解
		...
	} else {
		spin_lock(&workqueue_lock);
		list_add(&wq->list, &workqueues);
		spin_unlock(&workqueue_lock);
		for_each_online_cpu(cpu) {
			p = create_workqueue_thread(wq, cpu);
			if (p) {
				kthread_bind(p, cpu);
				wake_up_process(p);
			} else
				destroy = 1;
		}
	}
	...
	return wq;
}

create_workqueue_thread()

// kernel/workqueue.c/line: 279
static struct task_struct *create_workqueue_thread(struct workqueue_struct *wq, int cpu)
{
	struct cpu_workqueue_struct *cwq = wq->cpu_wq + cpu;
	struct task_struct *p;
	...
	cwq->wq = wq;
	cwq->thread = NULL;
	...
	p = kthread_create(worker_thread, cwq, "%s/%d", wq->name, cpu);
	...
	cwq->thread = p;
	return p;
}
// 这个 kthread_create() 和之前分析的 kernel_thread() 看起来很像，但是不要混淆
// 看起来，前者是后者的进一步封装

// kernel/kthread.c/line: 122
struct task_struct *kthread_create(int (*threadfn)(void *data), void *data,
	const char namefmt[], ...)
{
	struct kthread_create_info create;
	DECLARE_WORK(work, keventd_create_kthread, &create);
	...
	queue_work(helper_wq, &work);	// 这个函数的调用流后面有拆解
	...
	return create.result;
}
// 这个函数不细拆了，重点看下内核线程的创建过程
// struct work_struct 承载的功能是把函数插入到工作队列，插入工作由 queue_work() 函数完成

// include/linux/workqueue.h/line: 30
#define DECLARE_WORK(n, f, d)					\
	struct work_struct n = __WORK_INITIALIZER(n, f, d)

// include/linux/workqueue.h/line: 23
#define __WORK_INITIALIZER(n, f, d) {				\
	.entry	= { &(n).entry, &(n).entry },			\
	.func = (f),						\
	.data = (d),						\
	.timer = TIMER_INITIALIZER(NULL, 0, 0),			\
	}

// 重点要关注的是 keventd_create_kthread 这个参数，它是 work_struct 的 .func
// kernel/kthread.c/line: 106
static void keventd_create_kthread(void *_create)
{
	...
	pid = kernel_thread(kthread, create, CLONE_FS | CLONE_FILES | SIGCHLD);
	...
}
// kernel/kthread.c/line: 67
static int kthread(void *_create)
// 这个函数本身没有循环：它调用传入的 threadfn，threadfn 返回后这个线程也就结束了
// 如果线程需要常驻，循环要写在 threadfn 自己里面，例如上文 ksoftirqd() 中的 while (!kthread_should_stop())

// kernel/workqueue.c/line: 97
int fastcall queue_work(struct workqueue_struct *wq, struct work_struct *work)
{
	...
	__queue_work(wq->cpu_wq + cpu, work);
	...
}

// kernel/workqueue.c/line: 77
static void __queue_work(struct cpu_workqueue_struct *cwq, struct work_struct *work)

目前来看，对工作队列的理解还不够充分，也不清楚它主要是要解决什么问题。
暂时先略过，后面有接触的时候再回来看。

从中断和异常返回

先重温下 thread_info 结构体

// include/asm-i386/thread_info.h/line: 28
struct thread_info {
	...
	unsigned long		flags;		/* low level flags */
	unsigned long		status;		/* thread-synchronous flags */
	__u32			cpu;		/* current CPU */
	__s32			preempt_count; /* 0 => preemptable, <0 => BUG */
	...
};
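// 各进程的 thread_info 位于该进程内核栈的底部，是常驻内存的；x86 上对 esp 做掩码运算即可得到它的地址，
// 中断/异常返回路径的汇编代码会把当前进程的 thread_info 地址放到 ebp 寄存器中
// 其中，flags 存放的标志中，就有返回前需要检查的标志，例如 TIF_NEED_RESCHED（需要重新调度）和 TIF_SIGPENDING（有待处理的信号）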
// 基于前面的信息，中断处理占用的是当前进程的时间片，所以从中断退出时，原则上会返回到当前进程
// 但是实际的返回行为以 flags 为准

// 其中，preempt_count 标识内核是否允许抢占
// 如果内核允许抢占的话，中断的返回会复杂一些