故事发展到今天,看到了块设备的注册以后,也将近了尾声。在剩下的日子里面我们了解一下块设备的IO请求是如何作用在硬件上面的。
前面我们说过do_ide_request是整个IDE接口驱动处理IO请求的开始,然后会调用start_request来处理这个任务。在上面的分析中我们只看了343-344行,因为前面的调用中是处理REQ_TYPE_ATA_TASKFILE事务的,我们执行了execute_drive_cmd就直接返回了。对于硬盘的数据传输等过程我们就没有那么早返回了,所以应该看到369行return drv->do_request(drive, rq, blk_rq_pos(rq)),对应到我们要说的disk来说,就是在ide-gd.c中注册的ide_gd_driver。
[ide-gd.c]
163 static struct ide_driver ide_gd_driver = {
164 .gen_driver = {
165 .owner = THIS_MODULE,
166 .name = "ide-gd",
167 .bus = &ide_bus_type,
168 },
169 .probe = ide_gd_probe,
170 .remove = ide_gd_remove,
171 .resume = ide_gd_resume,
172 .shutdown = ide_gd_shutdown,
173 .version = IDE_GD_VERSION,
174 .do_request = ide_gd_do_request,
175 #ifdef CONFIG_IDE_PROC_FS
176 .proc_entries = ide_disk_proc_entries,
177 .proc_devsets = ide_disk_proc_devsets,
178 #endif
179 };
目光转向174行
[ide-gd.c]
157 static ide_startstop_t ide_gd_do_request(ide_drive_t *drive,
158 struct request *rq, sector_t sector)
159 {
160 return drive->disk_ops->do_request(drive, rq, sector);
161 }
这里实际上就是调用了disk_ops的do_request来处理request。在前面ide_gd_probe中我们已经为ide_drive_t中的disk_ops赋值为 &ide_ata_disk_ops;。那么很自然这场戏的主角就要上台了。
[ide-disk.c]
const struct ide_disk_ops ide_ata_disk_ops = {
.check = ide_disk_check,
.set_capacity = ide_disk_set_capacity,
.get_capacity = ide_disk_get_capacity,
.setup = ide_disk_setup,
.flush = ide_disk_flush,
.init_media = ide_disk_init_media,
.set_doorlock = ide_disk_set_doorlock,
.do_request = ide_do_rw_disk,
.ioctl = ide_disk_ioctl,
};
对应do_request方法的是ide_do_rw_disk,这个函数就是我们ide硬盘处理数据的关键函数了。源码如下:
[ide-disk.c]
175 /*
176 * 268435455 == 137439 MB or 28bit limit
177 * 320173056 == 163929 MB or 48bit addressing
178 * 1073741822 == 549756 MB or 48bit addressing fake drive
179 */
180
181 static ide_startstop_t ide_do_rw_disk(ide_drive_t *drive, struct request *rq,
182 sector_t block)
183 {
184 ide_hwif_t *hwif = drive->hwif;
185
186 BUG_ON(drive->dev_flags & IDE_DFLAG_BLOCKED);
187 BUG_ON(!blk_fs_request(rq));
188
189 ledtrig_ide_activity();
190
191 pr_debug("%s: %sing: block=%llu, sectors=%u, buffer=0x%08lx\n",
192 drive->name, rq_data_dir(rq) == READ ? "read" : "writ",
193 (unsigned long long)block, blk_rq_sectors(rq),
194 (unsigned long)rq->buffer);
195
196 if (hwif->rw_disk)
197 hwif->rw_disk(drive, rq);
198
199 return __ide_do_rw_disk(drive, rq, block);
200 }
Linux的代码中总是在关键时候吊下胃口,这里同样是高高兴兴来看数据传输表演的,他偏偏又转到__ide_do_rw_disk(drive, rq, block)处理。没办法谁叫我们好奇呢?跟踪源码:
77 /*
78 * __ide_do_rw_disk() issues READ and WRITE commands to a disk,
79 * using LBA if supported, or CHS otherwise, to address sectors.
80 */
81 static ide_startstop_t __ide_do_rw_disk(ide_drive_t *drive, struct request *rq,
82 sector_t block)
83 {
84 ide_hwif_t *hwif = drive->hwif;
85 u16 nsectors = (u16)blk_rq_sectors(rq);
86 u8 lba48 = !!(drive->dev_flags & IDE_DFLAG_LBA48);
87 u8 dma = !!(drive->dev_flags & IDE_DFLAG_USING_DMA);
88 struct ide_cmd cmd;
89 struct ide_taskfile *tf = &cmd.tf;
90 ide_startstop_t rc;
91
92 if ((hwif->host_flags & IDE_HFLAG_NO_LBA48_DMA) && lba48 && dma) {
93 if (block + blk_rq_sectors(rq) > 1ULL << 28)
94 dma = 0;
95 else
96 lba48 = 0;
97 }
98
99 memset(&cmd, 0, sizeof(cmd));
100 cmd.valid.out.tf = IDE_VALID_OUT_TF | IDE_VALID_DEVICE;
101 cmd.valid.in.tf = IDE_VALID_IN_TF | IDE_VALID_DEVICE;
102
103 if (drive->dev_flags & IDE_DFLAG_LBA) {
104 if (lba48) {
105 pr_debug("%s: LBA=0x%012llx\n", drive->name,
106 (unsigned long long)block);
107
108 tf->nsect = nsectors & 0xff;
109 tf->lbal = (u8) block;
110 tf->lbam = (u8)(block >> 8);
111 tf->lbah = (u8)(block >> 16);
112 tf->device = ATA_LBA;
113
114 tf = &cmd.hob;
115 tf->nsect = (nsectors >> 8) & 0xff;
116 tf->lbal = (u8)(block >> 24);
117 if (sizeof(block) != 4) {
118 tf->lbam = (u8)((u64)block >> 32);
119 tf->lbah = (u8)((u64)block >> 40);
120 }
121
122 cmd.valid.out.hob = IDE_VALID_OUT_HOB;
123 cmd.valid.in.hob = IDE_VALID_IN_HOB;
124 cmd.tf_flags |= IDE_TFLAG_LBA48;
125 } else {
126 tf->nsect = nsectors & 0xff;
127 tf->lbal = block;
128 tf->lbam = block >>= 8;
129 tf->lbah = block >>= 8;
130 tf->device = ((block >> 8) & 0xf) | ATA_LBA;
131 }
132 } else {
133 unsigned int sect, head, cyl, track;
134
135 track = (int)block / drive->sect;
136 sect = (int)block % drive->sect + 1;
137 head = track % drive->head;
138 cyl = track / drive->head;
139
140 pr_debug("%s: CHS=%u/%u/%u\n", drive->name, cyl, head, sect);
141
142 tf->nsect = nsectors & 0xff;
143 tf->lbal = sect;
144 tf->lbam = cyl;
145 tf->lbah = cyl >> 8;
146 tf->device = head;
147 }
148
149 cmd.tf_flags |= IDE_TFLAG_FS;
150
151 if (rq_data_dir(rq))
152 cmd.tf_flags |= IDE_TFLAG_WRITE;
153
154 ide_tf_set_cmd(drive, &cmd, dma);
155 cmd.rq = rq;
156
157 if (dma == 0) {
158 ide_init_sg_cmd(&cmd, nsectors << 9);
159 ide_map_sg(drive, &cmd);
160 }
161
162 rc = do_rw_taskfile(drive, &cmd);
163
164 if (rc == ide_stopped && dma) {
165 /* fallback to PIO */
166 cmd.tf_flags |= IDE_TFLAG_DMA_PIO_FALLBACK;
167 ide_tf_set_cmd(drive, &cmd, 0);
168 ide_init_sg_cmd(&cmd, nsectors << 9);
169 rc = do_rw_taskfile(drive, &cmd);
170 }
171
172 return rc;
173 }
103-131行这段代码就是针对利用LBA方式访问的,也是我们用的最多的一段代码。这段里面主要就是设置了cmd,您可别小瞧了这个ide_cmd,它就是__ide_do_rw_disk的通信员,上面有什么请求最后都是转换成cmd下达给硬件,也就是调用162行的rc = do_rw_taskfile(drive, &cmd); 前面我们对其进行过简单分析,这里不妨再来看一下:
[ide-taskfile.c]
77 ide_startstop_t do_rw_taskfile(ide_drive_t *drive, struct ide_cmd *orig_cmd)
78 {
79 ide_hwif_t *hwif = drive->hwif;
80 struct ide_cmd *cmd = &hwif->cmd;
81 struct ide_taskfile *tf = &cmd->tf;
82 ide_handler_t *handler = NULL;
83 const struct ide_tp_ops *tp_ops = hwif->tp_ops;
84 const struct ide_dma_ops *dma_ops = hwif->dma_ops;
85
86 if (orig_cmd->protocol == ATA_PROT_PIO &&
87 (orig_cmd->tf_flags & IDE_TFLAG_MULTI_PIO) &&
88 drive->mult_count == 0) {
89 pr_err("%s: multimode not set!\n", drive->name);
90 return ide_stopped;
91 }
92
93 if (orig_cmd->ftf_flags & IDE_FTFLAG_FLAGGED)
94 orig_cmd->ftf_flags |= IDE_FTFLAG_SET_IN_FLAGS;
95
96 memcpy(cmd, orig_cmd, sizeof(*cmd));
97
98 if ((cmd->tf_flags & IDE_TFLAG_DMA_PIO_FALLBACK) == 0) {
99 ide_tf_dump(drive->name, cmd);
100 tp_ops->write_devctl(hwif, ATA_DEVCTL_OBS);
101
102 if (cmd->ftf_flags & IDE_FTFLAG_OUT_DATA) {
103 u8 data[2] = { cmd->tf.data, cmd->hob.data };
104
105 tp_ops->output_data(drive, cmd, data, 2);
106 }
107
108 if (cmd->valid.out.tf & IDE_VALID_DEVICE) {
109 u8 HIHI = (cmd->tf_flags & IDE_TFLAG_LBA48) ?
110 0xE0 : 0xEF;
111
112 if (!(cmd->ftf_flags & IDE_FTFLAG_FLAGGED))
113 cmd->tf.device &= HIHI;
114 cmd->tf.device |= drive->select;
115 }
116
117 tp_ops->tf_load(drive, &cmd->hob, cmd->valid.out.hob);
118 tp_ops->tf_load(drive, &cmd->tf, cmd->valid.out.tf);
119 }
120
121 switch (cmd->protocol) {
122 case ATA_PROT_PIO:
123 if (cmd->tf_flags & IDE_TFLAG_WRITE) {
124 tp_ops->exec_command(hwif, tf->command);
125 ndelay(400); /* FIXME */
126 return pre_task_out_intr(drive, cmd);
127 }
128 handler = task_pio_intr;
129 /* fall-through */
130 case ATA_PROT_NODATA:
131 if (handler == NULL)
132 handler = task_no_data_intr;
133 ide_execute_command(drive, cmd, handler, WAIT_WORSTCASE);
134 return ide_started;
135 case ATA_PROT_DMA:
136 if (ide_dma_prepare(drive, cmd))
137 return ide_stopped;
138 hwif->expiry = dma_ops->dma_timer_expiry;
139 ide_execute_command(drive, cmd, ide_dma_intr, 2 * WAIT_CMD);
140 dma_ops->dma_start(drive);
141 default:
142 return ide_started;
143 }
144 }
对于我们使用PIO方式的数据传输来说,最关心的莫过于122-129行,前面我们说的是130-134行。但是这里我们要注意一个问题,就是PIO方式的读写在这里就分开了。对于写方式走的是126行return pre_task_out_intr(drive, cmd); 对于读方式会设置一个handler以后就跳到133行,这是最基本的C语言语法了,好了我们按顺序来看。首先是pre_task_out_intr,源码如下:
[ide-taskfile.c]
403 static ide_startstop_t pre_task_out_intr(ide_drive_t *drive,
404 struct ide_cmd *cmd)
405 {
406 ide_startstop_t startstop;
407
408 if (ide_wait_stat(&startstop, drive, ATA_DRQ,
409 drive->bad_wstat, WAIT_DRQ)) {
410 pr_err("%s: no DRQ after issuing %sWRITE%s\n", drive->name,
411 (cmd->tf_flags & IDE_TFLAG_MULTI_PIO) ? "MULT" : "",
412 (drive->dev_flags & IDE_DFLAG_LBA48) ? "_EXT" : "");
413 return startstop;
414 }
415
416 if ((drive->dev_flags & IDE_DFLAG_UNMASK) == 0)
417 local_irq_disable();
418
419 ide_set_handler(drive, &task_pio_intr, WAIT_WORSTCASE);
420
421 ide_pio_datablock(drive, cmd, 1);
422
423 return ide_started;
424 }
408行这个前面讲硬盘的RDY和BUSY的时候已经说的很详细了,就不再多说。剩下的问题就落实到419行,ide_set_handler(drive, &task_pio_intr, WAIT_WORSTCASE);赤裸裸地将&task_pio_intr中断向量给了前面所说的那个NB的共享中断向量,当然这里也就把我们前面说到的那个用于超时的timer进行了设置,前面说过这里就不详述了。关于这个向量的具体内容等我们看完读操作那边的函数再来分析,两者所用的handler是一样的。
421行这一行可以说是我们PIO传输的关键中的关键,空话就不说了,直接看代码:
[ide-taskfile.c]
281 static void ide_pio_datablock(ide_drive_t *drive, struct ide_cmd *cmd,
282 unsigned int write)
283 {
284 unsigned int nr_bytes;
285
286 u8 saved_io_32bit = drive->io_32bit;
287
288 if (cmd->tf_flags & IDE_TFLAG_FS)
289 cmd->rq->errors = 0;
290
291 if (cmd->tf_flags & IDE_TFLAG_IO_16BIT)
292 drive->io_32bit = 0;
293
294 touch_softlockup_watchdog();
295
296 if (cmd->tf_flags & IDE_TFLAG_MULTI_PIO)
297 nr_bytes = min_t(unsigned, cmd->nleft, drive->mult_count << 9);
298 else
299 nr_bytes = SECTOR_SIZE;
300
301 ide_pio_bytes(drive, cmd, write, nr_bytes);
302
303 drive->io_32bit = saved_io_32bit;
304 }
毫无疑问301行成了万花丛中的一点红,真实的数据传输就要到来了:
[ide-taskfile.c]
222 void ide_pio_bytes(ide_drive_t *drive, struct ide_cmd *cmd,
223 unsigned int write, unsigned int len)
224 {
225 ide_hwif_t *hwif = drive->hwif;
226 struct scatterlist *sg = hwif->sg_table;
227 struct scatterlist *cursg = cmd->cursg;
228 unsigned long uninitialized_var(flags);
229 struct page *page;
230 unsigned int offset;
231 u8 *buf;
232
233 cursg = cmd->cursg;
234 if (cursg == NULL)
235 cursg = cmd->cursg = sg;
236
237 while (len) {
238 unsigned nr_bytes = min(len, cursg->length - cmd->cursg_ofs);
239 int page_is_high;
240
241 if (nr_bytes > PAGE_SIZE)
242 nr_bytes = PAGE_SIZE;
243
244 page = sg_page(cursg);
245 offset = cursg->offset + cmd->cursg_ofs;
246
247 /* get the current page and offset */
248 page = nth_page(page, (offset >> PAGE_SHIFT));
249 offset %= PAGE_SIZE;
250
251 page_is_high = PageHighMem(page);
252 if (page_is_high)
253 local_irq_save(flags);
254
255 buf = kmap_atomic(page, KM_BIO_SRC_IRQ) + offset;
256
257 cmd->nleft -= nr_bytes;
258 cmd->cursg_ofs += nr_bytes;
259
260 if (cmd->cursg_ofs == cursg->length) {
261 cursg = cmd->cursg = sg_next(cmd->cursg);
262 cmd->cursg_ofs = 0;
263 }
264
265 /* do the actual data transfer */
266 if (write)
267 hwif->tp_ops->output_data(drive, cmd, buf, nr_bytes);
268 else
269 hwif->tp_ops->input_data(drive, cmd, buf, nr_bytes);
270
271 kunmap_atomic(buf, KM_BIO_SRC_IRQ);
272
273 if (page_is_high)
274 local_irq_restore(flags);
275
276 len -= nr_bytes;
277 }
278 }
266-269行就是真正数据传送的关键了,这个函数完成了以后,硬盘会将数据从cache中一个个写到硬盘的物理扇区上,等到写完以后会引发一个中断,经过七七四十九难过后,最终会调用task_pio_intr来进行善后工作。总的来讲这个函数比较简单,就不再详细分析了。
然而,在读硬盘中使用的是ide_execute_command,这个我们前面处理ATA_PROT_NODATA的时候已经说过了,比较简单,可以自己进去看一下,就是执行了一个命令。这里就不再展开说了,最后的问题就集中到了task_pio_intr,下面就来一睹芳容……
[ide-taskfile.c]
341 /*
342 * Handler for command with PIO data phase.
343 */
344 static ide_startstop_t task_pio_intr(ide_drive_t *drive)
345 {
346 ide_hwif_t *hwif = drive->hwif;
347 struct ide_cmd *cmd = &drive->hwif->cmd;
348 u8 stat = hwif->tp_ops->read_status(hwif);
349 u8 write = !!(cmd->tf_flags & IDE_TFLAG_WRITE);
350
351 if (write == 0) {
352 /* Error? */
353 if (stat & ATA_ERR)
354 goto out_err;
355
356 /* Didn't want any data? Odd. */
357 if ((stat & ATA_DRQ) == 0) {
358 /* Command all done? */
359 if (OK_STAT(stat, ATA_DRDY, ATA_BUSY))
360 goto out_end;
361
362 /* Assume it was a spurious irq */
363 goto out_wait;
364 }
365 } else {
366 if (!OK_STAT(stat, DRIVE_READY, drive->bad_wstat))
367 goto out_err;
368
369 /* Deal with unexpected ATA data phase. */
370 if (((stat & ATA_DRQ) == 0) ^ (cmd->nleft == 0))
371 goto out_err;
372 }
373
374 if (write && cmd->nleft == 0)
375 goto out_end;
376
377 /* Still data left to transfer. */
378 ide_pio_datablock(drive, cmd, write);
379
380 /* Are we done? Check status and finish transfer. */
381 if (write == 0 && cmd->nleft == 0) {
382 stat = wait_drive_not_busy(drive);
383 if (!OK_STAT(stat, 0, BAD_STAT))
384 goto out_err;
385
386 goto out_end;
387 }
388 out_wait:
389 /* Still data left to transfer. */
390 ide_set_handler(drive, &task_pio_intr, WAIT_WORSTCASE);
391 return ide_started;
392 out_end:
393 if ((cmd->tf_flags & IDE_TFLAG_FS) == 0)
394 ide_finish_cmd(drive, cmd, stat);
395 else
396 ide_complete_rq(drive, 0, blk_rq_sectors(cmd->rq) << 9);
397 return ide_stopped;
398 out_err:
399 ide_error_cmd(drive, cmd);
400 return ide_error(drive, __func__, stat);
401 }
378行ide_pio_datablock是为读数据而设置的,读数据时只有请求的数据全部写入cache以后才会触发中断,也就是在这个中断处理当中,读函数实现了自己的价值。函数中的其他内容就是一些善后的工作了,前面能说的都已经说过了,这里就不重复了。
关于IDE的驱动,重点的内容都已经分析完了,存在很多不足和错误之处,这里旨在给出一个分析linux驱动的方法。分析完一个子系统的驱动,其他的子系统也就大同小异了。