How to get the request_id of a previous request in a request set?

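本文介绍了一个Oracle PL/SQL函数,用于在当前并发请求所属的请求层次中,按用户并发程序名称(支持LIKE通配符)查找先前提交的请求并返回其请求ID。函数首先根据配置文件选项CONC_REQUEST_ID取得当前请求的父请求ID,然后以该父请求为根,通过层次查询(START WITH ... CONNECT BY)遍历其下的所有请求,并返回程序名称匹配且request_id最小的那个请求;若没有匹配的请求,则返回NULL。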

  FUNCTION get_prev_req_id(p_conc_program IN VARCHAR2) RETURN VARCHAR2
  IS
    l_parent_req_id         NUMBER;
    l_request_id            NUMBER;
  BEGIN
    SELECT parent_request_id
      INTO l_parent_req_id
      FROM fnd_concurrent_requests
     WHERE request_id = FND_PROFILE.VALUE('CONC_REQUEST_ID');
    FOR c IN (SELECT request_id,fcpt.user_concurrent_program_name
                FROM (SELECT     request_id, concurrent_program_id
                            FROM fnd_concurrent_requests aaa
                      START WITH request_id = l_parent_req_id
                      CONNECT BY PRIOR request_id = parent_request_id) cps, fnd_concurrent_programs_tl fcpt
               WHERE cps.concurrent_program_id = fcpt.concurrent_program_id
                 AND fcpt.user_concurrent_program_name LIKE p_conc_program
               ORDER BY request_id)
    LOOP
      l_request_id := c.request_id;
      EXIT;
    END LOOP;
    RETURN l_request_id ;
  END;

 


 

xref: /casio_MT6878_16.0.0_master/vnd/kernel-6.1/block/blk-core.c HomeAnnotateLine# Scopes# Navigate#Raw Download current directory 1 // SPDX-License-Identifier: GPL-2.0 2 /* 3 * Copyright (C) 1991, 1992 Linus Torvalds 4 * Copyright (C) 1994, Karl Keyte: Added support for disk statistics 5 * Elevator latency, (C) 2000 Andrea Arcangeli <andrea@suse.de> SuSE 6 * Queue request tables / lock, selectable elevator, Jens Axboe <axboe@suse.de> 7 * kernel-doc documentation started by NeilBrown <neilb@cse.unsw.edu.au> 8 * - July2000 9 * bio rewrite, highmem i/o, etc, Jens Axboe <axboe@suse.de> - may 2001 10 */ 11 12 /* 13 * This handles all read/write requests to block devices 14 */ 15 #include <linux/kernel.h> 16 #include <linux/module.h> 17 #include <linux/bio.h> 18 #include <linux/blkdev.h> 19 #include <linux/blk-pm.h> 20 #include <linux/blk-integrity.h> 21 #include <linux/highmem.h> 22 #include <linux/mm.h> 23 #include <linux/pagemap.h> 24 #include <linux/kernel_stat.h> 25 #include <linux/string.h> 26 #include <linux/init.h> 27 #include <linux/completion.h> 28 #include <linux/slab.h> 29 #include <linux/swap.h> 30 #include <linux/writeback.h> 31 #include <linux/task_io_accounting_ops.h> 32 #include <linux/fault-inject.h> 33 #include <linux/list_sort.h> 34 #include <linux/delay.h> 35 #include <linux/ratelimit.h> 36 #include <linux/pm_runtime.h> 37 #include <linux/t10-pi.h> 38 #include <linux/debugfs.h> 39 #include <linux/bpf.h> 40 #include <linux/part_stat.h> 41 #include <linux/sched/sysctl.h> 42 #include <linux/blk-crypto.h> 43 44 #define CREATE_TRACE_POINTS 45 #include <trace/events/block.h> 46 47 #include "blk.h" 48 #ifndef __GENKSYMS__ 49 #include "blk-mq-debugfs.h" 50 #endif 51 #include "blk-mq-sched.h" 52 #include "blk-pm.h" 53 #include "blk-cgroup.h" 54 #include "blk-throttle.h" 55 #include "blk-ioprio.h" 56 57 #ifdef CONFIG_BLK_MQ_USE_LOCAL_THREAD 58 extern long bio_cnt; // total bio sumbit 59 extern long rt_bio_cnt; // total rt bio sumbit, part of bio_cnt 60 
extern long ux_bio_cnt; // total ux bio sumbit, part of rt_bio_cnt 61 #endif 62 63 struct dentry *blk_debugfs_root; 64 65 EXPORT_TRACEPOINT_SYMBOL_GPL(block_bio_remap); 66 EXPORT_TRACEPOINT_SYMBOL_GPL(block_rq_remap); 67 EXPORT_TRACEPOINT_SYMBOL_GPL(block_bio_complete); 68 EXPORT_TRACEPOINT_SYMBOL_GPL(block_split); 69 EXPORT_TRACEPOINT_SYMBOL_GPL(block_unplug); 70 EXPORT_TRACEPOINT_SYMBOL_GPL(block_rq_insert); 71 EXPORT_TRACEPOINT_SYMBOL_GPL(block_bio_queue); 72 EXPORT_TRACEPOINT_SYMBOL_GPL(block_getrq); 73 EXPORT_TRACEPOINT_SYMBOL_GPL(block_rq_issue); 74 EXPORT_TRACEPOINT_SYMBOL_GPL(block_rq_merge); 75 EXPORT_TRACEPOINT_SYMBOL_GPL(block_rq_requeue); 76 EXPORT_TRACEPOINT_SYMBOL_GPL(block_rq_complete); 77 78 DEFINE_IDA(blk_queue_ida); 79 80 /* 81 * For queue allocation 82 */ 83 struct kmem_cache *blk_requestq_cachep; 84 struct kmem_cache *blk_requestq_srcu_cachep; 85 86 /* 87 * Controlling structure to kblockd 88 */ 89 static struct workqueue_struct *kblockd_workqueue; 90 91 /** 92 * blk_queue_flag_set - atomically set a queue flag 93 * @flag: flag to be set 94 * @q: request queue 95 */ 96 void blk_queue_flag_set(unsigned int flag, struct request_queue *q) 97 { 98 set_bit(flag, &q->queue_flags); 99 } 100 EXPORT_SYMBOL(blk_queue_flag_set); 101 102 /** 103 * blk_queue_flag_clear - atomically clear a queue flag 104 * @flag: flag to be cleared 105 * @q: request queue 106 */ 107 void blk_queue_flag_clear(unsigned int flag, struct request_queue *q) 108 { 109 clear_bit(flag, &q->queue_flags); 110 } 111 EXPORT_SYMBOL(blk_queue_flag_clear); 112 113 /** 114 * blk_queue_flag_test_and_set - atomically test and set a queue flag 115 * @flag: flag to be set 116 * @q: request queue 117 * 118 * Returns the previous value of @flag - 0 if the flag was not set and 1 if 119 * the flag was already set. 
120 */ 121 bool blk_queue_flag_test_and_set(unsigned int flag, struct request_queue *q) 122 { 123 return test_and_set_bit(flag, &q->queue_flags); 124 } 125 EXPORT_SYMBOL_GPL(blk_queue_flag_test_and_set); 126 127 #define REQ_OP_NAME(name) [REQ_OP_##name] = #name 128 static const char *const blk_op_name[] = { 129 REQ_OP_NAME(READ), 130 REQ_OP_NAME(WRITE), 131 REQ_OP_NAME(FLUSH), 132 REQ_OP_NAME(DISCARD), 133 REQ_OP_NAME(SECURE_ERASE), 134 REQ_OP_NAME(ZONE_RESET), 135 REQ_OP_NAME(ZONE_RESET_ALL), 136 REQ_OP_NAME(ZONE_OPEN), 137 REQ_OP_NAME(ZONE_CLOSE), 138 REQ_OP_NAME(ZONE_FINISH), 139 REQ_OP_NAME(ZONE_APPEND), 140 REQ_OP_NAME(WRITE_ZEROES), 141 REQ_OP_NAME(DRV_IN), 142 REQ_OP_NAME(DRV_OUT), 143 }; 144 #undef REQ_OP_NAME 145 146 /** 147 * blk_op_str - Return string XXX in the REQ_OP_XXX. 148 * @op: REQ_OP_XXX. 149 * 150 * Description: Centralize block layer function to convert REQ_OP_XXX into 151 * string format. Useful in the debugging and tracing bio or request. For 152 * invalid REQ_OP_XXX it returns string "UNKNOWN". 
153 */ 154 inline const char *blk_op_str(enum req_op op) 155 { 156 const char *op_str = "UNKNOWN"; 157 158 if (op < ARRAY_SIZE(blk_op_name) && blk_op_name[op]) 159 op_str = blk_op_name[op]; 160 161 return op_str; 162 } 163 EXPORT_SYMBOL_GPL(blk_op_str); 164 165 static const struct { 166 int errno; 167 const char *name; 168 } blk_errors[] = { 169 [BLK_STS_OK] = { 0, "" }, 170 [BLK_STS_NOTSUPP] = { -EOPNOTSUPP, "operation not supported" }, 171 [BLK_STS_TIMEOUT] = { -ETIMEDOUT, "timeout" }, 172 [BLK_STS_NOSPC] = { -ENOSPC, "critical space allocation" }, 173 [BLK_STS_TRANSPORT] = { -ENOLINK, "recoverable transport" }, 174 [BLK_STS_TARGET] = { -EREMOTEIO, "critical target" }, 175 [BLK_STS_NEXUS] = { -EBADE, "critical nexus" }, 176 [BLK_STS_MEDIUM] = { -ENODATA, "critical medium" }, 177 [BLK_STS_PROTECTION] = { -EILSEQ, "protection" }, 178 [BLK_STS_RESOURCE] = { -ENOMEM, "kernel resource" }, 179 [BLK_STS_DEV_RESOURCE] = { -EBUSY, "device resource" }, 180 [BLK_STS_AGAIN] = { -EAGAIN, "nonblocking retry" }, 181 [BLK_STS_OFFLINE] = { -ENODEV, "device offline" }, 182 183 /* device mapper special case, should not leak out: */ 184 [BLK_STS_DM_REQUEUE] = { -EREMCHG, "dm internal retry" }, 185 186 /* zone device specific errors */ 187 [BLK_STS_ZONE_OPEN_RESOURCE] = { -ETOOMANYREFS, "open zones exceeded" }, 188 [BLK_STS_ZONE_ACTIVE_RESOURCE] = { -EOVERFLOW, "active zones exceeded" }, 189 190 /* everything else not covered above: */ 191 [BLK_STS_IOERR] = { -EIO, "I/O" }, 192 }; 193 194 blk_status_t errno_to_blk_status(int errno) 195 { 196 int i; 197 198 for (i = 0; i < ARRAY_SIZE(blk_errors); i++) { 199 if (blk_errors[i].errno == errno) 200 return (__force blk_status_t)i; 201 } 202 203 return BLK_STS_IOERR; 204 } 205 EXPORT_SYMBOL_GPL(errno_to_blk_status); 206 207 int blk_status_to_errno(blk_status_t status) 208 { 209 int idx = (__force int)status; 210 211 if (WARN_ON_ONCE(idx >= ARRAY_SIZE(blk_errors))) 212 return -EIO; 213 return blk_errors[idx].errno; 214 } 215 
EXPORT_SYMBOL_GPL(blk_status_to_errno); 216 217 const char *blk_status_to_str(blk_status_t status) 218 { 219 int idx = (__force int)status; 220 221 if (WARN_ON_ONCE(idx >= ARRAY_SIZE(blk_errors))) 222 return "<null>"; 223 return blk_errors[idx].name; 224 } 225 226 /** 227 * blk_sync_queue - cancel any pending callbacks on a queue 228 * @q: the queue 229 * 230 * Description: 231 * The block layer may perform asynchronous callback activity 232 * on a queue, such as calling the unplug function after a timeout. 233 * A block device may call blk_sync_queue to ensure that any 234 * such activity is cancelled, thus allowing it to release resources 235 * that the callbacks might use. The caller must already have made sure 236 * that its ->submit_bio will not re-add plugging prior to calling 237 * this function. 238 * 239 * This function does not cancel any asynchronous activity arising 240 * out of elevator or throttling code. That would require elevator_exit() 241 * and blkcg_exit_queue() to be called with queue lock initialized. 242 * 243 */ 244 void blk_sync_queue(struct request_queue *q) 245 { 246 del_timer_sync(&q->timeout); 247 cancel_work_sync(&q->timeout_work); 248 } 249 EXPORT_SYMBOL(blk_sync_queue); 250 251 /** 252 * blk_set_pm_only - increment pm_only counter 253 * @q: request queue pointer 254 */ 255 void blk_set_pm_only(struct request_queue *q) 256 { 257 atomic_inc(&q->pm_only); 258 } 259 EXPORT_SYMBOL_GPL(blk_set_pm_only); 260 261 void blk_clear_pm_only(struct request_queue *q) 262 { 263 int pm_only; 264 265 pm_only = atomic_dec_return(&q->pm_only); 266 WARN_ON_ONCE(pm_only < 0); 267 if (pm_only == 0) 268 wake_up_all(&q->mq_freeze_wq); 269 } 270 EXPORT_SYMBOL_GPL(blk_clear_pm_only); 271 272 /** 273 * blk_put_queue - decrement the request_queue refcount 274 * @q: the request_queue structure to decrement the refcount for 275 * 276 * Decrements the refcount of the request_queue kobject. When this reaches 0 277 * we'll have blk_release_queue() called. 
278 * 279 * Context: Any context, but the last reference must not be dropped from 280 * atomic context. 281 */ 282 void blk_put_queue(struct request_queue *q) 283 { 284 kobject_put(&q->kobj); 285 } 286 EXPORT_SYMBOL(blk_put_queue); 287 288 void blk_queue_start_drain(struct request_queue *q) 289 { 290 /* 291 * When queue DYING flag is set, we need to block new req 292 * entering queue, so we call blk_freeze_queue_start() to 293 * prevent I/O from crossing blk_queue_enter(). 294 */ 295 blk_freeze_queue_start(q); 296 if (queue_is_mq(q)) 297 blk_mq_wake_waiters(q); 298 /* Make blk_queue_enter() reexamine the DYING flag. */ 299 wake_up_all(&q->mq_freeze_wq); 300 } 301 302 /** 303 * blk_queue_enter() - try to increase q->q_usage_counter 304 * @q: request queue pointer 305 * @flags: BLK_MQ_REQ_NOWAIT and/or BLK_MQ_REQ_PM 306 */ 307 int blk_queue_enter(struct request_queue *q, blk_mq_req_flags_t flags) 308 { 309 const bool pm = flags & BLK_MQ_REQ_PM; 310 311 while (!blk_try_enter_queue(q, pm)) { 312 if (flags & BLK_MQ_REQ_NOWAIT) 313 return -EAGAIN; 314 315 /* 316 * read pair of barrier in blk_freeze_queue_start(), we need to 317 * order reading __PERCPU_REF_DEAD flag of .q_usage_counter and 318 * reading .mq_freeze_depth or queue dying flag, otherwise the 319 * following wait may never return if the two reads are 320 * reordered. 
321 */ 322 smp_rmb(); 323 wait_event(q->mq_freeze_wq, 324 (!q->mq_freeze_depth && 325 blk_pm_resume_queue(pm, q)) || 326 blk_queue_dying(q)); 327 if (blk_queue_dying(q)) 328 return -ENODEV; 329 } 330 331 return 0; 332 } 333 334 int __bio_queue_enter(struct request_queue *q, struct bio *bio) 335 { 336 while (!blk_try_enter_queue(q, false)) { 337 struct gendisk *disk = bio->bi_bdev->bd_disk; 338 339 if (bio->bi_opf & REQ_NOWAIT) { 340 if (test_bit(GD_DEAD, &disk->state)) 341 goto dead; 342 bio_wouldblock_error(bio); 343 return -EAGAIN; 344 } 345 346 /* 347 * read pair of barrier in blk_freeze_queue_start(), we need to 348 * order reading __PERCPU_REF_DEAD flag of .q_usage_counter and 349 * reading .mq_freeze_depth or queue dying flag, otherwise the 350 * following wait may never return if the two reads are 351 * reordered. 352 */ 353 smp_rmb(); 354 wait_event(q->mq_freeze_wq, 355 (!q->mq_freeze_depth && 356 blk_pm_resume_queue(false, q)) || 357 test_bit(GD_DEAD, &disk->state)); 358 if (test_bit(GD_DEAD, &disk->state)) 359 goto dead; 360 } 361 362 return 0; 363 dead: 364 bio_io_error(bio); 365 return -ENODEV; 366 } 367 368 void blk_queue_exit(struct request_queue *q) 369 { 370 percpu_ref_put(&q->q_usage_counter); 371 } 372 373 static void blk_queue_usage_counter_release(struct percpu_ref *ref) 374 { 375 struct request_queue *q = 376 container_of(ref, struct request_queue, q_usage_counter); 377 378 wake_up_all(&q->mq_freeze_wq); 379 } 380 381 static void blk_rq_timed_out_timer(struct timer_list *t) 382 { 383 struct request_queue *q = from_timer(q, t, timeout); 384 385 kblockd_schedule_work(&q->timeout_work); 386 } 387 388 static void blk_timeout_work(struct work_struct *work) 389 { 390 } 391 392 struct request_queue *blk_alloc_queue(int node_id, bool alloc_srcu) 393 { 394 struct request_queue *q; 395 396 q = kmem_cache_alloc_node(blk_get_queue_kmem_cache(alloc_srcu), 397 GFP_KERNEL | __GFP_ZERO, node_id); 398 if (!q) 399 return NULL; 400 401 if (alloc_srcu) { 402 
blk_queue_flag_set(QUEUE_FLAG_HAS_SRCU, q); 403 if (init_srcu_struct(q->srcu) != 0) 404 goto fail_q; 405 } 406 407 q->last_merge = NULL; 408 409 q->id = ida_alloc(&blk_queue_ida, GFP_KERNEL); 410 if (q->id < 0) 411 goto fail_srcu; 412 413 q->stats = blk_alloc_queue_stats(); 414 if (!q->stats) 415 goto fail_id; 416 417 q->node = node_id; 418 419 atomic_set(&q->nr_active_requests_shared_tags, 0); 420 421 timer_setup(&q->timeout, blk_rq_timed_out_timer, 0); 422 INIT_WORK(&q->timeout_work, blk_timeout_work); 423 INIT_LIST_HEAD(&q->icq_list); 424 425 kobject_init(&q->kobj, &blk_queue_ktype); 426 427 mutex_init(&q->debugfs_mutex); 428 mutex_init(&q->sysfs_lock); 429 mutex_init(&q->sysfs_dir_lock); 430 spin_lock_init(&q->queue_lock); 431 432 init_waitqueue_head(&q->mq_freeze_wq); 433 mutex_init(&q->mq_freeze_lock); 434 435 /* 436 * Init percpu_ref in atomic mode so that it's faster to shutdown. 437 * See blk_register_queue() for details. 438 */ 439 if (percpu_ref_init(&q->q_usage_counter, 440 blk_queue_usage_counter_release, 441 PERCPU_REF_INIT_ATOMIC, GFP_KERNEL)) 442 goto fail_stats; 443 444 blk_set_default_limits(&q->limits); 445 q->nr_requests = BLKDEV_DEFAULT_RQ; 446 447 return q; 448 449 fail_stats: 450 blk_free_queue_stats(q->stats); 451 fail_id: 452 ida_free(&blk_queue_ida, q->id); 453 fail_srcu: 454 if (alloc_srcu) 455 cleanup_srcu_struct(q->srcu); 456 fail_q: 457 kmem_cache_free(blk_get_queue_kmem_cache(alloc_srcu), q); 458 return NULL; 459 } 460 461 /** 462 * blk_get_queue - increment the request_queue refcount 463 * @q: the request_queue structure to increment the refcount for 464 * 465 * Increment the refcount of the request_queue kobject. 466 * 467 * Context: Any context. 
468 */ 469 bool blk_get_queue(struct request_queue *q) 470 { 471 if (unlikely(blk_queue_dying(q))) 472 return false; 473 kobject_get(&q->kobj); 474 return true; 475 } 476 EXPORT_SYMBOL(blk_get_queue); 477 478 #ifdef CONFIG_FAIL_MAKE_REQUEST 479 480 static DECLARE_FAULT_ATTR(fail_make_request); 481 482 static int __init setup_fail_make_request(char *str) 483 { 484 return setup_fault_attr(&fail_make_request, str); 485 } 486 __setup("fail_make_request=", setup_fail_make_request); 487 488 bool should_fail_request(struct block_device *part, unsigned int bytes) 489 { 490 return part->bd_make_it_fail && should_fail(&fail_make_request, bytes); 491 } 492 493 static int __init fail_make_request_debugfs(void) 494 { 495 struct dentry *dir = fault_create_debugfs_attr("fail_make_request", 496 NULL, &fail_make_request); 497 498 return PTR_ERR_OR_ZERO(dir); 499 } 500 501 late_initcall(fail_make_request_debugfs); 502 #endif /* CONFIG_FAIL_MAKE_REQUEST */ 503 504 static inline void bio_check_ro(struct bio *bio) 505 { 506 if (op_is_write(bio_op(bio)) && bdev_read_only(bio->bi_bdev)) { 507 if (op_is_flush(bio->bi_opf) && !bio_sectors(bio)) 508 return; 509 pr_warn_ratelimited("Trying to write to read-only block-device %pg\n", 510 bio->bi_bdev); 511 /* Older lvm-tools actually trigger this */ 512 } 513 } 514 515 static noinline int should_fail_bio(struct bio *bio) 516 { 517 if (should_fail_request(bdev_whole(bio->bi_bdev), bio->bi_iter.bi_size)) 518 return -EIO; 519 return 0; 520 } 521 ALLOW_ERROR_INJECTION(should_fail_bio, ERRNO); 522 523 /* 524 * Check whether this bio extends beyond the end of the device or partition. 525 * This may well happen - the kernel calls bread() without checking the size of 526 * the device, e.g., when mounting a file system. 
527 */ 528 static inline int bio_check_eod(struct bio *bio) 529 { 530 sector_t maxsector = bdev_nr_sectors(bio->bi_bdev); 531 unsigned int nr_sectors = bio_sectors(bio); 532 533 if (nr_sectors && maxsector && 534 (nr_sectors > maxsector || 535 bio->bi_iter.bi_sector > maxsector - nr_sectors)) { 536 pr_info_ratelimited("%s: attempt to access beyond end of device\n" 537 "%pg: rw=%d, sector=%llu, nr_sectors = %u limit=%llu\n", 538 current->comm, bio->bi_bdev, bio->bi_opf, 539 bio->bi_iter.bi_sector, nr_sectors, maxsector); 540 return -EIO; 541 } 542 return 0; 543 } 544 545 /* 546 * Remap block n of partition p to block n+start(p) of the disk. 547 */ 548 static int blk_partition_remap(struct bio *bio) 549 { 550 struct block_device *p = bio->bi_bdev; 551 552 if (unlikely(should_fail_request(p, bio->bi_iter.bi_size))) 553 return -EIO; 554 if (bio_sectors(bio)) { 555 bio->bi_iter.bi_sector += p->bd_start_sect; 556 trace_block_bio_remap(bio, p->bd_dev, 557 bio->bi_iter.bi_sector - 558 p->bd_start_sect); 559 } 560 bio_set_flag(bio, BIO_REMAPPED); 561 return 0; 562 } 563 564 /* 565 * Check write append to a zoned block device. 566 */ 567 static inline blk_status_t blk_check_zone_append(struct request_queue *q, 568 struct bio *bio) 569 { 570 int nr_sectors = bio_sectors(bio); 571 572 /* Only applicable to zoned block devices */ 573 if (!bdev_is_zoned(bio->bi_bdev)) 574 return BLK_STS_NOTSUPP; 575 576 /* The bio sector must point to the start of a sequential zone */ 577 if (bio->bi_iter.bi_sector & (bdev_zone_sectors(bio->bi_bdev) - 1) || 578 !bio_zone_is_seq(bio)) 579 return BLK_STS_IOERR; 580 581 /* 582 * Not allowed to cross zone boundaries. Otherwise, the BIO will be 583 * split and could result in non-contiguous sectors being written in 584 * different zones. 
585 */ 586 if (nr_sectors > q->limits.chunk_sectors) 587 return BLK_STS_IOERR; 588 589 /* Make sure the BIO is small enough and will not get split */ 590 if (nr_sectors > q->limits.max_zone_append_sectors) 591 return BLK_STS_IOERR; 592 593 bio->bi_opf |= REQ_NOMERGE; 594 595 return BLK_STS_OK; 596 } 597 598 static void __submit_bio(struct bio *bio) 599 { 600 struct gendisk *disk = bio->bi_bdev->bd_disk; 601 602 if (unlikely(!blk_crypto_bio_prep(&bio))) 603 return; 604 605 if (!disk->fops->submit_bio) { 606 blk_mq_submit_bio(bio); 607 } else if (likely(bio_queue_enter(bio) == 0)) { 608 disk->fops->submit_bio(bio); 609 blk_queue_exit(disk->queue); 610 } 611 } 612 613 /* 614 * The loop in this function may be a bit non-obvious, and so deserves some 615 * explanation: 616 * 617 * - Before entering the loop, bio->bi_next is NULL (as all callers ensure 618 * that), so we have a list with a single bio. 619 * - We pretend that we have just taken it off a longer list, so we assign 620 * bio_list to a pointer to the bio_list_on_stack, thus initialising the 621 * bio_list of new bios to be added. ->submit_bio() may indeed add some more 622 * bios through a recursive call to submit_bio_noacct. If it did, we find a 623 * non-NULL value in bio_list and re-enter the loop from the top. 624 * - In this case we really did just take the bio of the top of the list (no 625 * pretending) and so remove it from bio_list, and call into ->submit_bio() 626 * again. 627 * 628 * bio_list_on_stack[0] contains bios submitted by the current ->submit_bio. 629 * bio_list_on_stack[1] contains bios that were submitted before the current 630 * ->submit_bio, but that haven't been processed yet. 
631 */ 632 static void __submit_bio_noacct(struct bio *bio) 633 { 634 struct bio_list bio_list_on_stack[2]; 635 636 BUG_ON(bio->bi_next); 637 638 bio_list_init(&bio_list_on_stack[0]); 639 current->bio_list = bio_list_on_stack; 640 641 do { 642 struct request_queue *q = bdev_get_queue(bio->bi_bdev); 643 struct bio_list lower, same; 644 645 /* 646 * Create a fresh bio_list for all subordinate requests. 647 */ 648 bio_list_on_stack[1] = bio_list_on_stack[0]; 649 bio_list_init(&bio_list_on_stack[0]); 650 651 __submit_bio(bio); 652 653 /* 654 * Sort new bios into those for a lower level and those for the 655 * same level. 656 */ 657 bio_list_init(&lower); 658 bio_list_init(&same); 659 while ((bio = bio_list_pop(&bio_list_on_stack[0])) != NULL) 660 if (q == bdev_get_queue(bio->bi_bdev)) 661 bio_list_add(&same, bio); 662 else 663 bio_list_add(&lower, bio); 664 665 /* 666 * Now assemble so we handle the lowest level first. 667 */ 668 bio_list_merge(&bio_list_on_stack[0], &lower); 669 bio_list_merge(&bio_list_on_stack[0], &same); 670 bio_list_merge(&bio_list_on_stack[0], &bio_list_on_stack[1]); 671 } while ((bio = bio_list_pop(&bio_list_on_stack[0]))); 672 673 current->bio_list = NULL; 674 } 675 676 static void __submit_bio_noacct_mq(struct bio *bio) 677 { 678 struct bio_list bio_list[2] = { }; 679 680 current->bio_list = bio_list; 681 682 do { 683 __submit_bio(bio); 684 } while ((bio = bio_list_pop(&bio_list[0]))); 685 686 current->bio_list = NULL; 687 } 688 689 void submit_bio_noacct_nocheck(struct bio *bio) 690 { 691 blk_cgroup_bio_start(bio); 692 blkcg_bio_issue_init(bio); 693 694 if (!bio_flagged(bio, BIO_TRACE_COMPLETION)) { 695 trace_block_bio_queue(bio); 696 /* 697 * Now that enqueuing has been traced, we need to trace 698 * completion as well. 699 */ 700 bio_set_flag(bio, BIO_TRACE_COMPLETION); 701 } 702 703 /* 704 * We only want one ->submit_bio to be active at a time, else stack 705 * usage with stacked devices could be a problem. 
Use current->bio_list 706 * to collect a list of requests submited by a ->submit_bio method while 707 * it is active, and then process them after it returned. 708 */ 709 if (current->bio_list) 710 bio_list_add(&current->bio_list[0], bio); 711 else if (!bio->bi_bdev->bd_disk->fops->submit_bio) 712 __submit_bio_noacct_mq(bio); 713 else 714 __submit_bio_noacct(bio); 715 } 716 717 /** 718 * submit_bio_noacct - re-submit a bio to the block device layer for I/O 719 * @bio: The bio describing the location in memory and on the device. 720 * 721 * This is a version of submit_bio() that shall only be used for I/O that is 722 * resubmitted to lower level drivers by stacking block drivers. All file 723 * systems and other upper level users of the block layer should use 724 * submit_bio() instead. 725 */ 726 void submit_bio_noacct(struct bio *bio) 727 { 728 struct block_device *bdev = bio->bi_bdev; 729 struct request_queue *q = bdev_get_queue(bdev); 730 blk_status_t status = BLK_STS_IOERR; 731 struct blk_plug *plug; 732 733 might_sleep(); 734 735 plug = blk_mq_plug(bio); 736 if (plug && plug->nowait) 737 bio->bi_opf |= REQ_NOWAIT; 738 739 /* 740 * For a REQ_NOWAIT based request, return -EOPNOTSUPP 741 * if queue does not support NOWAIT. 742 */ 743 if ((bio->bi_opf & REQ_NOWAIT) && !bdev_nowait(bdev)) 744 goto not_supported; 745 746 if (should_fail_bio(bio)) 747 goto end_io; 748 bio_check_ro(bio); 749 if (!bio_flagged(bio, BIO_REMAPPED)) { 750 if (unlikely(bio_check_eod(bio))) 751 goto end_io; 752 if (bdev->bd_partno && unlikely(blk_partition_remap(bio))) 753 goto end_io; 754 } 755 756 /* 757 * Filter flush bio's early so that bio based drivers without flush 758 * support don't have to worry about them. 
759 */ 760 if (op_is_flush(bio->bi_opf) && 761 !test_bit(QUEUE_FLAG_WC, &q->queue_flags)) { 762 bio->bi_opf &= ~(REQ_PREFLUSH | REQ_FUA); 763 if (!bio_sectors(bio)) { 764 status = BLK_STS_OK; 765 goto end_io; 766 } 767 } 768 769 if (!test_bit(QUEUE_FLAG_POLL, &q->queue_flags)) 770 bio_clear_polled(bio); 771 772 switch (bio_op(bio)) { 773 case REQ_OP_DISCARD: 774 if (!bdev_max_discard_sectors(bdev)) 775 goto not_supported; 776 break; 777 case REQ_OP_SECURE_ERASE: 778 if (!bdev_max_secure_erase_sectors(bdev)) 779 goto not_supported; 780 break; 781 case REQ_OP_ZONE_APPEND: 782 status = blk_check_zone_append(q, bio); 783 if (status != BLK_STS_OK) 784 goto end_io; 785 break; 786 case REQ_OP_ZONE_RESET: 787 case REQ_OP_ZONE_OPEN: 788 case REQ_OP_ZONE_CLOSE: 789 case REQ_OP_ZONE_FINISH: 790 if (!bdev_is_zoned(bio->bi_bdev)) 791 goto not_supported; 792 break; 793 case REQ_OP_ZONE_RESET_ALL: 794 if (!bdev_is_zoned(bio->bi_bdev) || !blk_queue_zone_resetall(q)) 795 goto not_supported; 796 break; 797 case REQ_OP_WRITE_ZEROES: 798 if (!q->limits.max_write_zeroes_sectors) 799 goto not_supported; 800 break; 801 default: 802 break; 803 } 804 805 if (blk_throtl_bio(bio)) 806 return; 807 submit_bio_noacct_nocheck(bio); 808 return; 809 810 not_supported: 811 status = BLK_STS_NOTSUPP; 812 end_io: 813 bio->bi_status = status; 814 bio_endio(bio); 815 } 816 EXPORT_SYMBOL(submit_bio_noacct); 817 818 #ifdef CONFIG_BLK_MQ_USE_LOCAL_THREAD 819 extern bool test_task_ux(struct task_struct *task); 820 #endif 821 822 static void bio_set_ioprio(struct bio *bio) 823 { 824 /* Nobody set ioprio so far? 
Initialize it based on task's nice value */ 825 if (IOPRIO_PRIO_CLASS(bio->bi_ioprio) == IOPRIO_CLASS_NONE) 826 bio->bi_ioprio = get_current_ioprio(); 827 blkcg_set_ioprio(bio); 828 #ifdef CONFIG_BLK_MQ_USE_LOCAL_THREAD 829 bio_cnt++; 830 831 if (IOPRIO_PRIO_CLASS(bio->bi_ioprio) == IOPRIO_CLASS_RT) { 832 rt_bio_cnt++; 833 } else if (test_task_ux(current)) { 834 bio->bi_ioprio = IOPRIO_PRIO_VALUE(IOPRIO_CLASS_RT, 4); 835 rt_bio_cnt++; 836 ux_bio_cnt++; 837 } 838 #endif 839 } 840 841 /** 842 * submit_bio - submit a bio to the block device layer for I/O 843 * @bio: The &struct bio which describes the I/O 844 * 845 * submit_bio() is used to submit I/O requests to block devices. It is passed a 846 * fully set up &struct bio that describes the I/O that needs to be done. The 847 * bio will be send to the device described by the bi_bdev field. 848 * 849 * The success/failure status of the request, along with notification of 850 * completion, is delivered asynchronously through the ->bi_end_io() callback 851 * in @bio. The bio must NOT be touched by the caller until ->bi_end_io() has 852 * been called. 853 */ 854 void submit_bio(struct bio *bio) 855 { 856 if (blkcg_punt_bio_submit(bio)) 857 return; 858 859 if (bio_op(bio) == REQ_OP_READ) { 860 task_io_account_read(bio->bi_iter.bi_size); 861 count_vm_events(PGPGIN, bio_sectors(bio)); 862 } else if (bio_op(bio) == REQ_OP_WRITE) { 863 count_vm_events(PGPGOUT, bio_sectors(bio)); 864 } 865 866 bio_set_ioprio(bio); 867 submit_bio_noacct(bio); 868 } 869 EXPORT_SYMBOL(submit_bio); 870 871 /** 872 * bio_poll - poll for BIO completions 873 * @bio: bio to poll for 874 * @iob: batches of IO 875 * @flags: BLK_POLL_* flags that control the behavior 876 * 877 * Poll for completions on queue associated with the bio. Returns number of 878 * completed entries found. 879 * 880 * Note: the caller must either be the context that submitted @bio, or 881 * be in a RCU critical section to prevent freeing of @bio. 
882 */ 883 int bio_poll(struct bio *bio, struct io_comp_batch *iob, unsigned int flags) 884 { 885 blk_qc_t cookie = READ_ONCE(bio->bi_cookie); 886 struct block_device *bdev; 887 struct request_queue *q; 888 int ret = 0; 889 890 bdev = READ_ONCE(bio->bi_bdev); 891 if (!bdev) 892 return 0; 893 894 q = bdev_get_queue(bdev); 895 if (cookie == BLK_QC_T_NONE || 896 !test_bit(QUEUE_FLAG_POLL, &q->queue_flags)) 897 return 0; 898 899 /* 900 * As the requests that require a zone lock are not plugged in the 901 * first place, directly accessing the plug instead of using 902 * blk_mq_plug() should not have any consequences during flushing for 903 * zoned devices. 904 */ 905 blk_flush_plug(current->plug, false); 906 907 /* 908 * We need to be able to enter a frozen queue, similar to how 909 * timeouts also need to do that. If that is blocked, then we can 910 * have pending IO when a queue freeze is started, and then the 911 * wait for the freeze to finish will wait for polled requests to 912 * timeout as the poller is preventer from entering the queue and 913 * completing them. As long as we prevent new IO from being queued, 914 * that should be all that matters. 915 */ 916 if (!percpu_ref_tryget(&q->q_usage_counter)) 917 return 0; 918 if (queue_is_mq(q)) { 919 ret = blk_mq_poll(q, cookie, iob, flags); 920 } else { 921 struct gendisk *disk = q->disk; 922 923 if (disk && disk->fops->poll_bio) 924 ret = disk->fops->poll_bio(bio, iob, flags); 925 } 926 blk_queue_exit(q); 927 return ret; 928 } 929 EXPORT_SYMBOL_GPL(bio_poll); 930 931 /* 932 * Helper to implement file_operations.iopoll. Requires the bio to be stored 933 * in iocb->private, and cleared before freeing the bio. 934 */ 935 int iocb_bio_iopoll(struct kiocb *kiocb, struct io_comp_batch *iob, 936 unsigned int flags) 937 { 938 struct bio *bio; 939 int ret = 0; 940 941 /* 942 * Note: the bio cache only uses SLAB_TYPESAFE_BY_RCU, so bio can 943 * point to a freshly allocated bio at this point. 
If that happens 944 * we have a few cases to consider: 945 * 946 * 1) the bio is beeing initialized and bi_bdev is NULL. We can just 947 * simply nothing in this case 948 * 2) the bio points to a not poll enabled device. bio_poll will catch 949 * this and return 0 950 * 3) the bio points to a poll capable device, including but not 951 * limited to the one that the original bio pointed to. In this 952 * case we will call into the actual poll method and poll for I/O, 953 * even if we don't need to, but it won't cause harm either. 954 * 955 * For cases 2) and 3) above the RCU grace period ensures that bi_bdev 956 * is still allocated. Because partitions hold a reference to the whole 957 * device bdev and thus disk, the disk is also still valid. Grabbing 958 * a reference to the queue in bio_poll() ensures the hctxs and requests 959 * are still valid as well. 960 */ 961 rcu_read_lock(); 962 bio = READ_ONCE(kiocb->private); 963 if (bio) 964 ret = bio_poll(bio, iob, flags); 965 rcu_read_unlock(); 966 967 return ret; 968 } 969 EXPORT_SYMBOL_GPL(iocb_bio_iopoll); 970 971 void update_io_ticks(struct block_device *part, unsigned long now, bool end) 972 { 973 unsigned long stamp; 974 again: 975 stamp = READ_ONCE(part->bd_stamp); 976 if (unlikely(time_after(now, stamp)) && 977 likely(try_cmpxchg(&part->bd_stamp, &stamp, now)) && 978 (end || part_in_flight(part))) 979 __part_stat_add(part, io_ticks, now - stamp); 980 981 if (part->bd_partno) { 982 part = bdev_whole(part); 983 goto again; 984 } 985 } 986 987 unsigned long bdev_start_io_acct(struct block_device *bdev, 988 unsigned int sectors, enum req_op op, 989 unsigned long start_time) 990 { 991 const int sgrp = op_stat_group(op); 992 993 part_stat_lock(); 994 update_io_ticks(bdev, start_time, false); 995 part_stat_inc(bdev, ios[sgrp]); 996 part_stat_add(bdev, sectors[sgrp], sectors); 997 part_stat_local_inc(bdev, in_flight[op_is_write(op)]); 998 part_stat_unlock(); 999 1000 return start_time; 1001 } 1002 
EXPORT_SYMBOL(bdev_start_io_acct); 1003 1004 /** 1005 * bio_start_io_acct_time - start I/O accounting for bio based drivers 1006 * @bio: bio to start account for 1007 * @start_time: start time that should be passed back to bio_end_io_acct(). 1008 */ 1009 void bio_start_io_acct_time(struct bio *bio, unsigned long start_time) 1010 { 1011 bdev_start_io_acct(bio->bi_bdev, bio_sectors(bio), 1012 bio_op(bio), start_time); 1013 } 1014 EXPORT_SYMBOL_GPL(bio_start_io_acct_time); 1015 1016 /** 1017 * bio_start_io_acct - start I/O accounting for bio based drivers 1018 * @bio: bio to start account for 1019 * 1020 * Returns the start time that should be passed back to bio_end_io_acct(). 1021 */ 1022 unsigned long bio_start_io_acct(struct bio *bio) 1023 { 1024 return bdev_start_io_acct(bio->bi_bdev, bio_sectors(bio), 1025 bio_op(bio), jiffies); 1026 } 1027 EXPORT_SYMBOL_GPL(bio_start_io_acct); 1028 1029 void bdev_end_io_acct(struct block_device *bdev, enum req_op op, 1030 unsigned long start_time) 1031 { 1032 const int sgrp = op_stat_group(op); 1033 unsigned long now = READ_ONCE(jiffies); 1034 unsigned long duration = now - start_time; 1035 1036 part_stat_lock(); 1037 update_io_ticks(bdev, now, true); 1038 part_stat_add(bdev, nsecs[sgrp], jiffies_to_nsecs(duration)); 1039 part_stat_local_dec(bdev, in_flight[op_is_write(op)]); 1040 part_stat_unlock(); 1041 } 1042 EXPORT_SYMBOL(bdev_end_io_acct); 1043 1044 void bio_end_io_acct_remapped(struct bio *bio, unsigned long start_time, 1045 struct block_device *orig_bdev) 1046 { 1047 bdev_end_io_acct(orig_bdev, bio_op(bio), start_time); 1048 } 1049 EXPORT_SYMBOL_GPL(bio_end_io_acct_remapped); 1050 1051 /** 1052 * blk_lld_busy - Check if underlying low-level drivers of a device are busy 1053 * @q : the queue of the device being checked 1054 * 1055 * Description: 1056 * Check if underlying low-level drivers of a device are busy. 
1057 * If the drivers want to export their busy state, they must set own 1058 * exporting function using blk_queue_lld_busy() first. 1059 * 1060 * Basically, this function is used only by request stacking drivers 1061 * to stop dispatching requests to underlying devices when underlying 1062 * devices are busy. This behavior helps more I/O merging on the queue 1063 * of the request stacking driver and prevents I/O throughput regression 1064 * on burst I/O load. 1065 * 1066 * Return: 1067 * 0 - Not busy (The request stacking driver should dispatch request) 1068 * 1 - Busy (The request stacking driver should stop dispatching request) 1069 */ 1070 int blk_lld_busy(struct request_queue *q) 1071 { 1072 if (queue_is_mq(q) && q->mq_ops->busy) 1073 return q->mq_ops->busy(q); 1074 1075 return 0; 1076 } 1077 EXPORT_SYMBOL_GPL(blk_lld_busy); 1078 1079 int kblockd_schedule_work(struct work_struct *work) 1080 { 1081 return queue_work(kblockd_workqueue, work); 1082 } 1083 EXPORT_SYMBOL(kblockd_schedule_work); 1084 1085 int kblockd_mod_delayed_work_on(int cpu, struct delayed_work *dwork, 1086 unsigned long delay) 1087 { 1088 return mod_delayed_work_on(cpu, kblockd_workqueue, dwork, delay); 1089 } 1090 EXPORT_SYMBOL(kblockd_mod_delayed_work_on); 1091 1092 void blk_start_plug_nr_ios(struct blk_plug *plug, unsigned short nr_ios) 1093 { 1094 struct task_struct *tsk = current; 1095 1096 /* 1097 * If this is a nested plug, don't actually assign it. 
1098 */ 1099 if (tsk->plug) 1100 return; 1101 1102 plug->mq_list = NULL; 1103 plug->cached_rq = NULL; 1104 plug->nr_ios = min_t(unsigned short, nr_ios, BLK_MAX_REQUEST_COUNT); 1105 plug->rq_count = 0; 1106 plug->multiple_queues = false; 1107 plug->has_elevator = false; 1108 plug->nowait = false; 1109 INIT_LIST_HEAD(&plug->cb_list); 1110 1111 /* 1112 * Store ordering should not be needed here, since a potential 1113 * preempt will imply a full memory barrier 1114 */ 1115 tsk->plug = plug; 1116 } 1117 1118 /** 1119 * blk_start_plug - initialize blk_plug and track it inside the task_struct 1120 * @plug: The &struct blk_plug that needs to be initialized 1121 * 1122 * Description: 1123 * blk_start_plug() indicates to the block layer an intent by the caller 1124 * to submit multiple I/O requests in a batch. The block layer may use 1125 * this hint to defer submitting I/Os from the caller until blk_finish_plug() 1126 * is called. However, the block layer may choose to submit requests 1127 * before a call to blk_finish_plug() if the number of queued I/Os 1128 * exceeds %BLK_MAX_REQUEST_COUNT, or if the size of the I/O is larger than 1129 * %BLK_PLUG_FLUSH_SIZE. The queued I/Os may also be submitted early if 1130 * the task schedules (see below). 1131 * 1132 * Tracking blk_plug inside the task_struct will help with auto-flushing the 1133 * pending I/O should the task end up blocking between blk_start_plug() and 1134 * blk_finish_plug(). This is important from a performance perspective, but 1135 * also ensures that we don't deadlock. For instance, if the task is blocking 1136 * for a memory allocation, memory reclaim could end up wanting to free a 1137 * page belonging to that request that is currently residing in our private 1138 * plug. By flushing the pending I/O when the process goes to sleep, we avoid 1139 * this kind of deadlock. 
1140 */ 1141 void blk_start_plug(struct blk_plug *plug) 1142 { 1143 blk_start_plug_nr_ios(plug, 1); 1144 } 1145 EXPORT_SYMBOL(blk_start_plug); 1146 1147 static void flush_plug_callbacks(struct blk_plug *plug, bool from_schedule) 1148 { 1149 LIST_HEAD(callbacks); 1150 1151 while (!list_empty(&plug->cb_list)) { 1152 list_splice_init(&plug->cb_list, &callbacks); 1153 1154 while (!list_empty(&callbacks)) { 1155 struct blk_plug_cb *cb = list_first_entry(&callbacks, 1156 struct blk_plug_cb, 1157 list); 1158 list_del(&cb->list); 1159 cb->callback(cb, from_schedule); 1160 } 1161 } 1162 } 1163 1164 struct blk_plug_cb *blk_check_plugged(blk_plug_cb_fn unplug, void *data, 1165 int size) 1166 { 1167 struct blk_plug *plug = current->plug; 1168 struct blk_plug_cb *cb; 1169 1170 if (!plug) 1171 return NULL; 1172 1173 list_for_each_entry(cb, &plug->cb_list, list) 1174 if (cb->callback == unplug && cb->data == data) 1175 return cb; 1176 1177 /* Not currently on the callback list */ 1178 BUG_ON(size < sizeof(*cb)); 1179 cb = kzalloc(size, GFP_ATOMIC); 1180 if (cb) { 1181 cb->data = data; 1182 cb->callback = unplug; 1183 list_add(&cb->list, &plug->cb_list); 1184 } 1185 return cb; 1186 } 1187 EXPORT_SYMBOL(blk_check_plugged); 1188 1189 void __blk_flush_plug(struct blk_plug *plug, bool from_schedule) 1190 { 1191 if (!list_empty(&plug->cb_list)) 1192 flush_plug_callbacks(plug, from_schedule); 1193 blk_mq_flush_plug_list(plug, from_schedule); 1194 /* 1195 * Unconditionally flush out cached requests, even if the unplug 1196 * event came from schedule. Since we know hold references to the 1197 * queue for cached requests, we don't want a blocked task holding 1198 * up a queue freeze/quiesce event. 
1199 */ 1200 if (unlikely(!rq_list_empty(plug->cached_rq))) 1201 blk_mq_free_plug_rqs(plug); 1202 } 1203 1204 /** 1205 * blk_finish_plug - mark the end of a batch of submitted I/O 1206 * @plug: The &struct blk_plug passed to blk_start_plug() 1207 * 1208 * Description: 1209 * Indicate that a batch of I/O submissions is complete. This function 1210 * must be paired with an initial call to blk_start_plug(). The intent 1211 * is to allow the block layer to optimize I/O submission. See the 1212 * documentation for blk_start_plug() for more information. 1213 */ 1214 void blk_finish_plug(struct blk_plug *plug) 1215 { 1216 if (plug == current->plug) { 1217 __blk_flush_plug(plug, false); 1218 current->plug = NULL; 1219 } 1220 } 1221 EXPORT_SYMBOL(blk_finish_plug); 1222 1223 void blk_io_schedule(void) 1224 { 1225 /* Prevent hang_check timer from firing at us during very long I/O */ 1226 unsigned long timeout = sysctl_hung_task_timeout_secs * HZ / 2; 1227 1228 if (timeout) 1229 io_schedule_timeout(timeout); 1230 else 1231 io_schedule(); 1232 } 1233 EXPORT_SYMBOL_GPL(blk_io_schedule); 1234 1235 int __init blk_dev_init(void) 1236 { 1237 #ifdef CONFIG_BLK_MQ_USE_LOCAL_THREAD 1238 const char *config = of_blk_feature_read("kblockd_ux_unbound_enable"); 1239 #endif 1240 BUILD_BUG_ON((__force u32)REQ_OP_LAST >= (1 << REQ_OP_BITS)); 1241 BUILD_BUG_ON(REQ_OP_BITS + REQ_FLAG_BITS > 8 * 1242 sizeof_field(struct request, cmd_flags)); 1243 BUILD_BUG_ON(REQ_OP_BITS + REQ_FLAG_BITS > 8 * 1244 sizeof_field(struct bio, bi_opf)); 1245 BUILD_BUG_ON(ALIGN(offsetof(struct request_queue, srcu), 1246 __alignof__(struct request_queue)) != 1247 sizeof(struct request_queue)); 1248 1249 #ifdef CONFIG_BLK_MQ_USE_LOCAL_THREAD 1250 if (config && strcmp(config, "y") == 0) 1251 kblockd_workqueue = alloc_workqueue("kblockd", 1252 WQ_MEM_RECLAIM | WQ_HIGHPRI | WQ_UX | WQ_UNBOUND, 0); 1253 else 1254 #endif 1255 /* used for unplugging and affects IO latency/throughput - HIGHPRI */ 1256 kblockd_workqueue = 
alloc_workqueue("kblockd", 1257 WQ_MEM_RECLAIM | WQ_HIGHPRI, 0); 1258 if (!kblockd_workqueue) 1259 panic("Failed to create kblockd\n"); 1260 1261 blk_requestq_cachep = kmem_cache_create("request_queue", 1262 sizeof(struct request_queue), 0, SLAB_PANIC, NULL); 1263 1264 blk_requestq_srcu_cachep = kmem_cache_create("request_queue_srcu", 1265 sizeof(struct request_queue) + 1266 sizeof(struct srcu_struct), 0, SLAB_PANIC, NULL); 1267 1268 blk_debugfs_root = debugfs_create_dir("block", NULL); 1269 blk_mq_debugfs_init(); 1270 1271 return 0; 1272 } 1273 served by {OpenGrok Last Index Update: Sat Nov 22 16:34:38 CST 2025 submit_bio的代码流程,怎么提交为request
最新发布
11-25
#define pr_fmt(fmt) "gpio-privacy: " fmt #include <linux/delay.h> #include <linux/init.h> #include <linux/io.h> #include <linux/irq.h> #include <linux/mutex.h> #include <linux/of_irq.h> #include <linux/of_gpio.h> #include <linux/of_platform.h> #include <linux/interrupt.h> #include <linux/module.h> #include <linux/input.h> #define DEFAULT_DEBOUNCE_INTERVAL 5 enum privacy_state { PRIVACY_STATE_OFF = 0, /* HW privacy is OFF */ PRIVACY_STATE_ON, /* HW privacy is ON */ }; struct privacy_state_warning_event { const char *desc; unsigned int input_type; unsigned int code; unsigned int input_value; struct input_dev *input_dev; struct work_struct work; struct privacy_priv *priv; }; struct privacy_state_event { const char *desc; unsigned int input_type; unsigned int code; struct input_dev *input_dev; struct work_struct work; int state_gpio; enum of_gpio_flags state_gpio_flags; struct privacy_priv *priv; }; struct privacy_button_event { const char *desc; unsigned int code; unsigned int input_type; int debounce_interval; struct input_dev *input_dev; struct delayed_work work; bool wakeup_capable; int button_gpio; enum of_gpio_flags button_gpio_flags; int last_button_event; unsigned long last_button_press_time; unsigned long last_button_release_time; struct privacy_priv *priv; }; struct privacy_priv { int enable_gpio; enum of_gpio_flags enable_gpio_flags; int enable_gpio_toggle_duration; int privacy_event_max_press_duration; int auto_toggle_enable_gpio_time; bool is_desired_privacy_state_on; struct mutex mutex; struct privacy_state_warning_event *state_warning_event; struct privacy_state_event *state_event; struct privacy_button_event *button_event; struct delayed_work work; }; /* Forward declarations: */ static enum privacy_state __privacy_state(struct privacy_priv *priv); static int __set_privacy_enable(struct privacy_priv *priv); static void handle_privacy_button_event(struct privacy_button_event *button_event, bool button_pressed, bool do_toggle) { enum privacy_state 
cur_state; struct privacy_priv *priv = button_event->priv; struct privacy_state_warning_event *state_warning_event = priv->state_warning_event; unsigned long button_press_duration; unsigned long last_button_press_time; bool is_desired_privacy_state_on; cur_state = __privacy_state(priv); if (button_pressed) { mutex_lock(&priv->mutex); button_event->last_button_press_time = jiffies; /* With the Silego chip, we only know whether we are entering * privacy state on the button press because on button press * Silego locks the current state, so save this off because * we will need this info on button release. */ if (cur_state == PRIVACY_STATE_OFF) priv->is_desired_privacy_state_on = true; mutex_unlock(&priv->mutex); pr_debug("%s: privacy button PRESSED cur_state=%d\n", __func__, cur_state); return; } pr_debug("%s: privacy button RELEASED cur_state=%d\n", __func__, cur_state); /* privacy button released ! */ mutex_lock(&priv->mutex); button_event->last_button_release_time = jiffies; last_button_press_time = button_event->last_button_press_time; is_desired_privacy_state_on = priv->is_desired_privacy_state_on; priv->is_desired_privacy_state_on = false; mutex_unlock(&priv->mutex); if (priv->privacy_event_max_press_duration > 0) { button_press_duration = last_button_press_time + msecs_to_jiffies(priv->privacy_event_max_press_duration); /* Ignore long press because it might be for Power Event or Factory Reset */ if (time_after(jiffies, button_press_duration)) { pr_debug("%s: POWER EVENT: cur_state=%d\n", __func__, cur_state); is_desired_privacy_state_on = false; } } if (is_desired_privacy_state_on && do_toggle) { if (state_warning_event != NULL) { pr_debug("%s: state_warning schedule work: cur_state=%d\n", __func__, cur_state); schedule_work(&state_warning_event->work); } if (priv->auto_toggle_enable_gpio_time >= 0) { pr_info("%s: auto-toggle-enable schedule work\n", __func__); schedule_delayed_work(&priv->work, msecs_to_jiffies(priv->auto_toggle_enable_gpio_time)); } } } static 
void privacy_work_func(struct work_struct *work) { struct privacy_priv *priv = container_of(work, struct privacy_priv, work.work); mutex_lock(&priv->mutex); pr_info("%s: privacy state enable immediately\n", __func__); __set_privacy_enable(priv); mutex_unlock(&priv->mutex); } static void privacy_state_warning_event_work_func(struct work_struct *work) { struct privacy_state_warning_event *state_warning_event = container_of(work, struct privacy_state_warning_event, work); pr_info("%s: sending state warning to userspace. input_type=0x%x code=0x%x\n", __func__, state_warning_event->input_type, state_warning_event->code); if (state_warning_event->input_type == EV_KEY || state_warning_event->input_type == EV_SW) { /* * EV_KEY key events and EV_SW switch events require a full transition from 0 to 1 * and then 1 to 0 in order to get future events */ input_event(state_warning_event->input_dev, state_warning_event->input_type, state_warning_event->code, 1); input_sync(state_warning_event->input_dev); input_event(state_warning_event->input_dev, state_warning_event->input_type, state_warning_event->code, 0); input_sync(state_warning_event->input_dev); } else if (state_warning_event->input_type == EV_MSC || state_warning_event->code == MSC_RAW) { /* * EV_MSC events only send a single event with 'input value' from dts */ input_event(state_warning_event->input_dev, state_warning_event->input_type, state_warning_event->code, state_warning_event->input_value); input_sync(state_warning_event->input_dev); } } static void privacy_state_event_work_func(struct work_struct *work) { bool value; struct privacy_state_event *state_event = container_of(work, struct privacy_state_event, work); value = gpio_get_value_cansleep(state_event->state_gpio); if (state_event->state_gpio_flags & OF_GPIO_ACTIVE_LOW) value = !value; input_event(state_event->input_dev, state_event->input_type, state_event->code, value); input_sync(state_event->input_dev); } static void privacy_button_event_work_func(struct 
work_struct *work) { int value; struct privacy_button_event *button_event = container_of(work, struct privacy_button_event, work.work); value = gpio_get_value_cansleep(button_event->button_gpio); if (unlikely(value < 0)) { /* * gpio read can fail, however we should report button * press in order to notify userspace that privacy * state has been changed. force it to * !button_event->last_button_event for that case in the hope * we just missed one press or release. */ pr_warn_ratelimited("gpio-privacy: gpio %d read failed=%d\n", button_event->button_gpio, value); value = !button_event->last_button_event; } else if (button_event->button_gpio_flags & OF_GPIO_ACTIVE_LOW) { value = !value; } if (button_event->last_button_event == value) { /* * We can reach here when : * 1) previous press/release has been canceled due to * debouce interval. * 2) gpio_get_value() failed. * 3) button is pressed and released then we got irqs together. * * We should report button press by all means in order for * userspace to be notified about new privacy mode change. * Thus send out an artificial event. * * Unlike the mute enable case, mute disable takes no delay * to complete the mode switching. Thus if the mute button is * already released, read of the current mute status gives us * the newly switched status. In this case, we shouldn't read * the current privacy status and toggle. 
*/ handle_privacy_button_event(button_event, !value, false); input_event(button_event->input_dev, button_event->input_type, button_event->code, !value); input_sync(button_event->input_dev); } else { button_event->last_button_event = value; } handle_privacy_button_event(button_event, value, true); input_event(button_event->input_dev, button_event->input_type, button_event->code, value); input_sync(button_event->input_dev); if (button_event->wakeup_capable) pm_relax(button_event->input_dev->dev.parent); } static irqreturn_t privacy_state_interrupt(int irq, void *arg) { struct privacy_state_event *state_event = arg; schedule_work(&state_event->work); return IRQ_HANDLED; } static irqreturn_t privacy_button_interrupt(int irq, void *arg) { struct privacy_button_event *button_event = arg; if (button_event->wakeup_capable) pm_stay_awake(button_event->input_dev->dev.parent); cancel_delayed_work(&button_event->work); schedule_delayed_work(&button_event->work, msecs_to_jiffies(button_event->debounce_interval)); return IRQ_HANDLED; } static int privacy_request_interrupts(struct platform_device *pdev) { int ret; struct privacy_priv *priv = platform_get_drvdata(pdev); struct privacy_state_event *state_event = priv->state_event; struct privacy_button_event *button_event = priv->button_event; ret = devm_request_any_context_irq(&pdev->dev, gpio_to_irq(state_event->state_gpio), privacy_state_interrupt, IRQF_TRIGGER_RISING | IRQF_TRIGGER_FALLING, "gpio-privacy-state", state_event); if (ret < 0) return ret; ret = devm_request_any_context_irq(&pdev->dev, gpio_to_irq(button_event->button_gpio), privacy_button_interrupt, IRQF_TRIGGER_RISING | IRQF_TRIGGER_FALLING, "gpio-privacy", button_event); if (ret < 0) return ret; return 0; } static int privacy_setup_state_warning_event(struct platform_device *pdev) { int ret; struct input_dev *input; struct device *dev = &pdev->dev; struct privacy_priv *priv = platform_get_drvdata(pdev); /* state_warning_event is optional dts node */ if 
(priv->state_warning_event == NULL) return 0; INIT_WORK(&priv->state_warning_event->work, privacy_state_warning_event_work_func); input = devm_input_allocate_device(dev); if (!input) return -ENOMEM; input->name = "gpio-privacy-state-warning"; input->dev.parent = &pdev->dev; input_set_capability(input, priv->state_warning_event->input_type, priv->state_warning_event->code); priv->state_warning_event->input_dev = input; priv->state_warning_event->priv = priv; ret = input_register_device(input); if (ret) return ret; return 0; } static int privacy_setup_state_event(struct platform_device *pdev) { int ret; struct input_dev *input; struct device *dev = &pdev->dev; struct privacy_priv *priv = platform_get_drvdata(pdev); INIT_WORK(&priv->state_event->work, privacy_state_event_work_func); input = devm_input_allocate_device(dev); if (!input) return -ENOMEM; input->name = "gpio-privacy-state"; input->dev.parent = &pdev->dev; input_set_capability(input, priv->state_event->input_type, priv->state_event->code); priv->state_event->input_dev = input; priv->state_event->priv = priv; ret = input_register_device(input); if (ret) return ret; /* seed initial value if already in a muted state */ if (priv->state_event->input_type == EV_SW && __privacy_state(priv)) { input_event(priv->state_event->input_dev, priv->state_event->input_type, priv->state_event->code, 1); input_sync(priv->state_event->input_dev); } return 0; } static int privacy_setup_button_event(struct platform_device *pdev) { int ret; struct input_dev *input; struct device *dev = &pdev->dev; struct privacy_priv *priv = platform_get_drvdata(pdev); INIT_DELAYED_WORK(&priv->button_event->work, privacy_button_event_work_func); input = devm_input_allocate_device(dev); if (!input) return -ENOMEM; input->name = "gpio-privacy-button"; input->dev.parent = &pdev->dev; input_set_capability(input, priv->button_event->input_type, priv->button_event->code); priv->button_event->input_dev = input; priv->button_event->priv = priv; ret = 
input_register_device(input); if (ret) return ret; return 0; } #ifdef CONFIG_OF static int privacy_state_warning_event_parse_of(struct platform_device *pdev) { struct device_node *node; struct device_node *state_warning_event_node; struct privacy_priv *priv = platform_get_drvdata(pdev); node = pdev->dev.of_node; state_warning_event_node = of_get_child_by_name(node, "state_warning_event"); if (!state_warning_event_node) { /* state warning event is optional in dts */ dev_warn(&pdev->dev, "No state warning event configured in dts\n"); return 0; } priv->state_warning_event = devm_kzalloc(&pdev->dev, sizeof(*priv->state_warning_event), GFP_KERNEL); if (!priv->state_warning_event) return -ENOMEM; priv->state_warning_event->desc = of_get_property(state_warning_event_node, "label", NULL); if (of_property_read_u32(state_warning_event_node, "linux,input-type", &priv->state_warning_event->input_type)) priv->state_warning_event->input_type = EV_KEY; if (of_property_read_u32(state_warning_event_node, "linux,code", &priv->state_warning_event->code)) return -EINVAL; if (priv->state_warning_event->input_type == EV_MSC && priv->state_warning_event->code == MSC_RAW) { if (of_property_read_u32(state_warning_event_node, "linux,input-value", &priv->state_warning_event->input_value)) return -EINVAL; } return 0; } static int privacy_state_event_parse_of(struct platform_device *pdev) { int ret; enum of_gpio_flags flags; struct device_node *node; struct device_node *state_event_node; struct privacy_priv *priv = platform_get_drvdata(pdev); node = pdev->dev.of_node; state_event_node = of_get_child_by_name(node, "state_event"); if (!state_event_node) { dev_err(&pdev->dev, "No state event configured in dts\n"); return -EINVAL; } priv->state_event = devm_kzalloc(&pdev->dev, sizeof(*priv->state_event), GFP_KERNEL); if (!priv->state_event) return -ENOMEM; priv->state_event->desc = of_get_property(state_event_node, "label", NULL); if (of_property_read_u32(state_event_node, "linux,input-type", 
&priv->state_event->input_type)) priv->state_event->input_type = EV_KEY; if (of_property_read_u32(state_event_node, "linux,code", &priv->state_event->code)) return -EINVAL; priv->state_event->state_gpio = of_get_gpio_flags(state_event_node, 0, &flags); if (!gpio_is_valid(priv->state_event->state_gpio)) { dev_err(&pdev->dev, "No state gpios configured in dts\n"); return -EINVAL; } priv->state_event->state_gpio_flags = flags; ret = devm_gpio_request_one(&pdev->dev, priv->state_event->state_gpio, GPIOF_IN, "privacy-state-gpio"); if (ret) return ret; dev_info(&pdev->dev, "state gpio %d configured.\n", priv->state_event->state_gpio); return 0; } static int privacy_button_event_parse_of(struct platform_device *pdev) { int ret; enum of_gpio_flags flags; struct device_node *node; struct device_node *button_event_node; struct privacy_priv *priv = platform_get_drvdata(pdev); node = pdev->dev.of_node; button_event_node = of_get_child_by_name(node, "button_event"); if (!button_event_node) { dev_err(&pdev->dev, "No button event configured in dts\n"); return -EINVAL; } priv->button_event = devm_kzalloc(&pdev->dev, sizeof(*priv->button_event), GFP_KERNEL); if (!priv->button_event) return -ENOMEM; priv->button_event->desc = of_get_property(button_event_node, "label", NULL); if (of_property_read_u32(button_event_node, "linux,input-type", &priv->button_event->input_type)) priv->button_event->input_type = EV_KEY; if (of_property_read_u32(button_event_node, "linux,code", &priv->button_event->code)) return -EINVAL; if (of_property_read_u32(button_event_node, "debounce-interval", &priv->button_event->debounce_interval)) priv->button_event->debounce_interval = DEFAULT_DEBOUNCE_INTERVAL; priv->button_event->button_gpio = of_get_gpio_flags(button_event_node, 0, &flags); if (!gpio_is_valid(priv->button_event->button_gpio)) { dev_err(&pdev->dev, "No button gpios configured in dts\n"); return -EINVAL; } priv->button_event->button_gpio_flags = flags; ret = devm_gpio_request_one(&pdev->dev, 
priv->button_event->button_gpio, GPIOF_IN, "privacy-button-gpio"); if (ret) return ret; dev_info(&pdev->dev, "button gpio %d configured.\n", priv->button_event->button_gpio); priv->button_event->wakeup_capable = of_property_read_bool(button_event_node, "wakeup-source"); return 0; } static int privacy_parse_of(struct platform_device *pdev) { enum of_gpio_flags flags; int gpio, ret, gpio_init_val; struct privacy_priv *priv = platform_get_drvdata(pdev); gpio = of_get_named_gpio_flags(pdev->dev.of_node, "enable-gpio", 0, &flags); if (!gpio_is_valid(gpio)) { dev_err(&pdev->dev, "No enable gpio configured in dts\n"); return -EPROBE_DEFER; } if (flags & OF_GPIO_ACTIVE_LOW) gpio_init_val = GPIOF_OUT_INIT_HIGH; else gpio_init_val = GPIOF_OUT_INIT_LOW; ret = devm_gpio_request_one(&pdev->dev, gpio, gpio_init_val, "privacy-enable-gpio"); if (ret) return ret; priv->enable_gpio = gpio; priv->enable_gpio_flags = flags; priv->is_desired_privacy_state_on = false; if (of_property_read_u32(pdev->dev.of_node, "enable-gpio-toggle-duration", &priv->enable_gpio_toggle_duration)) priv->enable_gpio_toggle_duration = 0; if (of_property_read_u32(pdev->dev.of_node, "auto-toggle-enable-gpio-time", &priv->auto_toggle_enable_gpio_time)) priv->auto_toggle_enable_gpio_time = -1; if (of_property_read_u32(pdev->dev.of_node, "privacy-event-max-press-duration", &priv->privacy_event_max_press_duration)) priv->privacy_event_max_press_duration = 0; if (priv->auto_toggle_enable_gpio_time >= 0) INIT_DELAYED_WORK(&priv->work, privacy_work_func); return 0; } #else static int privacy_button_event_parse_of(struct platform_device *pdev) { return -EINVAL; } static int privacy_parse_of(struct platform_device *pdev) { return -EINVAL; } #endif static enum privacy_state __privacy_state(struct privacy_priv *priv) { struct privacy_state_event *state_event = priv->state_event; int value = gpio_get_value_cansleep(state_event->state_gpio); if ((!value && state_event->state_gpio_flags & OF_GPIO_ACTIVE_LOW) || (value && 
!(state_event->state_gpio_flags & OF_GPIO_ACTIVE_LOW))) /* return true when privacy state is on */ return PRIVACY_STATE_ON; return PRIVACY_STATE_OFF; } static int __set_privacy_enable(struct privacy_priv *priv) { int i = 0; int value = 1; /* default to 1, active high, unless proven otherwise */ const int max_wait = 100; pr_info("%s: Enter\n", __func__); if (priv->enable_gpio_flags & OF_GPIO_ACTIVE_LOW) value = 0; gpio_set_value_cansleep(priv->enable_gpio, value); if (priv->enable_gpio_toggle_duration > 0) { /* * toggle enable_gpio for specified duration but do not * wait for privacy enabled */ if (priv->enable_gpio_toggle_duration < 20) usleep_range((priv->enable_gpio_toggle_duration * 1000), (priv->enable_gpio_toggle_duration * 1000) + 100); else msleep(priv->enable_gpio_toggle_duration); } else { /* * wait for privacy enabled for up to 100ms or when * privacy state is set (which ever comes first) */ while (i < max_wait) { if (__privacy_state(priv)) break; usleep_range(1000, 1100); i++; } } gpio_set_value_cansleep(priv->enable_gpio, !value); pr_info("%s: Leave\n", __func__); if (i == max_wait) return -ETIMEDOUT; return 0; } static enum privacy_state privacy_state(struct device *dev) { enum privacy_state cur_state; struct platform_device *pdev = to_platform_device(dev); struct privacy_priv *priv = platform_get_drvdata(pdev); mutex_lock(&priv->mutex); cur_state = __privacy_state(priv); mutex_unlock(&priv->mutex); return cur_state; } static ssize_t show_privacy_state(struct device *dev, struct device_attribute *attr, char *buf) { enum privacy_state state = privacy_state(dev); return snprintf(buf, PAGE_SIZE, "%d\n", state); } static int set_privacy_enable(struct device *dev) { int ret = 0; struct platform_device *pdev = to_platform_device(dev); struct privacy_priv *priv = platform_get_drvdata(pdev); mutex_lock(&priv->mutex); pr_info("%s: privacy state enable immediately\n", __func__); __set_privacy_enable(priv); mutex_unlock(&priv->mutex); return ret; } static ssize_t 
store_privacy_enable(struct device *dev, struct device_attribute *attr, const char *buf, size_t count) { int enable, ret; if (!kstrtoint(buf, 10, &enable)) { /* * Don't allow userspace to turn off Privacy Mode because * privacy hardware circuit won't allow it. */ if (enable == PRIVACY_STATE_OFF) return -EINVAL; ret = set_privacy_enable(dev); if (ret) return ret; } else { return -EINVAL; } return count; } static DEVICE_ATTR(enable, S_IWUSR | S_IWGRP, NULL, store_privacy_enable); static DEVICE_ATTR(state, S_IRUGO, show_privacy_state, NULL); static struct attribute *gpio_privacy_attrs[] = { &dev_attr_enable.attr, &dev_attr_state.attr, NULL, }; static struct attribute_group gpio_privacy_attr_group = { .attrs = gpio_privacy_attrs, }; static int gpio_privacy_probe(struct platform_device *pdev) { int ret; struct privacy_priv *priv; struct device *dev = &pdev->dev; priv = devm_kzalloc(dev, sizeof(*priv), GFP_KERNEL); if (!priv) return -ENOMEM; mutex_init(&priv->mutex); platform_set_drvdata(pdev, priv); ret = privacy_parse_of(pdev); if (ret) { pr_err("failed to parse device tree = %d\n", ret); return ret; } ret = privacy_state_warning_event_parse_of(pdev); if (ret) { pr_err("failed to parse state warning event device tree = %d\n", ret); return ret; } ret = privacy_state_event_parse_of(pdev); if (ret) { pr_err("failed to parse state event device tree = %d\n", ret); return ret; } ret = privacy_button_event_parse_of(pdev); if (ret) { pr_err("failed to parse button event device tree = %d\n", ret); return ret; } ret = privacy_setup_state_warning_event(pdev); if (ret) { pr_err("failed to setup state warning event = %d\n", ret); return ret; } ret = privacy_setup_state_event(pdev); if (ret) { pr_err("failed to setup state event = %d\n", ret); return ret; } ret = privacy_setup_button_event(pdev); if (ret) { pr_err("failed to setup button event = %d\n", ret); return ret; } ret = privacy_request_interrupts(pdev); if (ret) { pr_err("failed to request interrupt = %d\n", ret); return 
ret; } ret = sysfs_create_group(&dev->kobj, &gpio_privacy_attr_group); if (ret) { pr_err("failed to create sysfs group = %d\n", ret); return ret; } device_init_wakeup(&pdev->dev, priv->button_event->wakeup_capable); return 0; } static int gpio_privacy_remove(struct platform_device *pdev) { struct privacy_priv *priv; struct device *dev = &pdev->dev; struct privacy_state_warning_event *state_warning_event; struct privacy_state_event *state_event; struct privacy_button_event *button_event; priv = platform_get_drvdata(pdev); state_warning_event = priv->state_warning_event; state_event = priv->state_event; button_event = priv->button_event; if (priv->auto_toggle_enable_gpio_time >= 0) cancel_delayed_work_sync(&priv->work); if (state_warning_event != NULL) cancel_work_sync(&state_warning_event->work); cancel_work_sync(&state_event->work); cancel_delayed_work_sync(&button_event->work); sysfs_remove_group(&dev->kobj, &gpio_privacy_attr_group); if (state_warning_event != NULL) pm_relax(state_warning_event->input_dev->dev.parent); pm_relax(state_event->input_dev->dev.parent); pm_relax(button_event->input_dev->dev.parent); mutex_destroy(&priv->mutex); return 0; } #ifdef CONFIG_PM_SLEEP static int gpio_privacy_suspend(struct device *dev) { struct privacy_priv *priv; struct privacy_state_event *state_event; struct privacy_button_event *button_event; struct platform_device *pdev = to_platform_device(dev); priv = platform_get_drvdata(pdev); state_event = priv->state_event; button_event = priv->button_event; if (button_event->wakeup_capable) { int error; enable_irq_wake(gpio_to_irq(button_event->button_gpio)); error = irq_set_irq_type(gpio_to_irq(button_event->button_gpio), IRQ_TYPE_EDGE_BOTH); if (error) { pr_err("%s: failed to set wakeup trigger for gpio-privacy, err=%d\n", __func__, error); disable_irq_wake(gpio_to_irq(button_event->button_gpio)); return error; } } return 0; } static int gpio_privacy_resume(struct device *dev) { struct privacy_priv *priv; struct 
privacy_state_event *state_event; struct privacy_button_event *button_event; struct platform_device *pdev = to_platform_device(dev); priv = platform_get_drvdata(pdev); state_event = priv->state_event; button_event = priv->button_event; if (button_event->wakeup_capable) { int error; error = irq_set_irq_type(gpio_to_irq(button_event->button_gpio), IRQ_TYPE_EDGE_BOTH); if (error) pr_err("%s: failed to restore interrupt trigger gpio-privacy, err=%d\n", __func__, error); disable_irq_wake(gpio_to_irq(button_event->button_gpio)); } return 0; } #endif #ifdef CONFIG_OF static const struct of_device_id privacy_of_table[] = { { .compatible = "gpio-privacy", }, { }, }; MODULE_DEVICE_TABLE(of, privacy_of_table); #endif #ifdef CONFIG_PM_SLEEP static SIMPLE_DEV_PM_OPS(gpio_privacy_pm_ops, gpio_privacy_suspend, gpio_privacy_resume); #endif static struct platform_driver gpio_privacy_driver = { .driver = { .name = "gpio-privacy", #ifdef CONFIG_PM_SLEEP .pm = &gpio_privacy_pm_ops, #endif .of_match_table = of_match_ptr(privacy_of_table), }, .probe = gpio_privacy_probe, .remove = gpio_privacy_remove, }; static int __init gpio_privacy_init(void) { return platform_driver_register(&gpio_privacy_driver); } static void __exit gpio_privacy_exit(void) { platform_driver_unregister(&gpio_privacy_driver); } module_init(gpio_privacy_init); module_exit(gpio_privacy_exit); MODULE_LICENSE("GPL"); how to enable pr_debug ?
08-16
Introduction During the past year, we have seen the rapid development of video generation models with the release of several open-source models, such as HunyuanVideo, CogVideoX and Mochi. It is very exciting to see that open-source video models are going to beat closed-source ones. However, the inference speed of these models is still a bottleneck for real-time applications and deployment. In this article, we will use ParaAttention, a library that implements Context Parallelism and First Block Cache, as well as other techniques like torch.compile and FP8 Dynamic Quantization, to achieve the fastest inference speed for HunyuanVideo. If you want to speed up other models like CogVideoX, Mochi or FLUX, you can also follow the same steps in this article. We set up our experiments on NVIDIA L20 GPUs, which only have PCIe support. If you have NVIDIA A100 or H100 GPUs with NVLink support, you can achieve a better speedup with context parallelism, especially when the number of GPUs is large. HunyuanVideo Inference with diffusers Like many other generative AI models, HunyuanVideo has its official code repository and is supported by other frameworks like diffusers and ComfyUI. In this article, we will focus on optimizing the inference speed of HunyuanVideo with diffusers. 
To use HunyuanVideo with diffusers, we need to install its latest version: pip3 install -U diffusers Then, we can load the model and generate video frames with the following code: import time import torch from diffusers import HunyuanVideoPipeline, HunyuanVideoTransformer3DModel from diffusers.utils import export_to_video model_id = "tencent/HunyuanVideo" transformer = HunyuanVideoTransformer3DModel.from_pretrained( model_id, subfolder="transformer", torch_dtype=torch.bfloat16, revision="refs/pr/18", ) pipe = HunyuanVideoPipeline.from_pretrained( model_id, transformer=transformer, torch_dtype=torch.float16, revision="refs/pr/18", ).to("cuda") pipe.vae.enable_tiling() begin = time.time() output = pipe( prompt="A cat walks on the grass, realistic", height=720, width=1280, num_frames=129, num_inference_steps=30, ).frames[0] end = time.time() print(f"Time: {end - begin:.2f}s") print("Saving video to hunyuan_video.mp4") export_to_video(output, "hunyuan_video.mp4", fps=15) However, most people will experience OOM (Out of Memory) errors when running the above code. This is because the HunyuanVideo transformer model is relatively large and it has quite a large text encoder. Besides, HunyuanVideo requires a variable length of text conditions and the diffusers library implements this feature with an attn_mask in scaled_dot_product_attention. The size of attn_mask is proportional to the square of the input sequence length, which becomes prohibitively large when we increase the resolution and the number of frames of the inference! Luckily, we can use ParaAttention to solve this problem. In ParaAttention, we patch the original implementation in diffusers to cut the text conditions before calling scaled_dot_product_attention. 
We implement this in our apply_cache_on_pipe function so we can call it after loading the model: pip3 install -U para-attn pipe = HunyuanVideoPipeline.from_pretrained( model_id, transformer=transformer, torch_dtype=torch.float16, revision="refs/pr/18", ).to("cuda") from para_attn.first_block_cache.diffusers_adapters import apply_cache_on_pipe apply_cache_on_pipe(pipe, residual_diff_threshold=0.0) We pass residual_diff_threshold=0.0 to apply_cache_on_pipe to disable the cache mechanism now, because we will enable it later. Here, we only want it to cut the text conditions to avoid OOM errors. If you still experience OOM errors, you can try calling pipe.enable_model_cpu_offload or pipe.enable_sequential_cpu_offload after calling apply_cache_on_pipe. This is our baseline. On one single NVIDIA L20 GPU, we can generate 129 frames with 720p resolution in 30 inference steps in 3675.71 seconds. Apply First Block Cache on HunyuanVideo By caching the output of the transformer blocks in the transformer model and reusing them in the next inference steps, we can reduce the computation cost and make the inference faster. However, it is hard to decide when to reuse the cache to ensure the quality of the generated video. Recently, TeaCache suggested that we can use the timestep embedding to approximate the difference among model outputs. And AdaCache also shows that caching can contribute significant inference speedups without sacrificing the generation quality, across multiple video DiT baselines. However, TeaCache is still a bit complex as it needs a rescaling strategy to ensure the accuracy of the cache. In ParaAttention, we find that we can directly use the residual difference of the first transformer block output to approximate the difference among model outputs. When the difference is small enough, we can reuse the residual difference of previous inference steps, meaning that we in fact skip this denoising step. 
This has been proven to be effective in our experiments and we can achieve an up to 2x speedup on HunyuanVideo inference with very good quality. Cache in Diffusion Transformer How AdaCache works, First Block Cache is a variant of it To apply the first block cache on HunyuanVideo, we can call apply_cache_on_pipe with residual_diff_threshold=0.06, which is the default value for HunyuanVideo. apply_cache_on_pipe(pipe, residual_diff_threshold=0.06) HunyuanVideo without FBCache hunyuan_video_original.mp4 HunyuanVideo with FBCache hunyuan_video_fbc.mp4 We observe that the first block cache is very effective in speeding up the inference, and maintaining nearly no quality loss in the generated video. Now, on one single NVIDIA L20 GPU, we can generate 129 frames with 720p resolution in 30 inference steps in 2271.06 seconds. This is a 1.62x speedup compared to the baseline. Quantize the model into FP8 To further speed up the inference and reduce memory usage, we can quantize the model into FP8 with dynamic quantization. We must quantize both the activation and weight of the transformer model to utilize the 8-bit Tensor Cores on NVIDIA GPUs. Here, we use float8_weight_only and float8_dynamic_activation_float8_weight to quantize the text encoder and transformer model respectively. The default quantization method is per tensor quantization. If your GPU supports row-wise quantization, you can also try it for better accuracy. diffusers-torchao provides a really good tutorial on how to quantize models in diffusers and achieve a good speedup. Here, we simply install the latest torchao that is capable of quantizing HunyuanVideo. If you are not familiar with torchao quantization, you can refer to this documentation. pip3 install -U torch torchao We also need to pass the model to torch.compile to gain actual speedup. 
torch.compile with mode="max-autotune-no-cudagraphs" or mode="max-autotune" can help us to achieve the best performance by generating and selecting the best kernel for the model inference. The compilation process could take a long time, but it is worth it. If you are not familiar with torch.compile, you can refer to the official tutorial. In this example, we quantize the transformer model with dynamic FP8 quantization, and we also quantize the text encoder with weight-only FP8 quantization to further reduce memory usage. We also need to notice that the actual compilation happens the first time the model is called, so we need to warm up the model to measure the speedup correctly. Note: we find that dynamic quantization can significantly change the distribution of the model output, so you might need to tweak the residual_diff_threshold to a larger value to make it take effect. import time import torch from diffusers import HunyuanVideoPipeline, HunyuanVideoTransformer3DModel from diffusers.utils import export_to_video model_id = "tencent/HunyuanVideo" transformer = HunyuanVideoTransformer3DModel.from_pretrained( model_id, subfolder="transformer", torch_dtype=torch.bfloat16, revision="refs/pr/18", ) pipe = HunyuanVideoPipeline.from_pretrained( model_id, transformer=transformer, torch_dtype=torch.float16, revision="refs/pr/18", ).to("cuda") from para_attn.first_block_cache.diffusers_adapters import apply_cache_on_pipe apply_cache_on_pipe(pipe) from torchao.quantization import quantize_, float8_dynamic_activation_float8_weight, float8_weight_only quantize_(pipe.text_encoder, float8_weight_only()) quantize_(pipe.transformer, float8_dynamic_activation_float8_weight()) pipe.transformer = torch.compile( pipe.transformer, mode="max-autotune-no-cudagraphs", ) # Enable memory savings pipe.vae.enable_tiling() # pipe.enable_model_cpu_offload() # pipe.enable_sequential_cpu_offload() for i in range(2): begin = time.time() output = pipe( prompt="A cat walks on the grass, realistic", height=720, width=1280, num_frames=129, 
num_inference_steps=1 if i == 0 else 30, ).frames[0] end = time.time() if i == 0: print(f"Warm up time: {end - begin:.2f}s") else: print(f"Time: {end - begin:.2f}s") print("Saving video to hunyuan_video.mp4") export_to_video(output, "hunyuan_video.mp4", fps=15) The NVIDIA L20 GPU only has 48GB memory and could face OOM errors after compiling the model and not calling enable_model_cpu_offload, because HunyuanVideo has very large activation tensors when running with high resolution and a large number of frames. So here we skip measuring the speedup with quantization and compilation on one single NVIDIA L20 GPU and choose to use context parallelism to relieve the memory pressure. If you want to run HunyuanVideo with torch.compile on GPUs with less than 80GB memory, you can try reducing the resolution and the number of frames to avoid OOM errors. Because large video generation models are usually bottlenecked by the attention computation rather than the fully connected layers, we don't observe a significant speedup with quantization and compilation. However, models like FLUX and SD3 can benefit a lot from quantization and compilation, so it is worth trying for these models. Parallelize the inference with Context Parallelism A lot faster than before, right? But we are not satisfied with the speedup we have achieved so far. If we want to accelerate the inference further, we can use context parallelism to parallelize the inference. Libraries like xDit and our ParaAttention provide ways to scale up the inference with multiple GPUs. In ParaAttention, we design our API in a compositional way so that we can combine context parallelism with first block cache and dynamic quantization all together. We provide very detailed instructions and examples of how to scale up the inference with multiple GPUs in our ParaAttention repository. Users can easily launch the inference with multiple GPUs by calling torchrun. 
If there is a need to make the inference process persistent and serviceable, it is suggested to use torch.multiprocessing to write your own inference processor, which can eliminate the overhead of launching the process and loading and recompiling the model. Below is our ultimate code to achieve the fastest HunyuanVideo inference: import time import torch import torch.distributed as dist from diffusers import HunyuanVideoPipeline, HunyuanVideoTransformer3DModel from diffusers.utils import export_to_video dist.init_process_group() torch.cuda.set_device(dist.get_rank()) # [rank1]: RuntimeError: Expected mha_graph->execute(handle, variant_pack, workspace_ptr.get()).is_good() to be true, but got false. (Could this error message be improved? If so, please report an enhancement request to PyTorch.) # torch.backends.cuda.enable_cudnn_sdp(False) model_id = "tencent/HunyuanVideo" transformer = HunyuanVideoTransformer3DModel.from_pretrained( model_id, subfolder="transformer", torch_dtype=torch.bfloat16, revision="refs/pr/18", ) pipe = HunyuanVideoPipeline.from_pretrained( model_id, transformer=transformer, torch_dtype=torch.float16, revision="refs/pr/18", ).to("cuda") from para_attn.context_parallel import init_context_parallel_mesh from para_attn.context_parallel.diffusers_adapters import parallelize_pipe from para_attn.parallel_vae.diffusers_adapters import parallelize_vae mesh = init_context_parallel_mesh( pipe.device.type, ) parallelize_pipe( pipe, mesh=mesh, ) parallelize_vae(pipe.vae, mesh=mesh._flatten()) from para_attn.first_block_cache.diffusers_adapters import apply_cache_on_pipe apply_cache_on_pipe(pipe) # from torchao.quantization import quantize_, float8_dynamic_activation_float8_weight, float8_weight_only # # torch._inductor.config.reorder_for_compute_comm_overlap = True # # quantize_(pipe.text_encoder, float8_weight_only()) # quantize_(pipe.transformer, float8_dynamic_activation_float8_weight()) # pipe.transformer = torch.compile( # pipe.transformer, 
mode="max-autotune-no-cudagraphs", # ) # Enable memory savings pipe.vae.enable_tiling() # pipe.enable_model_cpu_offload(gpu_id=dist.get_rank()) # pipe.enable_sequential_cpu_offload(gpu_id=dist.get_rank()) for i in range(2): begin = time.time() output = pipe( prompt="A cat walks on the grass, realistic", height=720, width=1280, num_frames=129, num_inference_steps=1 if i == 0 else 30, output_type="pil" if dist.get_rank() == 0 else "pt", ).frames[0] end = time.time() if dist.get_rank() == 0: if i == 0: print(f"Warm up time: {end - begin:.2f}s") else: print(f"Time: {end - begin:.2f}s") if dist.get_rank() == 0: print("Saving video to hunyuan_video.mp4") export_to_video(output, "hunyuan_video.mp4", fps=15) dist.destroy_process_group() We save the above code to run_hunyuan_video.py and run it with torchrun: torchrun --nproc_per_node=8 run_hunyuan_video.py With 8 NVIDIA L20 GPUs, we can generate 129 frames with 720p resolution in 30 inference steps in 649.23 seconds. This is a 5.66x speedup compared to the baseline! 翻译
09-24
评论
成就一亿技术人!
拼手气红包6.0元
还能输入1000个字符
 
红包 添加红包
表情包 插入表情
 条评论被折叠 查看
添加红包

请填写红包祝福语或标题

红包个数最小为10个

红包金额最低5元

当前余额3.43前往充值 >
需支付:10.00
成就一亿技术人!
领取后你会自动成为博主和红包主的粉丝 规则
hope_wisdom
发出的红包
实付
使用余额支付
点击重新获取
扫码支付
钱包余额 0

抵扣说明:

1.余额是钱包充值的虚拟货币,按照1:1的比例进行支付金额的抵扣。
2.余额无法直接购买下载,可以购买VIP、付费专栏及课程。

余额充值