From 6932efb478366b4f43b3ff1a2df448fc73688e36 Mon Sep 17 00:00:00 2001 From: Cen Zhang Date: Sun, 21 Jun 2026 21:59:30 +0800 Subject: [PATCH] block, bfq: protect async queue reset with blkcg locks Writing 0 to BFQ's low_latency attribute ends weight raising for active, idle and async queues. The async cgroup path walks q->blkg_list, converts each blkg to BFQ policy data and then reads bfqg->async_bfqq and bfqg->async_idle_bfqq. That walk was protected only by bfqd->lock. blkcg release work is serialized by q->blkcg_mutex and q->queue_lock instead, and blkg_free_workfn() can call BFQ's pd_free_fn before it removes blkg->q_node from q->blkg_list. A low_latency reset can therefore still find the blkg on the queue list after the BFQ policy data has been freed. The buggy scenario involves two paths, with each column showing the order within that path: BFQ low_latency reset: blkcg blkg release work: 1. bfq_low_latency_store() 1. blkg_free_workfn() takes calls bfq_end_wr(). q->blkcg_mutex. 2. bfq_end_wr_async() walks 2. BFQ pd_free_fn drops the q->blkg_list. final bfq_group reference. 3. blkg_to_bfqg() returns 3. blkg->q_node remains on the stale policy data. q->blkg_list until list_del_init(). 4. bfq_end_wr_async_queues() reads async queue fields. Fix this by taking q->blkcg_mutex and q->queue_lock around the q->blkg_list walk, then taking bfqd->lock before touching BFQ async queues. The mutex serializes against policy-data free and queue_lock stabilizes the list. Move the async reset out of bfq_end_wr()'s existing bfqd->lock critical section so the lock order matches blkcg policy callbacks. Validation reproduced this kernel report: BUG: KASAN: slab-use-after-free in bfq_end_wr_async_queues+0x246/0x340 Call Trace: dump_stack_lvl+0x66/0xa0 print_report+0xce/0x630 ? bfq_end_wr_async_queues+0x246/0x340 ? srso_alias_return_thunk+0x5/0xfbef5 ? __virt_addr_valid+0x20d/0x410 ? bfq_end_wr_async_queues+0x246/0x340 kasan_report+0xe0/0x110 ? bfq_end_wr_async_queues+0x246/0x340 bfq_end_wr_async_queues+0x246/0x340 bfq_end_wr_async+0xba/0x180 bfq_low_latency_store+0x4e5/0x690 ? 0xffffffffc02150da ? __pfx_bfq_low_latency_store+0x10/0x10 ? __pfx_bfq_low_latency_store+0x10/0x10 elv_attr_store+0xc4/0x110 kernfs_fop_write_iter+0x2f5/0x4a0 vfs_write+0x604/0x11f0 ? __pfx_locks_remove_posix+0x10/0x10 ? __pfx_vfs_write+0x10/0x10 ksys_write+0xf9/0x1d0 ? __pfx_ksys_write+0x10/0x10 do_syscall_64+0x115/0x6a0 entry_SYSCALL_64_after_hwframe+0x77/0x7f Allocated by task 544: kasan_save_stack+0x33/0x60 kasan_save_track+0x14/0x30 __kasan_kmalloc+0xaa/0xb0 bfq_pd_alloc+0xc0/0x1b0 blkg_alloc+0x346/0x960 blkg_create+0x8c2/0x10d0 bio_associate_blkg_from_css+0x9f3/0xfa0 bio_associate_blkg+0xd9/0x200 bio_init+0x303/0x640 __blkdev_direct_IO_simple+0x56b/0x8a0 blkdev_direct_IO+0x8e7/0x2580 blkdev_read_iter+0x205/0x400 vfs_read+0x7b0/0xda0 ksys_read+0xf9/0x1d0 do_syscall_64+0x115/0x6a0 entry_SYSCALL_64_after_hwframe+0x77/0x7f Freed by task 465: kasan_save_stack+0x33/0x60 kasan_save_track+0x14/0x30 kasan_save_free_info+0x3b/0x60 __kasan_slab_free+0x5f/0x80 kfree+0x307/0x580 blkg_free_workfn+0xef/0x460 process_one_work+0x8d0/0x1870 worker_thread+0x575/0xf80 kthread+0x2e7/0x3c0 ret_from_fork+0x576/0x810 ret_from_fork_asm+0x1a/0x30 Fixes: 44e44a1b329e ("block, bfq: improve responsiveness") Assisted-by: Codex:gpt-5.5 Signed-off-by: Cen Zhang Reviewed-by: Tao Cui --- block/bfq-cgroup.c | 13 ++++++++++++- block/bfq-iosched.c | 3 ++- 2 files changed, 14 insertions(+), 2 deletions(-) diff --git a/block/bfq-cgroup.c b/block/bfq-cgroup.c index 0bd0332b3d786..d8fdace464b45 100644 --- a/block/bfq-cgroup.c +++ b/block/bfq-cgroup.c @@ -936,14 +936,23 @@ static void bfq_pd_offline(struct blkg_policy_data *pd) void bfq_end_wr_async(struct bfq_data *bfqd) { + struct request_queue *q = bfqd->queue; struct blkcg_gq *blkg; - list_for_each_entry(blkg, &bfqd->queue->blkg_list, q_node) { + mutex_lock(&q->blkcg_mutex); + spin_lock_irq(&q->queue_lock); + spin_lock(&bfqd->lock); + + list_for_each_entry(blkg, &q->blkg_list, q_node) { struct bfq_group *bfqg = blkg_to_bfqg(blkg); bfq_end_wr_async_queues(bfqd, bfqg); } bfq_end_wr_async_queues(bfqd, bfqd->root_group); + + spin_unlock(&bfqd->lock); + spin_unlock_irq(&q->queue_lock); + mutex_unlock(&q->blkcg_mutex); } static int bfq_io_show_weight_legacy(struct seq_file *sf, void *v) @@ -1416,7 +1425,9 @@ void bfq_bic_update_cgroup(struct bfq_io_cq *bic, struct bio *bio) {} void bfq_end_wr_async(struct bfq_data *bfqd) { + spin_lock_irq(&bfqd->lock); bfq_end_wr_async_queues(bfqd, bfqd->root_group); + spin_unlock_irq(&bfqd->lock); } struct bfq_group *bfq_bio_bfqg(struct bfq_data *bfqd, struct bio *bio) diff --git a/block/bfq-iosched.c b/block/bfq-iosched.c index 141c602d5e858..eec9be62061bd 100644 --- a/block/bfq-iosched.c +++ b/block/bfq-iosched.c @@ -2653,9 +2653,10 @@ static void bfq_end_wr(struct bfq_data *bfqd) } list_for_each_entry(bfqq, &bfqd->idle_list, bfqq_list) bfq_bfqq_end_wr(bfqq); - bfq_end_wr_async(bfqd); spin_unlock_irq(&bfqd->lock); + + bfq_end_wr_async(bfqd); } static sector_t bfq_io_struct_pos(void *io_struct, bool request)