diff --git a/drivers/block/loop.c b/drivers/block/loop.c
index 310de0463beb..b32d8ce63e85 100644
--- a/drivers/block/loop.c
+++ b/drivers/block/loop.c
@@ -85,8 +85,33 @@ struct loop_cmd {
 	struct bio_vec *bvec;
 	struct cgroup_subsys_state *blkcg_css;
 	struct cgroup_subsys_state *memcg_css;
+#ifdef CONFIG_KCOV
+	/* Call trace and task identity recorded by loop_queue_rq(). */
+	unsigned long stack_entries[30];
+	int stack_nr;
+	pid_t pid;
+	char comm[TASK_COMM_LEN];
+#endif
 };
 
+/*
+ * Debugging aid whose body is compiled only under CONFIG_KCOV: when @lo is
+ * observed in the Lo_rundown state, report the task and the call trace that
+ * loop_queue_rq() recorded in @cmd. This function takes no lock itself;
+ * lo->lo_state is sampled with data_race(READ_ONCE()).
+ */
+static void loop_check_io_race(struct loop_device *lo, struct loop_cmd *cmd)
+{
+#ifdef CONFIG_KCOV
+	if (unlikely(data_race(READ_ONCE(lo->lo_state)) == Lo_rundown)) {
+		pr_err("BUG: %s/%d is doing I/O request on loop%d in Lo_rundown state.\n",
+		       cmd->comm, cmd->pid, lo->lo_number);
+		pr_err("Call trace:\n");
+		stack_trace_print(cmd->stack_entries, cmd->stack_nr, 4);
+	}
+#endif
+}
+
 #define LOOP_IDLE_WORKER_TIMEOUT (60 * HZ)
 #define LOOP_DEFAULT_HW_Q_DEPTH 128
 
@@ -1743,8 +1768,59 @@ static void lo_release(struct gendisk *disk)
 	need_clear = (lo->lo_state == Lo_rundown);
 	mutex_unlock(&lo->lo_mutex);
 
-	if (need_clear)
+	if (need_clear) {
+		/*
+		 * Temporarily release disk->open_mutex in order to flush pending I/O
+		 * requests before clearing the backing device.
+		 *
+		 * This is a layering violation. But since bdev->bd_disk->fops->release()
+		 * (which is mapped to lo_release()) is the final function which
+		 * blkdev_put_whole() from bdev_release() calls immediately before
+		 * releasing disk->open_mutex, this changes nothing except that it opens
+		 * a new race window allowing disk->fops->open() (which is mapped to
+		 * lo_open()) to be called.
+		 *
+		 * Even if lo_open() is called from blkdev_get_whole() due to this race,
+		 * the Lo_rundown state guarantees that lo_open() will fail with -ENXIO.
+		 * Thus, there will be effectively no change caused by this violation.
+		 */
+		mutex_unlock(&lo->lo_disk->open_mutex);
+		/*
+		 * Now that loop_queue_rq() sees lo->lo_state != Lo_bound,
+		 * wait for already started loop_queue_rq() to complete.
+		 */
+		synchronize_rcu();
+		/*
+		 * Now that no more work is scheduled by loop_queue_rq(),
+		 * wait for already scheduled work to complete.
+		 */
+		drain_workqueue(lo->workqueue);
+		/*
+		 * Now that no more AIO requests are scheduled by lo_rw_aio(),
+		 * wait for already started AIO to complete.
+		 *
+		 * Due to synchronize_rcu() + drain_workqueue() sequence above,
+		 * calling blk_mq_unfreeze_queue() immediately after blk_mq_freeze_queue()
+		 * returns has to be safe, for loop_queue_rq() no longer schedules new
+		 * lo_rw_aio() works and lo_rw_aio() no longer submits new AIO requests.
+		 *
+		 * Deferring blk_mq_unfreeze_queue() does not help because we are about
+		 * to clear the backing device and drop the refcount for the backing device.
+		 * There is nothing we can do if blk_mq_freeze_queue() fails to flush.
+		 */
+		blk_mq_unfreeze_queue(lo->lo_queue, blk_mq_freeze_queue(lo->lo_queue));
+		/*
+		 * Perform remaining cleanup, with disk->open_mutex held.
+		 *
+		 * The lo->lo_state should remain Lo_rundown even though we temporarily
+		 * released disk->open_mutex, because we are the only and the last user
+		 * of this loop device, as lo_open() cannot succeed.
+		 */
+		mutex_lock(&lo->lo_disk->open_mutex);
+		if (WARN_ON(data_race(READ_ONCE(lo->lo_state)) != Lo_rundown))
+			return;
 		__loop_clr_fd(lo);
+	}
 }
 
 static void lo_free_disk(struct gendisk *disk)
@@ -1851,10 +1927,19 @@ static blk_status_t loop_queue_rq(struct blk_mq_hw_ctx *hctx,
 	struct loop_cmd *cmd = blk_mq_rq_to_pdu(rq);
 	struct loop_device *lo = rq->q->queuedata;
 
+#ifdef CONFIG_KCOV
+	/* Record the current task and call trace for loop_check_io_race(). */
+	cmd->stack_nr = stack_trace_save(cmd->stack_entries, ARRAY_SIZE(cmd->stack_entries), 0);
+	cmd->pid = current->pid;
+	get_task_comm(cmd->comm, current);
+#endif
+
 	blk_mq_start_request(rq);
 
-	if (data_race(READ_ONCE(lo->lo_state)) != Lo_bound)
+	if (data_race(READ_ONCE(lo->lo_state)) != Lo_bound) {
+		loop_check_io_race(lo, cmd);
 		return BLK_STS_IOERR;
+	}
 
 	switch (req_op(rq)) {
 	case REQ_OP_FLUSH:
@@ -1897,6 +1982,7 @@ static void loop_handle_cmd(struct loop_cmd *cmd)
 	int ret = 0;
 	struct mem_cgroup *old_memcg = NULL;
 
+	loop_check_io_race(lo, cmd);
 	if (write && (lo->lo_flags & LO_FLAGS_READ_ONLY)) {
 		ret = -EIO;
 		goto failed;