diff --git a/drivers/md/dm-io.c b/drivers/md/dm-io.c index 1db565b37620..f382e9f9be05 100644 --- a/drivers/md/dm-io.c +++ b/drivers/md/dm-io.c @@ -37,6 +37,7 @@ struct io { struct dm_io_client *client; io_notify_fn callback; void *context; + struct bio *orig_bio; void *vma_invalidate_address; unsigned long vma_invalidate_size; } __aligned(DM_IO_MAX_REGIONS); @@ -132,8 +133,18 @@ static void complete_io(struct io *io) static void dec_count(struct io *io, unsigned int region, blk_status_t error) { - if (error) + if (error) { set_bit(region, &io->error_bits); + /* + * BLK_STS_INVAL means the bio was not valid for the underlying + * device (e.g. a misaligned direct I/O), which is a caller error + * rather than a device failure. Record it on the original bio so + * bio-based targets can propagate it instead of treating it as a + * media error and failing the device. + */ + if (error == BLK_STS_INVAL && io->orig_bio) + io->orig_bio->bi_status = error; + } if (atomic_dec_and_test(&io->count)) complete_io(io); @@ -170,12 +181,11 @@ struct dpages { struct page **p, unsigned long *len, unsigned int *offset); void (*next_page)(struct dpages *dp); - union { - unsigned int context_u; - struct bvec_iter context_bi; - }; + unsigned int context_u; void *context_ptr; + struct bio *orig_bio; + void *vma_invalidate_address; unsigned long vma_invalidate_size; }; @@ -210,44 +220,6 @@ static void list_dp_init(struct dpages *dp, struct page_list *pl, unsigned int o dp->context_ptr = pl; } -/* - * Functions for getting the pages from a bvec. - */ -static void bio_get_page(struct dpages *dp, struct page **p, - unsigned long *len, unsigned int *offset) -{ - struct bio_vec bvec = bvec_iter_bvec((struct bio_vec *)dp->context_ptr, - dp->context_bi); - - *p = bvec.bv_page; - *len = bvec.bv_len; - *offset = bvec.bv_offset; - - /* avoid figuring it out again in bio_next_page() */ - dp->context_bi.bi_sector = (sector_t)bvec.bv_len; -} - -static void bio_next_page(struct dpages *dp) -{ - unsigned int len = (unsigned int)dp->context_bi.bi_sector; - - bvec_iter_advance((struct bio_vec *)dp->context_ptr, - &dp->context_bi, len); -} - -static void bio_dp_init(struct dpages *dp, struct bio *bio) -{ - dp->get_page = bio_get_page; - dp->next_page = bio_next_page; - - /* - * We just use bvec iterator to retrieve pages, so it is ok to - * access the bvec table directly here - */ - dp->context_ptr = bio->bi_io_vec; - dp->context_bi = bio->bi_iter; -} - /* * Functions for getting the pages from a VMA. */ @@ -332,6 +304,21 @@ static void do_region(const blk_opf_t opf, unsigned int region, return; } + if (dp->orig_bio) { + bio = bio_alloc_clone(where->bdev, dp->orig_bio, GFP_NOIO, + &io->client->bios); + bio->bi_iter.bi_sector = where->sector; + bio->bi_iter.bi_size = where->count << SECTOR_SHIFT; + bio->bi_opf = opf; + bio->bi_end_io = endio; + bio->bi_ioprio = ioprio; + store_io_and_region_in_bio(bio, io, region); + + atomic_inc(&io->count); + submit_bio(bio); + return; + } + /* * where->count may be zero if op holds a flush and we need to * send a zero-sized flush. @@ -422,6 +409,7 @@ static void async_io(struct dm_io_client *client, unsigned int num_regions, io->client = client; io->callback = fn; io->context = context; + io->orig_bio = dp->orig_bio; io->vma_invalidate_address = dp->vma_invalidate_address; io->vma_invalidate_size = dp->vma_invalidate_size; @@ -468,6 +456,7 @@ static int dp_init(struct dm_io_request *io_req, struct dpages *dp, dp->vma_invalidate_address = NULL; dp->vma_invalidate_size = 0; + dp->orig_bio = NULL; switch (io_req->mem.type) { case DM_IO_PAGE_LIST: @@ -475,7 +464,11 @@ static int dp_init(struct dm_io_request *io_req, struct dpages *dp, break; case DM_IO_BIO: - bio_dp_init(dp, io_req->mem.ptr.bio); + /* + * The destination bios clone this bio's biovec directly, so + * there are no per-page accessors to set up here. + */ + dp->orig_bio = io_req->mem.ptr.bio; break; case DM_IO_VMA: diff --git a/drivers/md/dm-raid1.c b/drivers/md/dm-raid1.c index de5c00704e69..022ad791c295 100644 --- a/drivers/md/dm-raid1.c +++ b/drivers/md/dm-raid1.c @@ -524,6 +524,17 @@ static void read_callback(unsigned long error, void *context) return; } + /* + * BLK_STS_INVAL means the bio was not valid for the underlying device, + * e.g. a misaligned direct I/O. That is a caller error, not a device + * failure, so propagate it rather than failing the mirror and retrying + * on the other legs, which would fail the same way. + */ + if (bio->bi_status == BLK_STS_INVAL) { + bio_endio(bio); + return; + } + fail_mirror(m, DM_RAID1_READ_ERROR); if (likely(default_ok(m)) || mirror_available(m->ms, bio)) { @@ -622,6 +633,16 @@ static void write_callback(unsigned long error, void *context) return; } + /* + * BLK_STS_INVAL means the bio was not valid for the underlying device, + * e.g. a misaligned direct I/O. Propagate the error without degrading + * the array. + */ + if (bio->bi_status == BLK_STS_INVAL) { + bio_endio(bio); + return; + } + /* * If the bio is discard, return an error, but do not * degrade the array. @@ -1262,7 +1283,12 @@ static int mirror_end_io(struct dm_target *ti, struct bio *bio, return DM_ENDIO_DONE; } - if (*error == BLK_STS_NOTSUPP) + /* + * BLK_STS_INVAL means the bio was not valid for the underlying device, + * e.g. a misaligned direct I/O. Propagate it rather than failing the + * mirror and retrying, which would fail the same way on every leg. + */ + if (*error == BLK_STS_NOTSUPP || *error == BLK_STS_INVAL) goto out; if (bio->bi_opf & REQ_RAHEAD)