From be30c0d71e75d3e24674edcc7e02d005d2a9f5c2 Mon Sep 17 00:00:00 2001 From: Li Xi Date: Mon, 8 Jan 2018 10:09:46 +0900 Subject: [PATCH] LU-10472 osd-ldiskfs: add T10PI support for BIO This patch enables the data integrity check in osd-ldiskfs when submitting bios. A fault injection mechanism is added to make sure the data integrity check works well. On an OST with the T10PI feature enabled, the following results are expected: $ lctl set_param fail_loc=0x243 fail_loc=0x243 $ dd if=/dev/zero of=/mnt/lustre/file bs=1048576 count=100 dd: error writing '/mnt/lustre/file': Invalid or incomplete multibyte or wide character 34+0 records in 33+0 records out 34603008 bytes (35 MB) copied, 0.510675 s, 67.8 MB/s When doing fault injection, the write operation will wait until the result is returned from the BIO. Otherwise, the error number may not be returned to the application. This implies a problem: because of the async submission of BIOs, even if the OST has T10PI enabled, the application might not be able to get an error notification when data corruption happens. However, there is nothing we can do to improve this (unless write performance is not important), because async commit is essential for good performance. 
Change-Id: I76cc14b42feed835158100d35f65aedae0d79a5c Signed-off-by: Li Xi Reviewed-on: https://review.whamcloud.com/30792 Tested-by: Jenkins Tested-by: Maloo Reviewed-by: Gu Zheng Reviewed-by: Andreas Dilger Reviewed-by: Faccini Bruno --- lustre/include/obd_support.h | 1 + lustre/osd-ldiskfs/osd_io.c | 78 ++++++++++++++++++++++++++++++++++---------- 2 files changed, 62 insertions(+), 17 deletions(-) diff --git a/lustre/include/obd_support.h b/lustre/include/obd_support.h index 005a958..a0dcdb9 100644 --- a/lustre/include/obd_support.h +++ b/lustre/include/obd_support.h @@ -330,6 +330,7 @@ extern char obd_jobid_var[]; #define OBD_FAIL_OST_GL_WORK_ALLOC 0x240 #define OBD_FAIL_OST_SKIP_LV_CHECK 0x241 #define OBD_FAIL_OST_STATFS_DELAY 0x242 +#define OBD_FAIL_OST_INTEGRITY_FAULT 0x243 #define OBD_FAIL_LDLM 0x300 #define OBD_FAIL_LDLM_NAMESPACE_NEW 0x301 diff --git a/lustre/osd-ldiskfs/osd_io.c b/lustre/osd-ldiskfs/osd_io.c index 7b4c8a8..102ac81 100644 --- a/lustre/osd-ldiskfs/osd_io.c +++ b/lustre/osd-ldiskfs/osd_io.c @@ -275,28 +275,52 @@ static int can_be_merged(struct bio *bio, sector_t sector) return bio_end_sector(bio) == sector ? 
1 : 0; } +/* + * This function will change the data written, thus it should only be + * used when checking data integrity feature + */ +static void bio_integrity_fault_inject(struct bio *bio) +{ + struct bio_vec *bvec; + int i; + void *kaddr; + char *addr; + + bio_for_each_segment_all(bvec, bio, i) { + struct page *page = bvec->bv_page; + + kaddr = kmap(page); + addr = kaddr; + *addr = ~(*addr); + kunmap(page); + break; + } +} + static int osd_do_bio(struct osd_device *osd, struct inode *inode, struct osd_iobuf *iobuf) { - int blocks_per_page = PAGE_SIZE >> inode->i_blkbits; - struct page **pages = iobuf->dr_pages; - int npages = iobuf->dr_npages; - sector_t *blocks = iobuf->dr_blocks; - int total_blocks = npages * blocks_per_page; - int sector_bits = inode->i_sb->s_blocksize_bits - 9; - unsigned int blocksize = inode->i_sb->s_blocksize; - struct bio *bio = NULL; - struct page *page; - unsigned int page_offset; - sector_t sector; - int nblocks; - int block_idx; - int page_idx; - int i; - int rc = 0; + int blocks_per_page = PAGE_SIZE >> inode->i_blkbits; + struct page **pages = iobuf->dr_pages; + int npages = iobuf->dr_npages; + sector_t *blocks = iobuf->dr_blocks; + int total_blocks = npages * blocks_per_page; + int sector_bits = inode->i_sb->s_blocksize_bits - 9; + unsigned int blocksize = inode->i_sb->s_blocksize; + struct bio *bio = NULL; + struct page *page; + unsigned int page_offset; + sector_t sector; + int nblocks; + int block_idx; + int page_idx; + int i; + int rc = 0; + bool fault_inject; DECLARE_PLUG(plug); ENTRY; + fault_inject = OBD_FAIL_CHECK(OBD_FAIL_OST_INTEGRITY_FAULT); LASSERT(iobuf->dr_npages == npages); osd_brw_stats_update(osd, iobuf); @@ -353,6 +377,16 @@ static int osd_do_bio(struct osd_device *osd, struct inode *inode, bio_phys_segments(q, bio), queue_max_phys_segments(q), 0, queue_max_hw_segments(q)); + if (bio_integrity_enabled(bio)) { + if (bio_integrity_prep(bio)) { + bio_put(bio); + rc = -EIO; + goto out; + } + if 
(unlikely(fault_inject)) + bio_integrity_fault_inject(bio); + } + record_start_io(iobuf, bi_size); osd_submit_bio(iobuf->dr_rw, bio); } @@ -386,6 +420,16 @@ static int osd_do_bio(struct osd_device *osd, struct inode *inode, } if (bio != NULL) { + if (bio_integrity_enabled(bio)) { + if (bio_integrity_prep(bio)) { + bio_put(bio); + rc = -EIO; + goto out; + } + if (unlikely(fault_inject)) + bio_integrity_fault_inject(bio); + } + record_start_io(iobuf, bio_sectors(bio) << 9); osd_submit_bio(iobuf->dr_rw, bio); rc = 0; @@ -398,7 +442,7 @@ out: * completion here. instead we proceed with transaction commit in * parallel and wait for IO completion once transaction is stopped * see osd_trans_stop() for more details -bzzz */ - if (iobuf->dr_rw == 0) { + if (iobuf->dr_rw == 0 || fault_inject) { wait_event(iobuf->dr_wait, atomic_read(&iobuf->dr_numreqs) == 0); osd_fini_iobuf(osd, iobuf); -- 1.8.3.1