From: Alex Zhuravlev Date: Thu, 15 Aug 2019 18:33:08 +0000 (+0400) Subject: LU-12612 osd: add lnb size down to osd X-Git-Tag: 2.12.90~107 X-Git-Url: https://git.whamcloud.com/?p=fs%2Flustre-release.git;a=commitdiff_plain;h=8033f80de3d0db87f7e965078ceee62033adb58d LU-12612 osd: add lnb size down to osd so that each OSD can check for lnb array overflow. the patch isn't final - there will be proper implementation in osd-zfs and a new test. Signed-off-by: Alex Zhuravlev Change-Id: I43683c84e48006b4075f9a8b3e87cdfeae28c02b Reviewed-on: https://review.whamcloud.com/35801 Reviewed-by: Mike Pershin Reviewed-by: Lai Siyao Tested-by: jenkins Tested-by: Maloo Reviewed-by: Oleg Drokin --- diff --git a/lustre/include/dt_object.h b/lustre/include/dt_object.h index 9e9f1db..47e2591 100644 --- a/lustre/include/dt_object.h +++ b/lustre/include/dt_object.h @@ -1196,6 +1196,7 @@ struct dt_body_operations { * \param[in] pos position in the object to start * \param[in] len size of region in bytes * \param[out] lb array of descriptors to fill + * \param[in] maxlnb max slots in @lnb array * \param[in] rw 0 if used to read, 1 if used for write * * \retval positive number of descriptors on success @@ -1206,6 +1207,7 @@ struct dt_body_operations { loff_t pos, ssize_t len, struct niobuf_local *lb, + int maxlnb, enum dt_bufs_type rw); /** @@ -2409,13 +2411,14 @@ static inline int dt_ref_del(const struct lu_env *env, static inline int dt_bufs_get(const struct lu_env *env, struct dt_object *d, struct niobuf_remote *rnb, - struct niobuf_local *lnb, enum dt_bufs_type rw) + struct niobuf_local *lnb, int maxlnb, + enum dt_bufs_type rw) { LASSERT(d); LASSERT(d->do_body_ops); LASSERT(d->do_body_ops->dbo_bufs_get); return d->do_body_ops->dbo_bufs_get(env, d, rnb->rnb_offset, - rnb->rnb_len, lnb, rw); + rnb->rnb_len, lnb, maxlnb, rw); } static inline int dt_bufs_put(const struct lu_env *env, struct dt_object *d, diff --git a/lustre/include/obd_support.h b/lustre/include/obd_support.h index 6ef38a0..850fdd0 100644 --- a/lustre/include/obd_support.h +++ b/lustre/include/obd_support.h @@ -348,6 +348,7 @@ extern char obd_jobid_var[]; #define OBD_FAIL_OST_DISCONNECT_DELAY 0x245 #define OBD_FAIL_OST_DELAY_TRANS 0x246 #define OBD_FAIL_OST_PREPARE_DELAY 0x247 +#define OBD_FAIL_OST_2BIG_NIOBUF 0x248 #define OBD_FAIL_LDLM 0x300 #define OBD_FAIL_LDLM_NAMESPACE_NEW 0x301 diff --git a/lustre/mdt/mdt_io.c b/lustre/mdt/mdt_io.c index 8961fa2..7a55ba2 100644 --- a/lustre/mdt/mdt_io.c +++ b/lustre/mdt/mdt_io.c @@ -383,6 +383,7 @@ static int mdt_preprw_read(const struct lu_env *env, struct obd_export *exp, { struct dt_object *dob; int i, j, rc, tot_bytes = 0; + int maxlnb = *nr_local; int level; ENTRY; @@ -420,11 +421,12 @@ static int mdt_preprw_read(const struct lu_env *env, struct obd_export *exp, dob = mdt_obj2dt(mo); /* parse remote buffers to local buffers and prepare the latter */ for (i = 0, j = 0; i < niocount; i++) { - rc = dt_bufs_get(env, dob, rnb + i, lnb + j, 0); + rc = dt_bufs_get(env, dob, rnb + i, lnb + j, maxlnb, 0); if (unlikely(rc < 0)) GOTO(buf_put, rc); /* correct index for local buffers to continue with */ j += rc; + maxlnb -= rc; *nr_local += rc; tot_bytes += rnb[i].rnb_len; } @@ -454,6 +456,7 @@ static int mdt_preprw_write(const struct lu_env *env, struct obd_export *exp, { struct dt_object *dob; int i, j, k, rc = 0, tot_bytes = 0; + int maxlnb = *nr_local; ENTRY; @@ -491,7 +494,7 @@ static int mdt_preprw_write(const struct lu_env *env, struct obd_export *exp, dob = mdt_obj2dt(mo); /* parse remote buffers to local buffers and prepare the latter */ for (i = 0, j = 0; i < obj->ioo_bufcnt; i++) { - rc = dt_bufs_get(env, dob, rnb + i, lnb + j, 1); + rc = dt_bufs_get(env, dob, rnb + i, lnb + j, maxlnb, 1); if (unlikely(rc < 0)) GOTO(err, rc); /* correct index for local buffers to continue with */ @@ -501,6 +504,7 @@ static int mdt_preprw_write(const struct lu_env *env, struct obd_export *exp, lnb[j + k].lnb_rc = -ENOSPC; } j += rc; + maxlnb -= rc; *nr_local += rc; tot_bytes += rnb[i].rnb_len; } @@ -1544,7 +1548,7 @@ int mdt_dom_read_on_open(struct mdt_thread_info *mti, struct mdt_device *mdt, if (lnb == NULL) GOTO(unlock, rc = -ENOMEM); - rc = dt_bufs_get(env, mo, rnb, lnb, 0); + rc = dt_bufs_get(env, mo, rnb, lnb, lnbs, 0); if (unlikely(rc < 0)) GOTO(free, rc); LASSERT(rc <= lnbs); diff --git a/lustre/ofd/ofd_dev.c b/lustre/ofd/ofd_dev.c index 3161d9a..30f6590 100644 --- a/lustre/ofd/ofd_dev.c +++ b/lustre/ofd/ofd_dev.c @@ -2075,7 +2075,8 @@ static int ofd_ladvise_prefetch(const struct lu_env *env, PTLRPC_MAX_BRW_PAGES; rnb.rnb_offset = start_index << PAGE_SHIFT; rnb.rnb_len = nr_local << PAGE_SHIFT; - rc = dt_bufs_get(env, ofd_object_child(fo), &rnb, lnb, dbt); + rc = dt_bufs_get(env, ofd_object_child(fo), &rnb, lnb, + PTLRPC_MAX_BRW_PAGES, dbt); if (unlikely(rc < 0)) break; nr_local = rc; diff --git a/lustre/ofd/ofd_io.c b/lustre/ofd/ofd_io.c index 092e592..d4bdd68 100644 --- a/lustre/ofd/ofd_io.c +++ b/lustre/ofd/ofd_io.c @@ -511,6 +511,7 @@ static int ofd_preprw_read(const struct lu_env *env, struct obd_export *exp, struct ofd_object *fo; int i, j, rc, tot_bytes = 0; enum dt_bufs_type dbt = DT_BUFS_TYPE_READ; + int maxlnb = *nr_local; ENTRY; LASSERT(env != NULL); @@ -534,14 +535,19 @@ static int ofd_preprw_read(const struct lu_env *env, struct obd_export *exp, dbt |= DT_BUFS_TYPE_LOCAL; for (*nr_local = 0, i = 0, j = 0; i < niocount; i++) { + + if (OBD_FAIL_CHECK(OBD_FAIL_OST_2BIG_NIOBUF)) + rnb[i].rnb_len = 100 * 1024 * 1024; + rc = dt_bufs_get(env, ofd_object_child(fo), rnb + i, - lnb + j, dbt); + lnb + j, maxlnb, dbt); if (unlikely(rc < 0)) GOTO(buf_put, rc); LASSERT(rc <= PTLRPC_MAX_BRW_PAGES); /* correct index for local buffers to continue with */ j += rc; *nr_local += rc; + maxlnb -= rc; LASSERT(j <= PTLRPC_MAX_BRW_PAGES); tot_bytes += rnb[i].rnb_len; } @@ -599,6 +605,7 @@ static int ofd_preprw_write(const struct lu_env *env, struct obd_export *exp, struct ofd_object *fo; int i, j, k, rc = 0, tot_bytes = 0; enum dt_bufs_type dbt = DT_BUFS_TYPE_WRITE; + int maxlnb = *nr_local; ENTRY; LASSERT(env != NULL); @@ -706,8 +713,10 @@ static int ofd_preprw_write(const struct lu_env *env, struct obd_export *exp, /* parse remote buffers to local buffers and prepare the latter */ for (*nr_local = 0, i = 0, j = 0; i < obj->ioo_bufcnt; i++) { + if (OBD_FAIL_CHECK(OBD_FAIL_OST_2BIG_NIOBUF)) + rnb[i].rnb_len += PAGE_SIZE; rc = dt_bufs_get(env, ofd_object_child(fo), - rnb + i, lnb + j, dbt); + rnb + i, lnb + j, maxlnb, dbt); if (unlikely(rc < 0)) GOTO(err, rc); LASSERT(rc <= PTLRPC_MAX_BRW_PAGES); @@ -720,6 +729,7 @@ static int ofd_preprw_write(const struct lu_env *env, struct obd_export *exp, } j += rc; *nr_local += rc; + maxlnb -= rc; LASSERT(j <= PTLRPC_MAX_BRW_PAGES); tot_bytes += rnb[i].rnb_len; } diff --git a/lustre/osd-ldiskfs/osd_io.c b/lustre/osd-ldiskfs/osd_io.c index b0774e2..c6a09d3 100644 --- a/lustre/osd-ldiskfs/osd_io.c +++ b/lustre/osd-ldiskfs/osd_io.c @@ -611,8 +611,9 @@ out: } static int osd_map_remote_to_local(loff_t offset, ssize_t len, int *nrpages, - struct niobuf_local *lnb) + struct niobuf_local *lnb, int maxlnb) { + int rc = 0; ENTRY; *nrpages = 0; @@ -621,6 +622,11 @@ static int osd_map_remote_to_local(loff_t offset, ssize_t len, int *nrpages, int poff = offset & (PAGE_SIZE - 1); int plen = PAGE_SIZE - poff; + if (*nrpages >= maxlnb) { + rc = -EOVERFLOW; + break; + } + if (plen > len) plen = len; lnb->lnb_file_offset = offset; @@ -642,7 +648,7 @@ static int osd_map_remote_to_local(loff_t offset, ssize_t len, int *nrpages, (*nrpages)++; } - RETURN(0); + RETURN(rc); } static struct page *osd_get_page(const struct lu_env *env, struct dt_object *dt, @@ -794,7 +800,7 @@ static int osd_bufs_put(const struct lu_env *env, struct dt_object *dt, */ static int osd_bufs_get(const struct lu_env *env, struct dt_object *dt, loff_t pos, ssize_t len, struct niobuf_local *lnb, - enum dt_bufs_type rw) + int maxlnb, enum dt_bufs_type rw) { struct osd_thread_info *oti = osd_oti_get(env); struct osd_object *obj = osd_dt_obj(dt); @@ -812,7 +818,9 @@ static int osd_bufs_get(const struct lu_env *env, struct dt_object *dt, } } - osd_map_remote_to_local(pos, len, &npages, lnb); + rc = osd_map_remote_to_local(pos, len, &npages, lnb, maxlnb); + if (rc) + RETURN(rc); /* this could also try less hard for DT_BUFS_TYPE_READAHEAD pages */ gfp_mask = rw & DT_BUFS_TYPE_LOCAL ? (GFP_NOFS | __GFP_HIGHMEM) : diff --git a/lustre/osd-zfs/osd_io.c b/lustre/osd-zfs/osd_io.c index ed3c92c..ccb6d68 100644 --- a/lustre/osd-zfs/osd_io.c +++ b/lustre/osd-zfs/osd_io.c @@ -330,7 +330,8 @@ static inline struct page *kmem_to_page(void *addr) * \retval negative error number of failure */ static int osd_bufs_get_read(const struct lu_env *env, struct osd_object *obj, - loff_t off, ssize_t len, struct niobuf_local *lnb) + loff_t off, ssize_t len, struct niobuf_local *lnb, + int maxlnb) { struct osd_device *osd = osd_obj2dev(obj); int rc, i, numbufs, npages = 0, drop_cache = 0; @@ -358,6 +359,10 @@ static int osd_bufs_get_read(const struct lu_env *env, struct osd_object *obj, off + len > obj->oo_dn->dn_datablksz) len = obj->oo_dn->dn_datablksz - off; + dbp = NULL; + if (unlikely(npages >= maxlnb)) + GOTO(err, rc = -EOVERFLOW); + rc = -dmu_buf_hold_array_by_bonus(&obj->oo_dn->dn_bonus->db, off, len, TRUE, osd_0copy_tag, &numbufs, &dbp); @@ -380,6 +385,9 @@ static int osd_bufs_get_read(const struct lu_env *env, struct osd_object *obj, dbf = (void *) ((unsigned long)dbp[i] | 1); while (tocpy > 0) { + if (unlikely(npages >= maxlnb)) + GOTO(err, rc = -EOVERFLOW); + thispage = PAGE_SIZE; thispage -= bufoff & (PAGE_SIZE - 1); thispage = min(tocpy, thispage); @@ -422,6 +430,8 @@ static int osd_bufs_get_read(const struct lu_env *env, struct osd_object *obj, err: LASSERT(rc < 0); + if (dbp) + dmu_buf_rele_array(dbp, numbufs, osd_0copy_tag); osd_bufs_put(env, &obj->oo_dt, lnb - npages, npages); RETURN(rc); } @@ -450,7 +460,8 @@ static inline arc_buf_t *osd_request_arcbuf(dnode_t *dn, size_t bs) } static int osd_bufs_get_write(const struct lu_env *env, struct osd_object *obj, - loff_t off, ssize_t len, struct niobuf_local *lnb) + loff_t off, ssize_t len, struct niobuf_local *lnb, + int maxlnb) { struct osd_device *osd = osd_obj2dev(obj); int poff, plen, off_in_block, sz_in_block; @@ -465,7 +476,8 @@ static int osd_bufs_get_write(const struct lu_env *env, struct osd_object *obj, * so that we're sure nobody is trying to update the same block */ while (len > 0) { - LASSERT(npages < PTLRPC_MAX_BRW_PAGES); + if (unlikely(npages >= maxlnb)) + GOTO(out_err, rc = -EOVERFLOW); off_in_block = off & (bs - 1); sz_in_block = min_t(int, bs - off_in_block, len); @@ -486,6 +498,9 @@ static int osd_bufs_get_write(const struct lu_env *env, struct osd_object *obj, while (sz_in_block > 0) { plen = min_t(int, sz_in_block, PAGE_SIZE); + if (unlikely(npages >= maxlnb)) + GOTO(out_err, rc = -EOVERFLOW); + lnb[i].lnb_file_offset = off; lnb[i].lnb_page_offset = 0; lnb[i].lnb_len = plen; @@ -523,6 +538,9 @@ static int osd_bufs_get_write(const struct lu_env *env, struct osd_object *obj, PAGE_SIZE); plen -= poff; + if (unlikely(npages >= maxlnb)) + GOTO(out_err, rc = -EOVERFLOW); + lnb[i].lnb_file_offset = off; lnb[i].lnb_page_offset = poff; poff = 0; @@ -560,7 +578,7 @@ out_err: static int osd_bufs_get(const struct lu_env *env, struct dt_object *dt, loff_t offset, ssize_t len, struct niobuf_local *lnb, - enum dt_bufs_type rw) + int maxlnb, enum dt_bufs_type rw) { struct osd_object *obj = osd_dt_obj(dt); int rc; @@ -569,9 +587,9 @@ static int osd_bufs_get(const struct lu_env *env, struct dt_object *dt, LASSERT(obj->oo_dn); if (rw & DT_BUFS_TYPE_WRITE) - rc = osd_bufs_get_write(env, obj, offset, len, lnb); + rc = osd_bufs_get_write(env, obj, offset, len, lnb, maxlnb); else - rc = osd_bufs_get_read(env, obj, offset, len, lnb); + rc = osd_bufs_get_read(env, obj, offset, len, lnb, maxlnb); return rc; } diff --git a/lustre/tests/sanity.sh b/lustre/tests/sanity.sh index 335aac4..80dcd28 100755 --- a/lustre/tests/sanity.sh +++ b/lustre/tests/sanity.sh @@ -22380,6 +22380,26 @@ test_818() { } run_test 818 "unlink with failed llog" +test_819a() { + dd if=/dev/zero of=$DIR/$tfile bs=1M count=1 + cancel_lru_locks osc + #define OBD_FAIL_OST_2BIG_NIOBUF 0x248 + do_facet $SINGLEMDS lctl set_param fail_loc=0x80000248 + dd if=$DIR/$tfile of=/dev/null bs=1M count=1 + rm -f $TDIR/$tfile +} +run_test 819a "too big niobuf in read" + +test_819b() { + #define OBD_FAIL_OST_2BIG_NIOBUF 0x248 + do_facet $SINGLEMDS lctl set_param fail_loc=0x80000248 + dd if=/dev/zero of=$DIR/$tfile bs=1M count=1 + cancel_lru_locks osc + sleep 1 + rm -f $TDIR/$tfile +} +run_test 819b "too big niobuf in write" + # # tests that do cleanup/setup should be run at the end #