From 35046f30b35696b5644328094fc470ba6ccfe71b Mon Sep 17 00:00:00 2001 From: Yang Sheng Date: Sat, 15 Nov 2014 23:42:23 +0800 Subject: [PATCH] LU-4958 lov: don't crash accessing LOV object with FID{0,0} Some objects may have a corrupted LOV EA or a hole in the LOV EA. We should not crash the client in such a case. This is a back-port patch from master: Lustre-change: http://review.whamcloud.com/10042 Lustre-commit: 754bf71c650c427acfb0fe35017e8f9c1eb9fa7d Signed-off-by: Fan Yong Signed-off-by: Yang Sheng Change-Id: I08cdd770c369eb51aa92b04a838fbb6784d36bad Reviewed-on: http://review.whamcloud.com/12740 Tested-by: Jenkins Reviewed-by: Jian Yu Tested-by: Maloo Reviewed-by: Andreas Dilger --- lustre/include/obd_support.h | 1 + lustre/lod/lod_object.c | 42 +++++++++++++++++++++++++++++ lustre/lov/lov_ea.c | 14 +++++++--- lustre/lov/lov_internal.h | 11 ++++++++ lustre/lov/lov_io.c | 15 ++++++++++- lustre/lov/lov_lock.c | 31 ++++++++++++++++------ lustre/lov/lov_obd.c | 59 ++++++++++++++++++++++++++++------------- lustre/lov/lov_object.c | 7 +++++ lustre/lov/lov_request.c | 9 +++++++ lustre/tests/sanity-lfsck.sh | 63 ++++++++++++++++++++++++++++++++++++++++++++ 10 files changed, 221 insertions(+), 31 deletions(-) mode change 100644 => 100755 lustre/tests/sanity-lfsck.sh diff --git a/lustre/include/obd_support.h b/lustre/include/obd_support.h index 2bcff3b..c1870af 100644 --- a/lustre/include/obd_support.h +++ b/lustre/include/obd_support.h @@ -485,6 +485,7 @@ int obd_alloc_fail(const void *ptr, const char *name, const char *type, #define OBD_FAIL_LOV_INIT 0x1403 #define OBD_FAIL_GLIMPSE_DELAY 0x1404 #define OBD_FAIL_LLITE_XATTR_ENOMEM 0x1405 +#define OBD_FAIL_MAKE_LOVEA_HOLE 0x1406 #define OBD_FAIL_FID_INDIR 0x1501 #define OBD_FAIL_FID_INLMA 0x1502 diff --git a/lustre/lod/lod_object.c b/lustre/lod/lod_object.c index 4c62b79..3bbf3b1 100644 --- a/lustre/lod/lod_object.c +++ b/lustre/lod/lod_object.c @@ -320,6 +320,19 @@ static int lod_declare_attr_set(const struct lu_env *env, } } 
+ if (OBD_FAIL_CHECK(OBD_FAIL_MAKE_LOVEA_HOLE) && + dt_object_exists(next) && + dt_object_remote(next) == 0 && S_ISREG(attr->la_mode)) { + struct lod_thread_info *info = lod_env_info(env); + struct lu_buf *buf = &info->lti_buf; + + buf->lb_buf = info->lti_ea_store; + buf->lb_len = info->lti_ea_store_size; + dt_declare_xattr_set(env, next, buf, XATTR_NAME_LOV, + LU_XATTR_REPLACE, handle); + } + + RETURN(rc); } @@ -357,6 +370,35 @@ static int lod_attr_set(const struct lu_env *env, } } + if (OBD_FAIL_CHECK(OBD_FAIL_MAKE_LOVEA_HOLE) && + dt_object_exists(next) && + dt_object_remote(next) == 0 && S_ISREG(attr->la_mode)) { + struct lod_thread_info *info = lod_env_info(env); + struct lu_buf *buf = &info->lti_buf; + struct lov_mds_md_v1 *lmm; + struct lov_ost_data_v1 *objs; + __u32 magic; + int rc1; + + rc1 = lod_get_lov_ea(env, lo); + if (rc1 <= 0) + RETURN(rc); + + buf->lb_buf = info->lti_ea_store; + buf->lb_len = info->lti_ea_store_size; + lmm = info->lti_ea_store; + magic = le32_to_cpu(lmm->lmm_magic); + if (le16_to_cpu(lmm->lmm_stripe_count) >= 2) { + if (magic == LOV_MAGIC_V1) + objs = &(lmm->lmm_objects[1]); + else + objs = &((struct lov_mds_md_v3 *)lmm)->lmm_objects[1]; + memset(objs, 0, sizeof(*objs)); + dt_xattr_set(env, next, buf, XATTR_NAME_LOV, + LU_XATTR_REPLACE, handle, BYPASS_CAPA); + } + } + RETURN(rc); } diff --git a/lustre/lov/lov_ea.c b/lustre/lov/lov_ea.c index 469907d..1e564d4 100644 --- a/lustre/lov/lov_ea.c +++ b/lustre/lov/lov_ea.c @@ -232,6 +232,9 @@ int lsm_unpackmd_v1(struct lov_obd *lov, struct lov_stripe_md *lsm, ostid_le_to_cpu(&lmm->lmm_objects[i].l_ost_oi, &loi->loi_oi); loi->loi_ost_idx = le32_to_cpu(lmm->lmm_objects[i].l_ost_idx); loi->loi_ost_gen = le32_to_cpu(lmm->lmm_objects[i].l_ost_gen); + if (lov_oinfo_is_dummy(loi)) + continue; + if (loi->loi_ost_idx >= lov->desc.ld_tgt_count) { CERROR("OST index %d more than OST count %d\n", loi->loi_ost_idx, lov->desc.ld_tgt_count); @@ -314,11 +317,14 @@ int lsm_unpackmd_v3(struct lov_obd *lov, 
struct lov_stripe_md *lsm, return -E2BIG; for (i = 0; i < stripe_count; i++) { - /* XXX LOV STACKING call down to osc_unpackmd() */ - loi = lsm->lsm_oinfo[i]; + /* XXX LOV STACKING call down to osc_unpackmd() */ + loi = lsm->lsm_oinfo[i]; ostid_le_to_cpu(&lmm->lmm_objects[i].l_ost_oi, &loi->loi_oi); - loi->loi_ost_idx = le32_to_cpu(lmm->lmm_objects[i].l_ost_idx); - loi->loi_ost_gen = le32_to_cpu(lmm->lmm_objects[i].l_ost_gen); + loi->loi_ost_idx = le32_to_cpu(lmm->lmm_objects[i].l_ost_idx); + loi->loi_ost_gen = le32_to_cpu(lmm->lmm_objects[i].l_ost_gen); + if (lov_oinfo_is_dummy(loi)) + continue; + if (loi->loi_ost_idx >= lov->desc.ld_tgt_count) { CERROR("OST index %d more than OST count %d\n", loi->loi_ost_idx, lov->desc.ld_tgt_count); diff --git a/lustre/lov/lov_internal.h b/lustre/lov/lov_internal.h index d0424b7..2d780ca 100644 --- a/lustre/lov/lov_internal.h +++ b/lustre/lov/lov_internal.h @@ -320,4 +320,15 @@ static inline struct lov_stripe_md *lsm_addref(struct lov_stripe_md *lsm) return lsm; } +static inline bool lov_oinfo_is_dummy(const struct lov_oinfo *loi) +{ + if (unlikely(loi->loi_oi.oi.oi_id == 0 && + loi->loi_oi.oi.oi_seq == 0 && + loi->loi_ost_idx == 0 && + loi->loi_ost_gen == 0)) + return true; + + return false; +} + #endif diff --git a/lustre/lov/lov_io.c b/lustre/lov/lov_io.c index c163cfa..a61383e 100644 --- a/lustre/lov/lov_io.c +++ b/lustre/lov/lov_io.c @@ -151,6 +151,9 @@ static int lov_io_sub_init(const struct lu_env *env, struct lov_io *lio, LASSERT(sub->sub_stripe < lio->lis_stripe_count); ENTRY; + if (unlikely(lov_r0(lov)->lo_sub[stripe] == NULL)) + RETURN(-EIO); + result = 0; sub->sub_io_initialized = 0; sub->sub_borrowed = 0; @@ -406,6 +409,15 @@ static int lov_io_iter_init(const struct lu_env *env, endpos, &start, &end)) continue; + if (unlikely(lov_r0(lio->lis_object)->lo_sub[stripe] == NULL)) { + if (ios->cis_io->ci_type == CIT_READ || + ios->cis_io->ci_type == CIT_WRITE || + ios->cis_io->ci_type == CIT_FAULT) + RETURN(-EIO); + + 
continue; + } + end = lov_offset_mod(end, +1); sub = lov_sub_get(env, lio, stripe); if (!IS_ERR(sub)) { @@ -921,7 +933,8 @@ int lov_io_init_raid0(const struct lu_env *env, struct cl_object *obj, ENTRY; CFS_INIT_LIST_HEAD(&lio->lis_active); - lov_io_slice_init(lio, lov, io); + lov_io_slice_init(lio, lov, io); + if (io->ci_result == 0) { io->ci_result = lov_io_subio_init(env, lio, io); if (io->ci_result == 0) { diff --git a/lustre/lov/lov_lock.c b/lustre/lov/lov_lock.c index 916a415..4e3f483 100644 --- a/lustre/lov/lov_lock.c +++ b/lustre/lov/lov_lock.c @@ -318,8 +318,9 @@ static int lov_lock_sub_init(const struct lu_env *env, * XXX for wide striping smarter algorithm is desirable, * breaking out of the loop, early. */ - if (lov_stripe_intersects(loo->lo_lsm, i, - file_start, file_end, &start, &end)) + if (likely(r0->lo_sub[i] != NULL) && + lov_stripe_intersects(loo->lo_lsm, i, + file_start, file_end, &start, &end)) nr++; } LASSERT(nr > 0); @@ -336,8 +337,9 @@ static int lov_lock_sub_init(const struct lu_env *env, * top-lock. */ for (i = 0, nr = 0; i < r0->lo_nr; ++i) { - if (lov_stripe_intersects(loo->lo_lsm, i, - file_start, file_end, &start, &end)) { + if (likely(r0->lo_sub[i] != NULL) && + lov_stripe_intersects(loo->lo_lsm, i, + file_start, file_end, &start, &end)) { struct cl_lock_descr *descr; descr = &lck->lls_sub[nr].sub_descr; @@ -950,10 +952,23 @@ static int lov_lock_stripe_is_matching(const struct lu_env *env, */ start = cl_offset(&lov->lo_cl, descr->cld_start); end = cl_offset(&lov->lo_cl, descr->cld_end + 1) - 1; - result = end - start <= lsm->lsm_stripe_size && - stripe == lov_stripe_number(lsm, start) && - stripe == lov_stripe_number(lsm, end); - if (result) { + + result = 0; + /* glimpse should work on the object with LOV EA hole. 
*/ + if (end - start <= lsm->lsm_stripe_size) { + int idx; + + idx = lov_stripe_number(lsm, start); + if (idx == stripe || + unlikely(lov_r0(lov)->lo_sub[idx] == NULL)) { + idx = lov_stripe_number(lsm, end); + if (idx == stripe || + unlikely(lov_r0(lov)->lo_sub[idx] == NULL)) + result = 1; + } + } + + if (result != 0) { struct cl_lock_descr *subd = &lov_env_info(env)->lti_ldescr; obd_off sub_start; obd_off sub_end; diff --git a/lustre/lov/lov_obd.c b/lustre/lov/lov_obd.c index 2665987..fc65ca1 100644 --- a/lustre/lov/lov_obd.c +++ b/lustre/lov/lov_obd.c @@ -1035,8 +1035,13 @@ static int lov_recreate(struct obd_export *exp, struct obdo *src_oa, GOTO(out, rc = -EINVAL); for (i = 0; i < lsm->lsm_stripe_count; i++) { - if (lsm->lsm_oinfo[i]->loi_ost_idx == ost_idx) { - if (ostid_id(&lsm->lsm_oinfo[i]->loi_oi) != + struct lov_oinfo *loi = lsm->lsm_oinfo[i]; + + if (lov_oinfo_is_dummy(loi)) + continue; + + if (loi->loi_ost_idx == ost_idx) { + if (ostid_id(&loi->loi_oi) != ostid_id(&src_oa->o_oi)) GOTO(out, rc = -EINVAL); break; @@ -1696,10 +1701,13 @@ static int lov_change_cbdata(struct obd_export *exp, struct lov_stripe_md submd; struct lov_oinfo *loi = lsm->lsm_oinfo[i]; - if (!lov->lov_tgts[loi->loi_ost_idx]) { - CDEBUG(D_HA, "lov idx %d NULL \n", loi->loi_ost_idx); - continue; - } + if (lov_oinfo_is_dummy(loi)) + continue; + + if (!lov->lov_tgts[loi->loi_ost_idx]) { + CDEBUG(D_HA, "lov idx %d NULL\n", loi->loi_ost_idx); + continue; + } submd.lsm_oi = loi->loi_oi; submd.lsm_stripe_count = 0; @@ -1731,10 +1739,14 @@ static int lov_find_cbdata(struct obd_export *exp, struct lov_stripe_md submd; struct lov_oinfo *loi = lsm->lsm_oinfo[i]; - if (!lov->lov_tgts[loi->loi_ost_idx]) { - CDEBUG(D_HA, "lov idx %d NULL \n", loi->loi_ost_idx); - continue; - } + if (lov_oinfo_is_dummy(loi)) + continue; + + if (!lov->lov_tgts[loi->loi_ost_idx]) { + CDEBUG(D_HA, "lov idx %d NULL\n", loi->loi_ost_idx); + continue; + } + submd.lsm_oi = loi->loi_oi; submd.lsm_stripe_count = 0; rc = 
obd_find_cbdata(lov->lov_tgts[loi->loi_ost_idx]->ltd_exp, @@ -2154,14 +2166,19 @@ obd_size fiemap_calc_fm_end_offset(struct ll_user_fiemap *fiemap, fiemap->fm_extents[0].fe_logical == 0) return 0; - /* Find out stripe_no from ost_index saved in the fe_device */ - for (i = 0; i < lsm->lsm_stripe_count; i++) { - if (lsm->lsm_oinfo[i]->loi_ost_idx == - fiemap->fm_extents[0].fe_device) { - stripe_no = i; - break; - } - } + /* Find out stripe_no from ost_index saved in the fe_device */ + for (i = 0; i < lsm->lsm_stripe_count; i++) { + struct lov_oinfo *oinfo = lsm->lsm_oinfo[i]; + + if (lov_oinfo_is_dummy(oinfo)) + continue; + + if (oinfo->loi_ost_idx == fiemap->fm_extents[0].fe_device) { + stripe_no = i; + break; + } + } + if (stripe_no == -1) return -EINVAL; @@ -2350,6 +2367,9 @@ static int lov_fiemap(struct lov_obd *lov, __u32 keylen, void *key, &lun_start, &obd_object_end)) == 0) continue; + if (lov_oinfo_is_dummy(lsm->lsm_oinfo[cur_stripe])) + GOTO(out, rc = -EIO); + /* If this is a continuation FIEMAP call and we are on * starting stripe then lun_start needs to be set to * fm_end_offset */ @@ -2537,6 +2557,9 @@ static int lov_get_info(const struct lu_env *env, struct obd_export *exp, * be NULL and won't match the lock's export. 
*/ for (i = 0; i < lsm->lsm_stripe_count; i++) { loi = lsm->lsm_oinfo[i]; + if (lov_oinfo_is_dummy(loi)) + continue; + if (!lov->lov_tgts[loi->loi_ost_idx]) continue; if (lov->lov_tgts[loi->loi_ost_idx]->ltd_exp == diff --git a/lustre/lov/lov_object.c b/lustre/lov/lov_object.c index ddd375a..516f3a8 100644 --- a/lustre/lov/lov_object.c +++ b/lustre/lov/lov_object.c @@ -234,6 +234,9 @@ static int lov_init_raid0(const struct lu_env *env, struct lov_oinfo *oinfo = lsm->lsm_oinfo[i]; int ost_idx = oinfo->loi_ost_idx; + if (lov_oinfo_is_dummy(oinfo)) + continue; + result = ostid_to_fid(ofid, &oinfo->loi_oi, oinfo->loi_ost_idx); if (result != 0) @@ -994,6 +997,10 @@ int lov_read_and_clear_async_rc(struct cl_object *clob) LASSERT(lsm != NULL); for (i = 0; i < lsm->lsm_stripe_count; i++) { struct lov_oinfo *loi = lsm->lsm_oinfo[i]; + + if (lov_oinfo_is_dummy(loi)) + continue; + if (loi->loi_ar.ar_rc && !rc) rc = loi->loi_ar.ar_rc; loi->loi_ar.ar_rc = 0; diff --git a/lustre/lov/lov_request.c b/lustre/lov/lov_request.c index 10324fe..f75445e 100644 --- a/lustre/lov/lov_request.c +++ b/lustre/lov/lov_request.c @@ -883,6 +883,9 @@ int lov_prep_getattr_set(struct obd_export *exp, struct obd_info *oinfo, struct lov_request *req; loi = oinfo->oi_md->lsm_oinfo[i]; + if (lov_oinfo_is_dummy(loi)) + continue; + if (!lov_check_and_wait_active(lov, loi->loi_ost_idx)) { CDEBUG(D_HA, "lov idx %d inactive\n", loi->loi_ost_idx); if (oinfo->oi_oa->o_valid & OBD_MD_FLEPOCH) @@ -964,6 +967,9 @@ int lov_prep_destroy_set(struct obd_export *exp, struct obd_info *oinfo, struct lov_request *req; loi = lsm->lsm_oinfo[i]; + if (lov_oinfo_is_dummy(loi)) + continue; + if (!lov_check_and_wait_active(lov, loi->loi_ost_idx)) { CDEBUG(D_HA, "lov idx %d inactive\n", loi->loi_ost_idx); continue; @@ -1074,6 +1080,9 @@ int lov_prep_setattr_set(struct obd_export *exp, struct obd_info *oinfo, struct lov_oinfo *loi = oinfo->oi_md->lsm_oinfo[i]; struct lov_request *req; + if (lov_oinfo_is_dummy(loi)) + continue; 
+ if (!lov_check_and_wait_active(lov, loi->loi_ost_idx)) { CDEBUG(D_HA, "lov idx %d inactive\n", loi->loi_ost_idx); continue; diff --git a/lustre/tests/sanity-lfsck.sh b/lustre/tests/sanity-lfsck.sh old mode 100644 new mode 100755 index d08d75a..a0c5247 --- a/lustre/tests/sanity-lfsck.sh +++ b/lustre/tests/sanity-lfsck.sh @@ -963,6 +963,69 @@ test_10() } run_test 10 "System is available during LFSCK scanning" +test_20a() { + local server_version=$(lustre_version_code $SINGLEMDS) + + [[ $server_version -ge $(version_code 2.5.60) ]] || + [[ $server_version -ge $(version_code 2.5.3) && + $server_version -lt $(version_code 2.5.11) ]] || + { skip "Need MDS version 2.5.4+ or 2.5.60+"; return; } + + [ $OSTCOUNT -lt 2 ] && + skip "The test needs at least 2 OSTs" && return + + echo "#####" + echo "For old client, even though it cannot access the file with" + echo "LOV EA hole, it should not cause the system crash." + echo "#####" + + lfsck_prep 0 0 + echo "start $SINGLEMDS" + start $SINGLEMDS $MDT_DEVNAME $MOUNT_OPTS_SCRUB > /dev/null || + error "(1) Fail to start MDS!" + + mount_client $MOUNT || error "(2) Fail to start client!" + $LFS mkdir -i 0 $DIR/$tdir/a1 + if [ $OSTCOUNT -gt 2 ]; then + $LFS setstripe -c 3 -i 0 -s 1M $DIR/$tdir/a1 + bcount=513 + else + $LFS setstripe -c 2 -i 0 -s 1M $DIR/$tdir/a1 + bcount=257 + fi + + # 256 blocks on the stripe0. + # 1 block on the stripe1 for 2 OSTs case. + # 256 blocks on the stripe1 for other cases. + # 1 block on the stripe2 if OSTs > 2 + dd if=/dev/zero of=$DIR/$tdir/a1/f0 bs=4096 count=$bcount + + local fid0=$($LFS path2fid $DIR/$tdir/a1/f0) + + echo ${fid0} + $LFS getstripe $DIR/$tdir/a1/f0 + + cancel_lru_locks osc + + echo "Inject failure..." + echo "To make a LOV EA hole..." 
+ #define OBD_FAIL_MAKE_LOVEA_HOLE 0x1406 + do_facet mds1 $LCTL set_param fail_loc=0x1406 + chown 1.1 $DIR/$tdir/a1/f0 + + umount_client $MOUNT + sync + sleep 2 + do_facet mds1 $LCTL set_param fail_loc=0 fail_val=0 + + mount_client $MOUNT || error "Fail to start client!" + + $LFS getstripe $DIR/$tdir/a1/f0 + dd if=$DIR/$tdir/a1/f0 of=/dev/null + return 0 # not crash +} +run_test 20a "Don't crash client while access with LOV EA hole" + $LCTL set_param debug=-lfsck > /dev/null || true # restore MDS/OST size -- 1.8.3.1