From 51f5621c04b363dcde7e1bb3dcdb2ebd3b2919fd Mon Sep 17 00:00:00 2001 From: Fan Yong Date: Mon, 10 Feb 2014 21:16:50 +0800 Subject: [PATCH] LU-3594 lfsck: repair inconsistent owner and multiple referenced cases Sometimes, the OST-object owner information is inconsistent with the MDT-object owner information because of an incomplete chown/chgrp or a system crash. In such a case, the MDT-object owner information is trusted over the OST-object's. Because the chown/chgrp processing order is client => MDT => OST, it is the OST-object owner information, rather than the MDT-object's, that is likely to be stale. Also, the MDT-object's owner information is visible to users and can be directly repaired by the system administrator, while the OST-object's owner information is only used internally by quota. So the LFSCK will update the OST-object owner information according to the MDT-object's owner. If both MDT-object1 and MDT-object2 claim OST-object1 as one of their child OST-objects, but OST-object1 only recognizes MDT-object1, then the LFSCK will create a new OST-object and fix MDT-object2's layout information to reference the newly created OST-object. 
Replace is_remote_th() with is_only_remote_trans(), then drop the compat patch http://review.whamcloud.com/9361 Signed-off-by: Fan Yong Change-Id: I6b148180b5a2d68650b291250c03aac651e5f6e9 Reviewed-on: http://review.whamcloud.com/7524 Reviewed-by: Andreas Dilger Tested-by: Jenkins Reviewed-by: Alex Zhuravlev Tested-by: Maloo Reviewed-by: Oleg Drokin --- lustre/include/obd_support.h | 2 + lustre/lfsck/lfsck_layout.c | 220 +++++++++++++++++++++++++++++++++++++++++-- lustre/lod/lod_lov.c | 11 ++- lustre/lod/lod_object.c | 6 ++ lustre/mdd/mdd_object.c | 24 +++-- lustre/osp/osp_internal.h | 4 +- lustre/osp/osp_object.c | 8 +- lustre/osp/osp_trans.c | 34 ++++--- lustre/target/out_lib.c | 3 +- lustre/tests/sanity-lfsck.sh | 113 +++++++++++++++++++++- 10 files changed, 386 insertions(+), 39 deletions(-) diff --git a/lustre/include/obd_support.h b/lustre/include/obd_support.h index 7b636b5..a29a458 100644 --- a/lustre/include/obd_support.h +++ b/lustre/include/obd_support.h @@ -504,6 +504,8 @@ int obd_alloc_fail(const void *ptr, const char *name, const char *type, #define OBD_FAIL_LFSCK_DANGLING 0x1610 #define OBD_FAIL_LFSCK_UNMATCHED_PAIR1 0x1611 #define OBD_FAIL_LFSCK_UNMATCHED_PAIR2 0x1612 +#define OBD_FAIL_LFSCK_BAD_OWNER 0x1613 +#define OBD_FAIL_LFSCK_MULTIPLE_REF 0x1614 #define OBD_FAIL_LFSCK_NOTIFY_NET 0x16f0 #define OBD_FAIL_LFSCK_QUERY_NET 0x16f1 diff --git a/lustre/lfsck/lfsck_layout.c b/lustre/lfsck/lfsck_layout.c index ea0450d..07b6d6b 100644 --- a/lustre/lfsck/lfsck_layout.c +++ b/lustre/lfsck/lfsck_layout.c @@ -1560,6 +1560,205 @@ unlock1: return rc; } +/* If there are more than one MDT-objects claim as the OST-object's parent, + * and the OST-object only recognizes one of them, then we need to generate + * new OST-object(s) with new fid(s) for the non-recognized MDT-object(s). 
*/ +static int lfsck_layout_repair_multiple_references(const struct lu_env *env, + struct lfsck_component *com, + struct lfsck_layout_req *llr, + struct lu_attr *la, + struct lu_buf *buf) +{ + struct lfsck_thread_info *info = lfsck_env_info(env); + struct dt_allocation_hint *hint = &info->lti_hint; + struct dt_object_format *dof = &info->lti_dof; + struct dt_device *pdev = com->lc_lfsck->li_next; + struct ost_id *oi = &info->lti_oi; + struct dt_object *parent = llr->llr_parent->llo_obj; + struct dt_device *cdev = lfsck_obj2dt_dev(llr->llr_child); + struct dt_object *child = NULL; + struct lu_device *d = &cdev->dd_lu_dev; + struct lu_object *o = NULL; + struct thandle *handle; + struct lov_mds_md_v1 *lmm; + struct lov_ost_data_v1 *objs; + struct lustre_handle lh = { 0 }; + __u32 magic; + int rc; + ENTRY; + + CDEBUG(D_LFSCK, "Repair multiple references for: parent "DFID + ", OST-index %u, stripe-index %u, owner %u:%u\n", + PFID(lfsck_dto2fid(parent)), llr->llr_ost_idx, + llr->llr_lov_idx, la->la_uid, la->la_gid); + + rc = lfsck_layout_lock(env, com, parent, &lh, + MDS_INODELOCK_LAYOUT | MDS_INODELOCK_XATTR); + if (rc != 0) + RETURN(rc); + + handle = dt_trans_create(env, pdev); + if (IS_ERR(handle)) + GOTO(unlock1, rc = PTR_ERR(handle)); + + o = lu_object_anon(env, d, NULL); + if (IS_ERR(o)) + GOTO(stop, rc = PTR_ERR(o)); + + child = container_of(o, struct dt_object, do_lu); + o = lu_object_locate(o->lo_header, d->ld_type); + if (unlikely(o == NULL)) + GOTO(stop, rc = -EINVAL); + + child = container_of(o, struct dt_object, do_lu); + la->la_valid = LA_UID | LA_GID; + hint->dah_parent = NULL; + hint->dah_mode = 0; + dof->dof_type = DFT_REGULAR; + rc = dt_declare_create(env, child, la, NULL, NULL, handle); + if (rc != 0) + GOTO(stop, rc); + + rc = dt_declare_xattr_set(env, parent, buf, XATTR_NAME_LOV, + LU_XATTR_REPLACE, handle); + if (rc != 0) + GOTO(stop, rc); + + rc = dt_trans_start(env, pdev, handle); + if (rc != 0) + GOTO(stop, rc); + + dt_write_lock(env, parent, 
0); + if (unlikely(lu_object_is_dying(parent->do_lu.lo_header))) + GOTO(unlock2, rc = 0); + + rc = dt_xattr_get(env, parent, buf, XATTR_NAME_LOV, BYPASS_CAPA); + if (unlikely(rc == 0 || rc == -ENODATA || rc == -ERANGE)) + GOTO(unlock2, rc = 0); + + lmm = buf->lb_buf; + rc = lfsck_layout_verify_header(lmm); + if (rc != 0) + GOTO(unlock2, rc); + + /* Someone change layout during the LFSCK, no need to repair then. */ + if (le16_to_cpu(lmm->lmm_layout_gen) != llr->llr_parent->llo_gen) + GOTO(unlock2, rc = 0); + + rc = dt_create(env, child, la, hint, dof, handle); + if (rc != 0) + GOTO(unlock2, rc); + + /* Currently, we only support LOV_MAGIC_V1/LOV_MAGIC_V3 which has + * been verified in lfsck_layout_verify_header() already. If some + * new magic introduced in the future, then layout LFSCK needs to + * be updated also. */ + magic = le32_to_cpu(lmm->lmm_magic); + if (magic == LOV_MAGIC_V1) { + objs = &(lmm->lmm_objects[0]); + } else { + LASSERT(magic == LOV_MAGIC_V3); + objs = &((struct lov_mds_md_v3 *)lmm)->lmm_objects[0]; + } + + lmm->lmm_layout_gen = cpu_to_le16(llr->llr_parent->llo_gen + 1); + fid_to_ostid(lu_object_fid(&child->do_lu), oi); + ostid_cpu_to_le(oi, &objs[llr->llr_lov_idx].l_ost_oi); + objs[llr->llr_lov_idx].l_ost_gen = cpu_to_le32(0); + objs[llr->llr_lov_idx].l_ost_idx = cpu_to_le32(llr->llr_ost_idx); + rc = dt_xattr_set(env, parent, buf, XATTR_NAME_LOV, + LU_XATTR_REPLACE, handle, BYPASS_CAPA); + + GOTO(unlock2, rc = (rc == 0 ? 1 : rc)); + +unlock2: + dt_write_unlock(env, parent); + +stop: + if (child != NULL) + lu_object_put(env, &child->do_lu); + + dt_trans_stop(env, pdev, handle); + +unlock1: + lfsck_layout_unlock(&lh); + + return rc; +} + +/* If the MDT-object and the OST-object have different owner information, + * then trust the MDT-object, because the normal chown/chgrp handle order + * is from MDT to OST, and it is possible that some chown/chgrp operation + * is partly done. 
*/ +static int lfsck_layout_repair_owner(const struct lu_env *env, + struct lfsck_component *com, + struct lfsck_layout_req *llr, + struct lu_attr *pla) +{ + struct lfsck_thread_info *info = lfsck_env_info(env); + struct lu_attr *tla = &info->lti_la3; + struct dt_object *parent = llr->llr_parent->llo_obj; + struct dt_object *child = llr->llr_child; + struct dt_device *dev = lfsck_obj2dt_dev(child); + struct thandle *handle; + int rc; + ENTRY; + + CDEBUG(D_LFSCK, "Repair inconsistent file owner for: parent "DFID + ", child "DFID", OST-index %u, stripe-index %u, owner %u:%u\n", + PFID(lfsck_dto2fid(parent)), PFID(lfsck_dto2fid(child)), + llr->llr_ost_idx, llr->llr_lov_idx, pla->la_uid, pla->la_gid); + + handle = dt_trans_create(env, dev); + if (IS_ERR(handle)) + RETURN(PTR_ERR(handle)); + + tla->la_uid = pla->la_uid; + tla->la_gid = pla->la_gid; + tla->la_valid = LA_UID | LA_GID; + rc = dt_declare_attr_set(env, child, tla, handle); + if (rc != 0) + GOTO(stop, rc); + + rc = dt_trans_start(env, dev, handle); + if (rc != 0) + GOTO(stop, rc); + + /* Use the dt_object lock to serialize with destroy and attr_set. */ + dt_read_lock(env, parent, 0); + if (unlikely(lu_object_is_dying(parent->do_lu.lo_header))) + GOTO(unlock, rc = 1); + + /* Get the latest parent's owner. */ + rc = dt_attr_get(env, parent, tla, BYPASS_CAPA); + if (rc != 0) { + CWARN("%s: fail to get the latest parent's ("DFID") owner, " + "not sure whether some others chown/chgrp during the " + "LFSCK: rc = %d\n", lfsck_lfsck2name(com->lc_lfsck), + PFID(lfsck_dto2fid(parent)), rc); + + GOTO(unlock, rc); + } + + /* Some others chown/chgrp during the LFSCK, needs to do nothing. 
*/ + if (unlikely(tla->la_uid != pla->la_uid || + tla->la_gid != pla->la_gid)) + GOTO(unlock, rc = 1); + + tla->la_valid = LA_UID | LA_GID; + rc = dt_attr_set(env, child, tla, handle, BYPASS_CAPA); + + GOTO(unlock, rc); + +unlock: + dt_read_unlock(env, parent); + +stop: + rc = lfsck_layout_trans_stop(env, dev, handle, rc); + + return rc; +} + /* Check whether the OST-object correctly back points to the * MDT-object (@parent) via the XATTR_NAME_FID xattr (@pfid). */ static int lfsck_layout_check_parent(const struct lu_env *env, @@ -1585,8 +1784,13 @@ static int lfsck_layout_check_parent(const struct lu_env *env, if (fid_is_zero(pfid)) { /* client never wrote. */ - if (cla->la_size == 0 && cla->la_blocks == 0) + if (cla->la_size == 0 && cla->la_blocks == 0) { + if (unlikely(cla->la_uid != pla->la_uid || + cla->la_gid != pla->la_gid)) + RETURN (LLIT_INCONSISTENT_OWNER); + RETURN(0); + } RETURN(LLIT_UNMATCHED_PAIR); } @@ -1669,7 +1873,7 @@ static int lfsck_layout_assistant_handle_one(const struct lu_env *env, struct lfsck_thread_info *info = lfsck_env_info(env); struct filter_fid_old *pea = &info->lti_old_pfid; struct lu_fid *pfid = &info->lti_fid; - struct lu_buf *buf; + struct lu_buf *buf = NULL; struct dt_object *parent = llr->llr_parent->llo_obj; struct dt_object *child = llr->llr_child; struct lu_attr *pla = &info->lti_la; @@ -1734,7 +1938,11 @@ static int lfsck_layout_assistant_handle_one(const struct lu_env *env, if (rc < 0) GOTO(out, rc); - /* XXX: other inconsistency will be checked in other patches. */ + if (unlikely(cla->la_uid != pla->la_uid || + cla->la_gid != pla->la_gid)) { + type = LLIT_INCONSISTENT_OWNER; + goto repair; + } repair: if (bk->lb_param & LPF_DRYRUN) { @@ -1757,12 +1965,12 @@ repair: case LLIT_UNMATCHED_PAIR: rc = lfsck_layout_repair_unmatched_pair(env, com, llr, pla); break; - - /* XXX: other inconsistency will be fixed in other patches. 
*/ - case LLIT_MULTIPLE_REFERENCED: + rc = lfsck_layout_repair_multiple_references(env, com, llr, + pla, buf); break; case LLIT_INCONSISTENT_OWNER: + rc = lfsck_layout_repair_owner(env, com, llr, pla); break; default: rc = 0; diff --git a/lustre/lod/lod_lov.c b/lustre/lod/lod_lov.c index a8b738b..bfa8b552 100644 --- a/lustre/lod/lod_lov.c +++ b/lustre/lod/lod_lov.c @@ -556,14 +556,21 @@ int lod_generate_and_set_lovea(const struct lu_env *env, } for (i = 0; i < lo->ldo_stripenr; i++) { - const struct lu_fid *fid; + struct lu_fid *fid = &info->lti_fid; struct lod_device *lod; __u32 index; int type = LU_SEQ_RANGE_OST; lod = lu2lod_dev(lo->ldo_obj.do_lu.lo_dev); LASSERT(lo->ldo_stripe[i]); - fid = lu_object_fid(&lo->ldo_stripe[i]->do_lu); + + *fid = *lu_object_fid(&lo->ldo_stripe[i]->do_lu); + if (OBD_FAIL_CHECK(OBD_FAIL_LFSCK_MULTIPLE_REF)) { + if (cfs_fail_val == 0) + cfs_fail_val = fid->f_oid; + else + fid->f_oid = cfs_fail_val; + } rc = fid_to_ostid(fid, &info->lti_ostid); LASSERT(rc == 0); diff --git a/lustre/lod/lod_object.c b/lustre/lod/lod_object.c index 2cbd028..5f69061 100644 --- a/lustre/lod/lod_object.c +++ b/lustre/lod/lod_object.c @@ -303,6 +303,9 @@ static int lod_declare_attr_set(const struct lu_env *env, if (!S_ISDIR(dt->do_lu.lo_header->loh_attr)) { if (!(attr->la_valid & (LA_UID | LA_GID))) RETURN(rc); + + if (OBD_FAIL_CHECK(OBD_FAIL_LFSCK_BAD_OWNER)) + RETURN(0); } else { if (!(attr->la_valid & (LA_UID | LA_GID | LA_MODE | LA_ATIME | LA_MTIME | LA_CTIME))) @@ -380,6 +383,9 @@ static int lod_attr_set(const struct lu_env *env, if (!S_ISDIR(dt->do_lu.lo_header->loh_attr)) { if (!(attr->la_valid & (LA_UID | LA_GID))) RETURN(rc); + + if (OBD_FAIL_CHECK(OBD_FAIL_LFSCK_BAD_OWNER)) + RETURN(0); } else { if (!(attr->la_valid & (LA_UID | LA_GID | LA_MODE | LA_ATIME | LA_MTIME | LA_CTIME))) diff --git a/lustre/mdd/mdd_object.c b/lustre/mdd/mdd_object.c index b27306f..9e86b9e 100644 --- a/lustre/mdd/mdd_object.c +++ b/lustre/mdd/mdd_object.c @@ -888,20 +888,24 
@@ int mdd_attr_set(const struct lu_env *env, struct md_object *obj, CDEBUG(D_INODE, "setting mtime "LPU64", ctime "LPU64"\n", la->la_mtime, la->la_ctime); - if (la_copy->la_valid & LA_FLAGS) { + mdd_write_lock(env, mdd_obj, MOR_TGT_CHILD); + if (la_copy->la_valid & LA_FLAGS) { rc = mdd_attr_set_internal(env, mdd_obj, la_copy, handle, 1); - if (rc == 0) - mdd_flags_xlate(mdd_obj, la_copy->la_flags); - } else if (la_copy->la_valid) { /* setattr */ + if (rc == 0) + mdd_flags_xlate(mdd_obj, la_copy->la_flags); + } else if (la_copy->la_valid) { /* setattr */ rc = mdd_attr_set_internal(env, mdd_obj, la_copy, handle, 1); - } + } + mdd_write_unlock(env, mdd_obj); + + if (rc == 0) + rc = mdd_attr_set_changelog(env, obj, handle, la->la_valid); + + GOTO(stop, rc); - if (rc == 0) - rc = mdd_attr_set_changelog(env, obj, handle, - la->la_valid); stop: - mdd_trans_stop(env, mdd, rc, handle); - RETURN(rc); + mdd_trans_stop(env, mdd, rc, handle); + return rc; } static int mdd_xattr_sanity_check(const struct lu_env *env, diff --git a/lustre/osp/osp_internal.h b/lustre/osp/osp_internal.h index b699f74..2af16c0 100644 --- a/lustre/osp/osp_internal.h +++ b/lustre/osp/osp_internal.h @@ -277,10 +277,8 @@ struct osp_thread_info { * no local updates at all */ static inline bool is_only_remote_trans(struct thandle *th) { - return th->th_dev->dd_ops == &osp_dt_ops; + return th->th_dev != NULL && th->th_dev->dd_ops == &osp_dt_ops; } -/* compat define for lfsck, to be removed soon */ -#define is_remote_trans(a) is_only_remote_trans(a) static inline void osp_objid_buf_prep(struct lu_buf *buf, loff_t *off, __u32 *id, int index) diff --git a/lustre/osp/osp_object.c b/lustre/osp/osp_object.c index 5ffb7e4..aaf7546 100644 --- a/lustre/osp/osp_object.c +++ b/lustre/osp/osp_object.c @@ -431,7 +431,7 @@ static int osp_declare_attr_set(const struct lu_env *env, struct dt_object *dt, if (!(attr->la_valid & (LA_UID | LA_GID))) RETURN(0); - if (!is_remote_trans(th)) + if (!is_only_remote_trans(th)) /* * 
track all UID/GID changes via llog */ @@ -482,7 +482,7 @@ static int osp_attr_set(const struct lu_env *env, struct dt_object *dt, RETURN(0); } - if (!is_remote_trans(th)) + if (!is_only_remote_trans(th)) /* * once transaction is committed put proper command on * the queue going to our OST @@ -879,7 +879,7 @@ static int osp_declare_object_create(const struct lu_env *env, ENTRY; - if (is_remote_trans(th)) { + if (is_only_remote_trans(th)) { LASSERT(fid_is_sane(fid)); rc = osp_md_declare_object_create(env, dt, attr, hint, dof, th); @@ -953,7 +953,7 @@ static int osp_object_create(const struct lu_env *env, struct dt_object *dt, struct lu_fid *fid = &osi->osi_fid; ENTRY; - if (is_remote_trans(th)) { + if (is_only_remote_trans(th)) { LASSERT(fid_is_sane(lu_object_fid(&dt->do_lu))); rc = osp_md_object_create(env, dt, attr, hint, dof, th); diff --git a/lustre/osp/osp_trans.c b/lustre/osp/osp_trans.c index 902de54..dc3481c 100644 --- a/lustre/osp/osp_trans.c +++ b/lustre/osp/osp_trans.c @@ -228,7 +228,7 @@ struct thandle *osp_trans_create(const struct lu_env *env, struct dt_device *d) { struct thandle *th = NULL; struct thandle_update *tu = NULL; - int rc; + int rc = 0; OBD_ALLOC_PTR(th); if (unlikely(th == NULL)) @@ -245,6 +245,8 @@ struct thandle *osp_trans_create(const struct lu_env *env, struct dt_device *d) INIT_LIST_HEAD(&tu->tu_remote_update_list); tu->tu_only_remote_trans = 1; + th->th_update = tu; + out: if (rc != 0) { if (tu != NULL) @@ -267,7 +269,7 @@ static int osp_trans_trigger(const struct lu_env *env, struct osp_device *osp, /* If the transaction only includes remote update, it should * still be asynchronous */ - if (tu->tu_only_remote_trans) { + if (is_only_remote_trans(th)) { struct osp_async_update_args *args; struct ptlrpc_request *req; @@ -323,7 +325,7 @@ int osp_trans_start(const struct lu_env *env, struct dt_device *dt, * If it is remote unlink, it will send the remote req before * the local transaction, i.e. 
delete the name entry remote * first, then destroy the local object. */ - if (!tu->tu_only_remote_trans && !tu->tu_sent_after_local_trans) + if (!is_only_remote_trans(th) && !tu->tu_sent_after_local_trans) rc = osp_trans_trigger(env, dt2osp_dev(dt), update, th); return rc; @@ -339,26 +341,34 @@ int osp_trans_stop(const struct lu_env *env, struct dt_device *dt, LASSERT(tu != NULL); /* Check whether there are updates related with this OSP */ update = out_find_update(tu, dt); - if (update == NULL) - return rc; + if (update == NULL) { + if (!is_only_remote_trans(th)) + return rc; + goto put; + } - if (update->ur_buf->ub_count == 0) - GOTO(free, rc); + if (update->ur_buf->ub_count == 0) { + out_destroy_update_req(update); + goto put; + } - if (tu->tu_only_remote_trans) { - if (th->th_result == 0) + if (is_only_remote_trans(th)) { + if (th->th_result == 0) { rc = osp_trans_trigger(env, dt2osp_dev(dt), update, th); - else + } else { rc = th->th_result; + out_destroy_update_req(update); + } } else { if (tu->tu_sent_after_local_trans) rc = osp_trans_trigger(env, dt2osp_dev(dt), update, th); rc = update->ur_rc; + out_destroy_update_req(update); } -free: - out_destroy_update_req(update); + +put: thandle_put(th); return rc; } diff --git a/lustre/target/out_lib.c b/lustre/target/out_lib.c index c2d41f7..8ba1fd6 100644 --- a/lustre/target/out_lib.c +++ b/lustre/target/out_lib.c @@ -123,7 +123,8 @@ struct update_request *out_find_create_update_loc(struct thandle *th, list_add_tail(&update->ur_list, &tu->tu_remote_update_list); - thandle_get(th); + if (!tu->tu_only_remote_trans) + thandle_get(th); RETURN(update); } diff --git a/lustre/tests/sanity-lfsck.sh b/lustre/tests/sanity-lfsck.sh index 2a40d1c..405422f 100644 --- a/lustre/tests/sanity-lfsck.sh +++ b/lustre/tests/sanity-lfsck.sh @@ -43,7 +43,7 @@ check_and_setup_lustre ALWAYS_EXCEPT="$ALWAYS_EXCEPT 2c" [[ $(lustre_version_code ost1) -lt $(version_code 2.5.55) ]] && - ALWAYS_EXCEPT="$ALWAYS_EXCEPT 11 12 13 14 15" + 
ALWAYS_EXCEPT="$ALWAYS_EXCEPT 11 12 13 14 15 16 17" build_test_filter @@ -1417,6 +1417,117 @@ test_15b() { } run_test 15b "LFSCK can repair unmatched MDT-object/OST-object pairs (2)" +test_16() { + echo "#####" + echo "If the OST-object's owner information does not match the owner" + echo "information stored in the MDT-object, then the LFSCK trust the" + echo "MDT-object and update the OST-object's owner information." + echo "#####" + + echo "stopall" + stopall > /dev/null + echo "formatall" + formatall > /dev/null + echo "setupall" + setupall > /dev/null + + mkdir -p $DIR/$tdir + $LFS setstripe -c 1 -i 0 $DIR/$tdir + dd if=/dev/zero of=$DIR/$tdir/f0 bs=1M count=1 + cancel_lru_locks osc + sync + sleep 2 + + echo "Inject failure stub to skip OST-object owner changing" + #define OBD_FAIL_LFSCK_BAD_OWNER 0x1613 + do_facet $SINGLEMDS $LCTL set_param fail_loc=0x1613 + chown 1.1 $DIR/$tdir/f0 + do_facet $SINGLEMDS $LCTL set_param fail_loc=0 + + echo "Trigger layout LFSCK to find out inconsistent OST-object owner" + echo "and fix them" + + $START_LAYOUT || error "(1) Fail to start LFSCK for layout!" + + wait_update_facet $SINGLEMDS "$LCTL get_param -n \ + mdd.${MDT_DEV}.lfsck_layout | + awk '/^status/ { print \\\$2 }'" "completed" 3 || return 2 + + local repaired=$($SHOW_LAYOUT | + awk '/^repaired_inconsistent_owner/ { print $2 }') + [ $repaired -eq 1 ] || + error "(3) Fail to repair inconsistent owner: $repaired" +} +run_test 16 "LFSCK can repair inconsistent MDT-object/OST-object owner" + +test_17() { + echo "#####" + echo "If more than one MDT-objects reference the same OST-object," + echo "and the OST-object only recognizes one MDT-object, then the" + echo "LFSCK should create new OST-objects for such non-recognized" + echo "MDT-objects." 
+ echo "#####" + + echo "stopall" + stopall > /dev/null + echo "formatall" + formatall > /dev/null + echo "setupall" + setupall > /dev/null + + mkdir -p $DIR/$tdir + $LFS setstripe -c 1 -i 0 $DIR/$tdir + + echo "Inject failure stub to make two MDT-objects to refernce" + echo "the OST-object" + + do_facet $SINGLEMDS $LCTL set_param fail_val=0 + #define OBD_FAIL_LFSCK_MULTIPLE_REF 0x1614 + do_facet $SINGLEMDS $LCTL set_param fail_loc=0x1614 + + dd if=/dev/zero of=$DIR/$tdir/guard bs=1M count=1 + cancel_lru_locks osc + sync + sleep 2 + + createmany -o $DIR/$tdir/f 1 > /dev/null 2>&1 + + do_facet $SINGLEMDS $LCTL set_param fail_loc=0 + do_facet $SINGLEMDS $LCTL set_param fail_val=0 + + echo "stopall to cleanup object cache" + stopall > /dev/null + echo "setupall" + setupall > /dev/null + + echo "$DIR/$tdir/f0 and $DIR/$tdir/guard use the same OST-objects" + local size=$(ls -l $DIR/$tdir/f0 | awk '{ print $5 }') + [ $size -eq 1048576 ] || + error "(1) f0 (wrong) size should be 1048576, but got $size" + + echo "Trigger layout LFSCK to find out multiple refenced MDT-objects" + echo "and fix them" + + $START_LAYOUT || error "(2) Fail to start LFSCK for layout!" + + wait_update_facet $SINGLEMDS "$LCTL get_param -n \ + mdd.${MDT_DEV}.lfsck_layout | + awk '/^status/ { print \\\$2 }'" "completed" 3 || return 3 + + local repaired=$($SHOW_LAYOUT | + awk '/^repaired_multiple_referenced/ { print $2 }') + [ $repaired -eq 1 ] || + error "(4) Fail to repair multiple references: $repaired" + + echo "$DIR/$tdir/f0 and $DIR/$tdir/guard should use diff OST-objects" + dd if=/dev/zero of=$DIR/$tdir/f0 bs=1M count=2 || + error "(5) Fail to write f0." + size=$(ls -l $DIR/$tdir/guard | awk '{ print $5 }') + [ $size -eq 1048576 ] || + error "(6) guard size should be 1048576, but got $size" +} +run_test 17 "LFSCK can repair multiple references" + $LCTL set_param debug=-lfsck > /dev/null || true # restore MDS/OST size -- 1.8.3.1