From: Fan Yong Date: Wed, 30 Apr 2014 02:14:06 +0000 (+0800) Subject: LU-5117 lfsck: naming rules for orphan X-Git-Tag: 2.5.60~12 X-Git-Url: https://git.whamcloud.com/?p=fs%2Flustre-release.git;a=commitdiff_plain;h=119cbc5f73e82a8d9e4817d68ed00ee77796f248 LU-5117 lfsck: naming rules for orphan The rules for naming orphan under /ROOT/.lustre/lost+found/MDTxxxx/ are as following: 1) The orphan's name should start with the "FID" to simplify the sorting by FID. 2) The name needs to contain the orphan type to indicate why the orphan is created. 3) To resolve the name conflict, there is the conflict version in the name. 4) There can be infix in the name to indicate more information, such as its original name, the original stripe offset, etc. So the name will be like: ${FID}-${infix}-${type}-${conflict_version} Signed-off-by: Fan Yong Change-Id: I6eaa0daff39729f401a4f45eae4c1efdd527275a Reviewed-on: http://review.whamcloud.com/10482 Tested-by: Jenkins Tested-by: Maloo Reviewed-by: Lai Siyao Reviewed-by: Andreas Dilger Reviewed-by: Oleg Drokin --- diff --git a/lustre/lfsck/lfsck_layout.c b/lustre/lfsck/lfsck_layout.c index 5d619b6..c7c1bfc 100644 --- a/lustre/lfsck/lfsck_layout.c +++ b/lustre/lfsck/lfsck_layout.c @@ -1906,17 +1906,57 @@ out: } /** - * \retval +1: repaired - * \retval 0: did nothing - * \retval -ve: on error + * This function will create the MDT-object will the given (partial) LOV EA. + * + * Under some data corruption cases, the MDT-object of the file may be lost, + * but its OST-objects, or some of them are there. The layout LFSCK needs to + * re-create the MDT-object with the orphan OST-object(s) information. + * + * On the other hand, the LFSCK may has created some OST-object for repairing + * dangling LOV EA reference, but as the LFSCK processing, it may find that + * the old OST-object is there and should replace the former new created OST + * object. Unfortunately, some others have modified such newly created object. + * To keep the data (both new and old), the LFSCK will create MDT-object with + * new FID to reference the original OST-object. + * + * \param[in] env pointer to the thread context + * \param[in] com pointer to the lfsck component + * \param[in] ltd pointer to target device descriptor + * \param[in] rec pointer to the record for the orphan OST-object + * \param[in] cfid pointer to FID for the orphan OST-object + * \param[in] infix additional information, such as the FID for original + * MDT-object and the stripe offset in the LOV EA + * \param[in] type the type for describing why the orphan MDT-object is + * created. The rules are as following: + * + * type "C": Multiple OST-objects claim the same MDT-object and the + * same slot in the layout EA. Then the LFSCK will create + * new MDT-object(s) to hold the conflict OST-object(s). + * + * type "N": The orphan OST-object does not know which one was the + * real parent MDT-object, so the LFSCK uses new FID for + * its parent MDT-object. + * + * type "R": The orphan OST-object knows its parent MDT-object FID, + * but does not know the position (the file name) in the + * namespace. + * + * The orphan name will be like: + * ${FID}-${infix}-${type}-${conflict_version} + * + * \param[in] ea_off the stripe offset in the LOV EA + * + * \retval positive on repaired something + * \retval if needs to repair nothing + * \retval negative error number on failure */ static int lfsck_layout_recreate_parent(const struct lu_env *env, struct lfsck_component *com, struct lfsck_tgt_desc *ltd, struct lu_orphan_rec *rec, struct lu_fid *cfid, - const char *prefix, - const char *postfix, + const char *infix, + const char *type, __u32 ea_off) { struct lfsck_thread_info *info = lfsck_env_info(env); @@ -1964,48 +2004,24 @@ static int lfsck_layout_recreate_parent(const struct lu_env *env, RETURN(PTR_ERR(cobj)); } - CDEBUG(D_LFSCK, "Re-create the lost MDT-object: parent " - DFID", child "DFID", OST-index %u, stripe-index %u, " - "prefix %s, postfix %s\n", - PFID(pfid), PFID(cfid), ltd->ltd_index, ea_off, prefix, postfix); + CDEBUG(D_LFSCK, "Re-create the lost MDT-object: parent "DFID", child " + DFID", OST-index %u, stripe-index %u, infix %s, type %s\n", + PFID(pfid), PFID(cfid), ltd->ltd_index, ea_off, infix, type); pobj = lfsck_object_find_by_dev(env, lfsck->li_bottom, pfid); if (IS_ERR(pobj)) GOTO(put, rc = PTR_ERR(pobj)); - LASSERT(prefix != NULL); - LASSERT(postfix != NULL); + LASSERT(infix != NULL); + LASSERT(type != NULL); - /** name rules: - * - * 1. Use the MDT-object's FID as the name with prefix and postfix. - * - * 1.1 prefix "C-": More than one OST-objects claim the same - * MDT-object and the same slot in the layout EA. - * It may be created for dangling referenced MDT - * object or may be not. - * 1.2 prefix "N-": The orphan OST-object does not know which one - * is the real parent, so the LFSCK assign a new - * FID as its parent. - * 1.3 prefix "R-": The orphan OST-object know its parent FID but - * does not know the position in the namespace. - * - * 2. If there is name conflict, append more index for new name. */ - sprintf(name, "%s"DFID"%s", prefix, PFID(pfid), postfix); do { + snprintf(name, NAME_MAX, DFID"%s-%s-%d", PFID(pfid), infix, + type, idx++); rc = dt_lookup(env, lfsck->li_lpf_obj, (struct dt_rec *)tfid, (const struct dt_key *)name, BYPASS_CAPA); if (rc != 0 && rc != -ENOENT) GOTO(put, rc); - - if (unlikely(rc == 0)) { - CWARN("%s: The name %s under lost+found has been used " - "by the "DFID". Try to increase the FID version " - "for the new file name.\n", - lfsck_lfsck2name(lfsck), name, PFID(tfid)); - sprintf(name, "%s"DFID"%s-%d", prefix, PFID(pfid), - postfix, ++idx); - } } while (rc == 0); memset(la, 0, sizeof(*la)); @@ -2299,7 +2315,7 @@ static int lfsck_layout_conflict_create(const struct lu_env *env, struct lfsck_thread_info *info = lfsck_env_info(env); struct lu_fid *cfid2 = &info->lti_fid2; struct ost_id *oi = &info->lti_oi; - char *postfix = info->lti_tmpbuf; + char *infix = info->lti_tmpbuf; struct lov_mds_md_v1 *lmm = ea_buf->lb_buf; struct dt_device *dev = com->lc_lfsck->li_bottom; struct thandle *th = NULL; @@ -2335,10 +2351,10 @@ static int lfsck_layout_conflict_create(const struct lu_env *env, ea_buf->lb_len = ori_len; fid_zero(&rec->lor_fid); - snprintf(postfix, LFSCK_TMPBUF_LEN, "-"DFID"-%x", + snprintf(infix, LFSCK_TMPBUF_LEN, "-"DFID"-%x", PFID(lu_object_fid(&parent->do_lu)), ea_off); rc = lfsck_layout_recreate_parent(env, com, ltd, rec, cfid, - "C-", postfix, ea_off); + infix, "C", ea_off); RETURN(rc); } @@ -2649,7 +2665,7 @@ static int lfsck_layout_scan_orphan_one(const struct lu_env *env, if (fid_is_zero(pfid)) { rc = lfsck_layout_recreate_parent(env, com, ltd, rec, cfid, - "N-", "", ea_off); + "", "N", ea_off); GOTO(out, rc); } @@ -2667,7 +2683,7 @@ static int lfsck_layout_scan_orphan_one(const struct lu_env *env, if (dt_object_exists(parent) == 0) { lu_object_put(env, &parent->do_lu); rc = lfsck_layout_recreate_parent(env, com, ltd, rec, cfid, - "R-", "", ea_off); + "", "R", ea_off); GOTO(out, rc); } diff --git a/lustre/tests/sanity-lfsck.sh b/lustre/tests/sanity-lfsck.sh index 638a523..566bb53 100644 --- a/lustre/tests/sanity-lfsck.sh +++ b/lustre/tests/sanity-lfsck.sh @@ -1688,11 +1688,11 @@ test_18b() { fi echo "Move the files from ./lustre/lost+found/MDTxxxx to namespace" - mv $MOUNT/.lustre/lost+found/MDT0000/R-${fid1} $DIR/$tdir/a1/f1 || - error "(5) Fail to move $MOUNT/.lustre/lost+found/MDT0000/R-${fid1}" + mv $MOUNT/.lustre/lost+found/MDT0000/${fid1}-R-0 $DIR/$tdir/a1/f1 || + error "(5) Fail to move $MOUNT/.lustre/lost+found/MDT0000/${fid1}-R-0" if [ $MDSCOUNT -ge 2 ]; then - local name=$MOUNT/.lustre/lost+found/MDT0001/R-${fid2} + local name=$MOUNT/.lustre/lost+found/MDT0001/${fid2}-R-0 mv $name $DIR/$tdir/a2/f2 || error "(6) Fail to move $name" fi @@ -1808,11 +1808,11 @@ test_18c() { fi echo "There should be some stub under .lustre/lost+found/MDT0001/" - ls -ail $MOUNT/.lustre/lost+found/MDT0001/N-* && + ls -ail $MOUNT/.lustre/lost+found/MDT0001/*-N-0 && error "(6) .lustre/lost+found/MDT0001/ should be empty" echo "There should be some stub under .lustre/lost+found/MDT0000/" - ls -ail $MOUNT/.lustre/lost+found/MDT0000/N-* || + ls -ail $MOUNT/.lustre/lost+found/MDT0000/*-N-0 || error "(7) .lustre/lost+found/MDT0000/ should not be empty" } run_test 18c "Find out orphan OST-object and repair it (3)" @@ -2001,7 +2001,7 @@ test_18e() { error "(6) Expect 1 orphan has been fixed, but got: $repaired" echo "There should be stub file under .lustre/lost+found/MDT0000/" - local cname=$(ls $MOUNT/.lustre/lost+found/MDT0000/C-*) + local cname=$(ls $MOUNT/.lustre/lost+found/MDT0000/*-C-0) [ ! -z $name ] || error "(7) .lustre/lost+found/MDT0000/ should not be empty" @@ -2201,9 +2201,9 @@ test_20() { LOV_PATTERN_F_HOLE=0x40000000 # - # R-${fid0} is the old f0 + # ${fid0}-R-0 is the old f0 # - local name="$MOUNT/.lustre/lost+found/MDT0000/R-${fid0}" + local name="$MOUNT/.lustre/lost+found/MDT0000/${fid0}-R-0" echo "Check $name, which is the old f0" $LFS getstripe -v $name || error "(5.1) cannot getstripe on $name" @@ -2236,9 +2236,9 @@ test_20() { rm -f $name || error "(5.9) cannot unlink $name" # - # R-${fid1} contains the old f1's stripe1 (and stripe2 if OSTs > 2) + # ${fid1}-R-0 contains the old f1's stripe1 (and stripe2 if OSTs > 2) # - name="$MOUNT/.lustre/lost+found/MDT0000/R-${fid1}" + name="$MOUNT/.lustre/lost+found/MDT0000/${fid1}-R-0" if [ $OSTCOUNT -gt 2 ]; then echo "Check $name, it contains the old f1's stripe1 and stripe2" else @@ -2292,9 +2292,9 @@ test_20() { rm -f $name || error "(6.13) cannot unlink $name" # - # R-${fid2} it contains the old f2's stripe0 (and stripe2 if OSTs > 2) + # ${fid2}-R-0 it contains the old f2's stripe0 (and stripe2 if OSTs > 2) # - name="$MOUNT/.lustre/lost+found/MDT0000/R-${fid2}" + name="$MOUNT/.lustre/lost+found/MDT0000/${fid2}-R-0" if [ $OSTCOUNT -gt 2 ]; then echo "Check $name, it contains the old f2's stripe0 and stripe2" else @@ -2368,9 +2368,9 @@ test_20() { [ $OSTCOUNT -le 2 ] && return # - # R-${fid3} should contains the old f3's stripe0 and stripe1 + # ${fid3}-R-0 should contains the old f3's stripe0 and stripe1 # - name="$MOUNT/.lustre/lost+found/MDT0000/R-${fid3}" + name="$MOUNT/.lustre/lost+found/MDT0000/${fid3}-R-0" echo "Check $name, which contains the old f3's stripe0 and stripe1" $LFS getstripe -v $name || error "(8.1) cannot getstripe on $name"