From bad2b3e71c887d58d7834f5e4187c29e36c4b1ab Mon Sep 17 00:00:00 2001 From: Fan Yong Date: Tue, 15 Jan 2013 17:57:33 +0800 Subject: [PATCH 1/1] LU-1866 lfsck: LFSCK for namespace consistency (3) It is the third part of LFSCK 1.5 for namespace consistency check and repair. It mainly implements the LFSCK namespace component functionality lfsck_operations::lfsck_double_scan: process multiple-linked objects. 1) Find out and repair crashed linkEA entry. 2) Find out and remove invalid linkEA entry Test-Parameters: envdefinitions=ENABLE_QUOTA=yes testlist=sanity-lfsck Signed-off-by: Fan Yong Change-Id: Ia38fa343ed4da10635c1af4d90b9039c5e01688c Reviewed-on: http://review.whamcloud.com/4914 Tested-by: Hudson Reviewed-by: Alex Zhuravlev Reviewed-by: Jian Yu Reviewed-by: Andreas Dilger --- lustre/include/obd_support.h | 6 + lustre/mdd/mdd_dir.c | 10 ++ lustre/mdd/mdd_lfsck.c | 392 ++++++++++++++++++++++++++++++++++++++++-- lustre/tests/sanity-lfsck.sh | 399 +++++++++++++++++++++++++++++++++++++++++++ 4 files changed, 789 insertions(+), 18 deletions(-) diff --git a/lustre/include/obd_support.h b/lustre/include/obd_support.h index 08065f3..7858647 100644 --- a/lustre/include/obd_support.h +++ b/lustre/include/obd_support.h @@ -473,8 +473,14 @@ int obd_alloc_fail(const void *ptr, const char *name, const char *type, /* LFSCK */ #define OBD_FAIL_LFSCK_DELAY1 0x1600 #define OBD_FAIL_LFSCK_DELAY2 0x1601 +#define OBD_FAIL_LFSCK_DELAY3 0x1602 +#define OBD_FAIL_LFSCK_LINKEA_CRASH 0x1603 +#define OBD_FAIL_LFSCK_LINKEA_MORE 0x1604 #define OBD_FAIL_LFSCK_FATAL1 0x1608 #define OBD_FAIL_LFSCK_FATAL2 0x1609 +#define OBD_FAIL_LFSCK_CRASH 0x160a +#define OBD_FAIL_LFSCK_NO_AUTO 0x160b +#define OBD_FAIL_LFSCK_NO_DOUBLESCAN 0x160c /* Assign references to moved code to reduce code changes */ #define OBD_FAIL_PRECHECK(id) CFS_FAIL_PRECHECK(id) diff --git a/lustre/mdd/mdd_dir.c b/lustre/mdd/mdd_dir.c index e60f9e4..5fb1cf8 100644 --- a/lustre/mdd/mdd_dir.c +++ b/lustre/mdd/mdd_dir.c @@ -2559,6 +2559,8 @@ static int mdd_lee_pack(struct link_ea_entry *lee, const struct lu_name *lname, int reclen; fid_cpu_to_be(&tmpfid, pfid); + if (OBD_FAIL_CHECK(OBD_FAIL_LFSCK_LINKEA_CRASH)) + tmpfid.f_ver = ~0; memcpy(&lee->lee_parent_fid, &tmpfid, sizeof(tmpfid)); memcpy(lee->lee_name, lname->ln_name, lname->ln_namelen); reclen = sizeof(struct link_ea_entry) + lname->ln_namelen; @@ -2708,6 +2710,14 @@ static int __mdd_links_add(const struct lu_env *env, return -EEXIST; } + if (OBD_FAIL_CHECK(OBD_FAIL_LFSCK_LINKEA_MORE)) { + struct lu_fid *tfid = &mdd_env_info(env)->mti_fid2; + + *tfid = *pfid; + tfid->f_ver = ~0; + mdd_links_add_buf(env, ldata, lname, tfid); + } + return mdd_links_add_buf(env, ldata, lname, pfid); } diff --git a/lustre/mdd/mdd_lfsck.c b/lustre/mdd/mdd_lfsck.c index 78228ab..c31f3ad 100644 --- a/lustre/mdd/mdd_lfsck.c +++ b/lustre/mdd/mdd_lfsck.c @@ -714,6 +714,7 @@ static int mdd_lfsck_namespace_unlink(const struct lu_env *env, struct dt_object *child = com->lc_obj; struct dt_object *parent; struct thandle *handle; + bool locked = false; int rc; ENTRY; @@ -721,7 +722,7 @@ static int mdd_lfsck_namespace_unlink(const struct lu_env *env, if (IS_ERR(parent)) RETURN(rc = PTR_ERR(parent)); - if (dt_try_as_dir(env, parent)) + if (!dt_try_as_dir(env, parent)) GOTO(out, rc = -ENOTDIR); handle = dt_trans_create(env, mdd->mdd_bottom); @@ -737,6 +738,8 @@ static int mdd_lfsck_namespace_unlink(const struct lu_env *env, if (rc != 0) GOTO(stop, rc); + dt_write_lock(env, child, MOR_TGT_CHILD); + locked = true; rc = dt_delete(env, parent, (struct dt_key *)lfsck_namespace_name, handle, BYPASS_CAPA); if (rc != 0) @@ -755,14 +758,18 @@ static int mdd_lfsck_namespace_unlink(const struct lu_env *env, rc = dt_destroy(env, child, handle); + + GOTO(stop, rc); + +stop: + if (locked) + dt_write_unlock(env, child); + if (rc == 0) { lu_object_put(env, &child->do_lu); com->lc_obj = NULL; } - GOTO(stop, rc); - -stop: dt_trans_stop(env, mdd->mdd_bottom, handle); out: @@ -784,6 +791,40 @@ static int mdd_lfsck_namespace_lookup(const struct lu_env *env, return rc; } +static int mdd_lfsck_namespace_delete(const struct lu_env *env, + struct lfsck_component *com, + const struct lu_fid *fid) +{ + struct mdd_device *mdd = mdd_lfsck2mdd(com->lc_lfsck); + struct lu_fid *key = &mdd_env_info(env)->mti_fid; + struct thandle *handle; + struct dt_object *obj = com->lc_obj; + int rc; + ENTRY; + + handle = dt_trans_create(env, mdd->mdd_bottom); + if (IS_ERR(handle)) + RETURN(PTR_ERR(handle)); + + rc = dt_declare_delete(env, obj, (const struct dt_key *)fid, handle); + if (rc != 0) + GOTO(out, rc); + + rc = dt_trans_start_local(env, mdd->mdd_bottom, handle); + if (rc != 0) + GOTO(out, rc); + + fid_cpu_to_be(key, fid); + rc = dt_delete(env, obj, (const struct dt_key *)key, handle, + BYPASS_CAPA); + + GOTO(out, rc); + +out: + dt_trans_stop(env, mdd->mdd_bottom, handle); + return rc; +} + static int mdd_lfsck_namespace_update(const struct lu_env *env, struct lfsck_component *com, const struct lu_fid *fid, @@ -851,6 +892,179 @@ out: return rc; } +/** + * \retval +ve repaired + * \retval 0 no need to repair + * \retval -ve error cases + */ +static int mdd_lfsck_namespace_double_scan_one(const struct lu_env *env, + struct lfsck_component *com, + struct mdd_object *child, + __u8 flags) +{ + struct mdd_thread_info *info = mdd_env_info(env); + struct lu_attr *la = &info->mti_la; + struct lu_name *cname = &info->mti_name; + struct lu_fid *pfid = &info->mti_fid; + struct lu_fid *cfid = &info->mti_fid2; + struct md_lfsck *lfsck = com->lc_lfsck; + struct mdd_device *mdd = mdd_lfsck2mdd(lfsck); + struct lfsck_bookmark *bk = &lfsck->ml_bookmark_ram; + struct lfsck_namespace *ns = + (struct lfsck_namespace *)com->lc_file_ram; + struct mdd_link_data ldata = { 0 }; + struct thandle *handle = NULL; + bool locked = false; + bool update = false; + int count; + int rc; + ENTRY; + + if (com->lc_journal) { + +again: + LASSERT(!locked); + + com->lc_journal = 1; + handle = mdd_trans_create(env, mdd); + if (IS_ERR(handle)) + RETURN(rc = PTR_ERR(handle)); + + rc = mdd_declare_links_add(env, child, handle);; + if (rc != 0) + GOTO(stop, rc); + + rc = mdd_trans_start(env, mdd, handle); + if (rc != 0) + GOTO(stop, rc); + + mdd_write_lock(env, child, MOR_TGT_CHILD); + locked = true; + } + + if (unlikely(mdd_is_dead_obj(child))) + GOTO(stop, rc = 0); + + rc = mdd_links_read(env, child, &ldata); + if (rc != 0) { + if ((bk->lb_param & LPF_DRYRUN) && + (rc == -EINVAL || rc == -ENODATA)) + rc = 1; + + GOTO(stop, rc); + } + + rc = mdd_la_get(env, child, la, BYPASS_CAPA); + if (rc != 0) + GOTO(stop, rc); + + ldata.ml_lee = (struct link_ea_entry *)(ldata.ml_leh + 1); + count = ldata.ml_leh->leh_reccount; + while (count-- > 0) { + struct mdd_object *parent = NULL; + struct dt_object *dir; + + mdd_lee_unpack(ldata.ml_lee, &ldata.ml_reclen, cname, pfid); + if (!fid_is_sane(pfid)) + goto shrink; + + parent = mdd_object_find(env, mdd, pfid); + if (parent == NULL) + goto shrink; + else if (IS_ERR(parent)) + GOTO(stop, rc = PTR_ERR(parent)); + + if (!mdd_object_exists(parent)) + goto shrink; + + /* XXX: need more processing for remote object in the future. */ + if (mdd_object_remote(parent)) { + mdd_object_put(env, parent); + ldata.ml_lee = (struct link_ea_entry *) + ((char *)ldata.ml_lee + ldata.ml_reclen); + continue; + } + + dir = mdd_object_child(parent); + if (unlikely(!dt_try_as_dir(env, dir))) + goto shrink; + + /* To guarantee the 'name' is terminated with '0'. */ + memcpy(info->mti_key, cname->ln_name, cname->ln_namelen); + info->mti_key[cname->ln_namelen] = 0; + cname->ln_name = info->mti_key; + rc = dt_lookup(env, dir, (struct dt_rec *)cfid, + (const struct dt_key *)cname->ln_name, + BYPASS_CAPA); + if (rc != 0 && rc != -ENOENT) { + mdd_object_put(env, parent); + GOTO(stop, rc); + } + + if (rc == 0) { + if (lu_fid_eq(cfid, mdo2fid(child))) { + mdd_object_put(env, parent); + ldata.ml_lee = (struct link_ea_entry *) + ((char *)ldata.ml_lee + ldata.ml_reclen); + continue; + } + + goto shrink; + } + + if (ldata.ml_leh->leh_reccount > la->la_nlink) + goto shrink; + + /* XXX: For the case of there is linkea entry, but without name + * entry pointing to the object, and the object link count + * isn't less than the count of name entries, then add the + * name entry back to namespace. + * + * It is out of LFSCK 1.5 scope, will implement it in the + * future. Keep the linkEA entry. */ + mdd_object_put(env, parent); + ldata.ml_lee = (struct link_ea_entry *) + ((char *)ldata.ml_lee + ldata.ml_reclen); + continue; + +shrink: + if (parent != NULL) + mdd_object_put(env, parent); + if (bk->lb_param & LPF_DRYRUN) + RETURN(1); + + CDEBUG(D_LFSCK, "Remove linkEA: "DFID"[%.*s], "DFID"\n", + PFID(mdo2fid(child)), cname->ln_namelen, cname->ln_name, + PFID(pfid)); + mdd_links_del_buf(env, &ldata, cname); + update = true; + } + + if (update) { + if (!com->lc_journal) { + com->lc_journal = 1; + goto again; + } + + rc = mdd_links_write(env, child, &ldata, handle); + } + + GOTO(stop, rc); + +stop: + if (locked) + mdd_write_unlock(env, child); + + if (handle != NULL) + mdd_trans_stop(env, mdd, rc, handle); + + if (rc == 0 && update) { + ns->ln_objs_nlink_repaired++; + rc = 1; + } + return rc; +} + /* namespace APIs */ static int mdd_lfsck_namespace_reset(const struct lu_env *env, @@ -1582,33 +1796,169 @@ out: return ret; } -/* XXX: to be implemented in other patch. */ static int mdd_lfsck_namespace_double_scan(const struct lu_env *env, struct lfsck_component *com) { struct md_lfsck *lfsck = com->lc_lfsck; + struct ptlrpc_thread *thread = &lfsck->ml_thread; + struct mdd_device *mdd = mdd_lfsck2mdd(lfsck); struct lfsck_bookmark *bk = &lfsck->ml_bookmark_ram; struct lfsck_namespace *ns = (struct lfsck_namespace *)com->lc_file_ram; + struct dt_object *obj = com->lc_obj; + const struct dt_it_ops *iops = &obj->do_index_ops->dio_it; + struct mdd_object *target; + struct dt_it *di; + struct dt_key *key; + struct lu_fid fid; int rc; + __u8 flags; + ENTRY; + + lfsck->ml_new_scanned = 0; + lfsck->ml_time_last_checkpoint = cfs_time_current(); + lfsck->ml_time_next_checkpoint = lfsck->ml_time_last_checkpoint + + cfs_time_seconds(LFSCK_CHECKPOINT_INTERVAL); + + di = iops->init(env, obj, 0, BYPASS_CAPA); + if (IS_ERR(di)) + RETURN(PTR_ERR(di)); + + fid_cpu_to_be(&fid, &ns->ln_fid_latest_scanned_phase2); + rc = iops->get(env, di, (const struct dt_key *)&fid); + if (rc < 0) + GOTO(fini, rc); + + /* Skip the start one, which either has been processed or non-exist. */ + rc = iops->next(env, di); + if (rc != 0) + GOTO(put, rc); + + if (OBD_FAIL_CHECK(OBD_FAIL_LFSCK_NO_DOUBLESCAN)) + GOTO(put, rc = 0); + + do { + if (OBD_FAIL_CHECK(OBD_FAIL_LFSCK_DELAY3) && + cfs_fail_val > 0) { + struct l_wait_info lwi; + + lwi = LWI_TIMEOUT(cfs_time_seconds(cfs_fail_val), + NULL, NULL); + l_wait_event(thread->t_ctl_waitq, + !thread_is_running(thread), + &lwi); + } + + key = iops->key(env, di); + fid_be_to_cpu(&fid, (const struct lu_fid *)key); + target = mdd_object_find(env, mdd, &fid); + down_write(&com->lc_sem); + if (target == NULL) { + rc = 0; + goto checkpoint; + } else if (IS_ERR(target)) { + rc = PTR_ERR(target); + goto checkpoint; + } + + /* XXX: need more processing for remote object in the future. */ + if (!mdd_object_exists(target) || mdd_object_remote(target)) + goto obj_put; + + rc = iops->rec(env, di, (struct dt_rec *)&flags, 0); + if (rc == 0) + rc = mdd_lfsck_namespace_double_scan_one(env, com, + target, flags); + +obj_put: + mdd_object_put(env, target); + +checkpoint: + lfsck->ml_new_scanned++; + com->lc_new_checked++; + ns->ln_fid_latest_scanned_phase2 = fid; + if (rc > 0) + ns->ln_objs_repaired_phase2++; + else if (rc < 0) + ns->ln_objs_failed_phase2++; + up_write(&com->lc_sem); + + if ((rc == 0) || ((rc > 0) && !(bk->lb_param & LPF_DRYRUN))) { + mdd_lfsck_namespace_delete(env, com, &fid); + } else if (rc < 0) { + flags |= LLF_REPAIR_FAILED; + mdd_lfsck_namespace_update(env, com, &fid, flags, true); + } + + if (rc < 0 && bk->lb_param & LPF_FAILOUT) + GOTO(put, rc); + + if (likely(cfs_time_beforeq(cfs_time_current(), + lfsck->ml_time_next_checkpoint)) || + com->lc_new_checked == 0) + goto speed; + + down_write(&com->lc_sem); + ns->ln_run_time_phase2 += cfs_duration_sec(cfs_time_current() + + HALF_SEC - lfsck->ml_time_last_checkpoint); + ns->ln_time_last_checkpoint = cfs_time_current_sec(); + ns->ln_objs_checked_phase2 += com->lc_new_checked; + com->lc_new_checked = 0; + rc = mdd_lfsck_namespace_store(env, com, false); + up_write(&com->lc_sem); + if (rc != 0) + GOTO(put, rc); + lfsck->ml_time_last_checkpoint = cfs_time_current(); + lfsck->ml_time_next_checkpoint = lfsck->ml_time_last_checkpoint + + cfs_time_seconds(LFSCK_CHECKPOINT_INTERVAL); + +speed: + mdd_lfsck_control_speed(lfsck); + if (unlikely(!thread_is_running(thread))) + GOTO(put, rc = 0); + + rc = iops->next(env, di); + } while (rc == 0); + + GOTO(put, rc); + +put: + iops->put(env, di); + +fini: + iops->fini(env, di); down_write(&com->lc_sem); + ns->ln_run_time_phase2 += cfs_duration_sec(cfs_time_current() + + HALF_SEC - lfsck->ml_time_last_checkpoint); ns->ln_time_last_checkpoint = cfs_time_current_sec(); + ns->ln_objs_checked_phase2 += com->lc_new_checked; com->lc_new_checked = 0; - com->lc_journal = 0; - ns->ln_status = LS_COMPLETED; - if (!(bk->lb_param & LPF_DRYRUN)) - ns->ln_flags &= - ~(LF_SCANNED_ONCE | LF_INCONSISTENT | LF_UPGRADE); - ns->ln_time_last_complete = ns->ln_time_last_checkpoint; - ns->ln_success_count++; + if (rc > 0) { + com->lc_journal = 0; + ns->ln_status = LS_COMPLETED; + if (!(bk->lb_param & LPF_DRYRUN)) + ns->ln_flags &= + ~(LF_SCANNED_ONCE | LF_INCONSISTENT | LF_UPGRADE); + ns->ln_time_last_complete = ns->ln_time_last_checkpoint; + ns->ln_success_count++; + } else if (rc == 0) { + if (lfsck->ml_paused) + ns->ln_status = LS_PAUSED; + else + ns->ln_status = LS_STOPPED; + } else { + ns->ln_status = LS_FAILED; + } - spin_lock(&lfsck->ml_lock); - cfs_list_del_init(&com->lc_link); - cfs_list_add_tail(&com->lc_link, &lfsck->ml_list_idle); - spin_unlock(&lfsck->ml_lock); + if (ns->ln_status != LS_PAUSED) { + spin_lock(&lfsck->ml_lock); + cfs_list_del_init(&com->lc_link); + cfs_list_add_tail(&com->lc_link, &lfsck->ml_list_idle); + spin_unlock(&lfsck->ml_lock); + } rc = mdd_lfsck_namespace_store(env, com, false); @@ -2185,6 +2535,9 @@ static int mdd_lfsck_oit_engine(const struct lu_env *env, &lwi); } + if (OBD_FAIL_CHECK(OBD_FAIL_LFSCK_CRASH)) + RETURN(0); + lfsck->ml_new_scanned++; rc = iops->rec(env, di, (struct dt_rec *)fid, 0); if (rc != 0) { @@ -2305,7 +2658,8 @@ static int mdd_lfsck_main(void *args) if (lfsck->ml_paused && cfs_list_empty(&lfsck->ml_list_scan)) oit_iops->put(&env, oit_di); - rc = mdd_lfsck_post(&env, lfsck, rc); + if (!OBD_FAIL_CHECK(OBD_FAIL_LFSCK_CRASH)) + rc = mdd_lfsck_post(&env, lfsck, rc); if (lfsck->ml_di_dir != NULL) mdd_lfsck_close_dir(&env, lfsck); @@ -2384,7 +2738,9 @@ int mdd_lfsck_start(const struct lu_env *env, struct md_lfsck *lfsck, RETURN(-ENOTSUPP); /* start == NULL means auto trigger paused LFSCK. */ - if (start == NULL && cfs_list_empty(&lfsck->ml_list_scan)) + if ((start == NULL) && + (cfs_list_empty(&lfsck->ml_list_scan) || + OBD_FAIL_CHECK(OBD_FAIL_LFSCK_NO_AUTO))) RETURN(0); mutex_lock(&lfsck->ml_mutex); diff --git a/lustre/tests/sanity-lfsck.sh b/lustre/tests/sanity-lfsck.sh index 9902d17..5e454d6 100644 --- a/lustre/tests/sanity-lfsck.sh +++ b/lustre/tests/sanity-lfsck.sh @@ -202,6 +202,83 @@ test_1b() } run_test 1b "LFSCK can find out and repair missed FID-in-LMA" +test_2a() { + lfsck_prep 1 1 + echo "start $SINGLEMDS" + start $SINGLEMDS $MDT_DEVNAME $MOUNT_OPTS_SCRUB > /dev/null || + error "(1) Fail to start MDS!" + + mount_client $MOUNT || error "(2) Fail to start client!" + + #define OBD_FAIL_LFSCK_LINKEA_CRASH 0x1603 + do_facet $SINGLEMDS $LCTL set_param fail_loc=0x1603 + touch $DIR/$tdir/dummy + + do_facet $SINGLEMDS $LCTL set_param fail_loc=0 + umount_client $MOUNT + $START_NAMESPACE || error "(3) Fail to start LFSCK for namespace!" + + sleep 3 + local STATUS=$($SHOW_NAMESPACE | awk '/^status/ { print $2 }') + [ "$STATUS" == "completed" ] || + error "(4) Expect 'completed', but got '$STATUS'" + + local repaired=$($SHOW_NAMESPACE | + awk '/^updated_phase1/ { print $2 }') + [ $repaired -eq 1 ] || + error "(5) Fail to repair crashed linkEA: $repaired" + + mount_client $MOUNT || error "(6) Fail to start client!" + + stat $DIR/$tdir/dummy | grep "Links: 1" > /dev/null || + error "(7) Fail to stat $DIR/$tdir/dummy" + + local dummyfid=$($LFS path2fid $DIR/$tdir/dummy) + local dummyname=$($LFS fid2path $DIR $dummyfid) + [ "$dummyname" == "$DIR/$tdir/dummy" ] || + error "(8) Fail to repair linkEA: $dummyfid $dummyname" +} +run_test 2a "LFSCK can find out and repair crashed linkEA entry" + +test_2b() +{ + lfsck_prep 1 1 + echo "start $SINGLEMDS" + start $SINGLEMDS $MDT_DEVNAME $MOUNT_OPTS_SCRUB > /dev/null || + error "(1) Fail to start MDS!" + + mount_client $MOUNT || error "(2) Fail to start client!" + + #define OBD_FAIL_LFSCK_LINKEA_MORE 0x1604 + do_facet $SINGLEMDS $LCTL set_param fail_loc=0x1604 + touch $DIR/$tdir/dummy + + do_facet $SINGLEMDS $LCTL set_param fail_loc=0 + umount_client $MOUNT + $START_NAMESPACE || error "(3) Fail to start LFSCK for namespace!" + + sleep 3 + local STATUS=$($SHOW_NAMESPACE | awk '/^status/ { print $2 }') + [ "$STATUS" == "completed" ] || + error "(4) Expect 'completed', but got '$STATUS'" + + local repaired=$($SHOW_NAMESPACE | + awk '/^updated_phase2/ { print $2 }') + [ $repaired -eq 1 ] || + error "(5) Fail to repair crashed linkEA: $repaired" + + mount_client $MOUNT || error "(6) Fail to start client!" + + stat $DIR/$tdir/dummy | grep "Links: 1" > /dev/null || + error "(7) Fail to stat $DIR/$tdir/dummy" + + local dummyfid=$($LFS path2fid $DIR/$tdir/dummy) + local dummyname=$($LFS fid2path $DIR $dummyfid) + [ "$dummyname" == "$DIR/$tdir/dummy" ] || + error "(8) Fail to repair linkEA: $dummyfid $dummyname" +} +run_test 2b "LFSCK can find out and remove invalid linkEA entry" + test_4() { lfsck_prep 3 3 @@ -454,6 +531,177 @@ test_7a() } run_test 7a "non-stopped LFSCK should auto restarts after MDS remount (1)" +test_7b() +{ + lfsck_prep 2 2 + echo "start $SINGLEMDS" + start $SINGLEMDS $MDT_DEVNAME $MOUNT_OPTS_SCRUB > /dev/null || + error "(1) Fail to start MDS!" + + mount_client $MOUNT || error "(2) Fail to start client!" + + #define OBD_FAIL_LFSCK_LINKEA_MORE 0x1604 + do_facet $SINGLEMDS $LCTL set_param fail_loc=0x1604 + for ((i=0; i<10; i++)); do + touch $DIR/$tdir/dummy${i} + done + + #define OBD_FAIL_LFSCK_DELAY3 0x1602 + do_facet $SINGLEMDS $LCTL set_param fail_val=1 + do_facet $SINGLEMDS $LCTL set_param fail_loc=0x1602 + $START_NAMESPACE || error "(3) Fail to start LFSCK for namespace!" + + sleep 3 + local STATUS=$($SHOW_NAMESPACE | awk '/^status/ { print $2 }') + [ "$STATUS" == "scanning-phase2" ] || + error "(4) Expect 'scanning-phase2', but got '$STATUS'" + + echo "stop $SINGLEMDS" + stop $SINGLEMDS > /dev/null || error "(5) Fail to stop MDS!" + + echo "start $SINGLEMDS" + start $SINGLEMDS $MDT_DEVNAME $MOUNT_OPTS_SCRUB > /dev/null || + error "(6) Fail to start MDS!" + + STATUS=$($SHOW_NAMESPACE | awk '/^status/ { print $2 }') + [ "$STATUS" == "scanning-phase2" ] || + error "(7) Expect 'scanning-phase2', but got '$STATUS'" + + do_facet $SINGLEMDS $LCTL set_param fail_loc=0 + do_facet $SINGLEMDS $LCTL set_param fail_val=0 + sleep 3 + STATUS=$($SHOW_NAMESPACE | awk '/^status/ { print $2 }') + [ "$STATUS" == "completed" ] || + error "(8) Expect 'completed', but got '$STATUS'" +} +run_test 7b "non-stopped LFSCK should auto restarts after MDS remount (2)" + +test_8() +{ + lfsck_prep 20 20 + echo "start $SINGLEMDS" + start $SINGLEMDS $MDT_DEVNAME $MOUNT_OPTS_SCRUB > /dev/null || + error "(1) Fail to start MDS!" + + local STATUS=$($SHOW_NAMESPACE | awk '/^status/ { print $2 }') + [ "$STATUS" == "init" ] || + error "(2) Expect 'init', but got '$STATUS'" + + mount_client $MOUNT || error "(3) Fail to start client!" + + #define OBD_FAIL_LFSCK_LINKEA_CRASH 0x1603 + do_facet $SINGLEMDS $LCTL set_param fail_loc=0x1603 + mkdir $DIR/$tdir/crashed + + #define OBD_FAIL_LFSCK_LINKEA_MORE 0x1604 + do_facet $SINGLEMDS $LCTL set_param fail_loc=0x1604 + for ((i=0; i<5; i++)); do + touch $DIR/$tdir/dummy${i} + done + + #define OBD_FAIL_LFSCK_DELAY2 0x1601 + do_facet $SINGLEMDS $LCTL set_param fail_val=2 + do_facet $SINGLEMDS $LCTL set_param fail_loc=0x1601 + $START_NAMESPACE || error "(4) Fail to start LFSCK for namespace!" + + STATUS=$($SHOW_NAMESPACE | awk '/^status/ { print $2 }') + [ "$STATUS" == "scanning-phase1" ] || + error "(5) Expect 'scanning-phase1', but got '$STATUS'" + + $STOP_LFSCK || error "(6) Fail to stop LFSCK!" + + STATUS=$($SHOW_NAMESPACE | awk '/^status/ { print $2 }') + [ "$STATUS" == "stopped" ] || + error "(7) Expect 'stopped', but got '$STATUS'" + + $START_NAMESPACE || error "(8) Fail to start LFSCK for namespace!" + + STATUS=$($SHOW_NAMESPACE | awk '/^status/ { print $2 }') + [ "$STATUS" == "scanning-phase1" ] || + error "(9) Expect 'scanning-phase1', but got '$STATUS'" + + #define OBD_FAIL_LFSCK_FATAL2 0x1609 + do_facet $SINGLEMDS $LCTL set_param fail_loc=0x80001609 + sleep 3 + STATUS=$($SHOW_NAMESPACE | awk '/^status/ { print $2 }') + [ "$STATUS" == "failed" ] || + error "(10) Expect 'failed', but got '$STATUS'" + + #define OBD_FAIL_LFSCK_DELAY1 0x1600 + do_facet $SINGLEMDS $LCTL set_param fail_loc=0x1600 + $START_NAMESPACE || error "(11) Fail to start LFSCK for namespace!" + + STATUS=$($SHOW_NAMESPACE | awk '/^status/ { print $2 }') + [ "$STATUS" == "scanning-phase1" ] || + error "(12) Expect 'scanning-phase1', but got '$STATUS'" + + #define OBD_FAIL_LFSCK_CRASH 0x160a + do_facet $SINGLEMDS $LCTL set_param fail_loc=0x160a + sleep 5 + + echo "stop $SINGLEMDS" + stop $SINGLEMDS > /dev/null || error "(13) Fail to stop MDS!" + + #define OBD_FAIL_LFSCK_NO_AUTO 0x160b + do_facet $SINGLEMDS $LCTL set_param fail_loc=0x160b + + echo "start $SINGLEMDS" + start $SINGLEMDS $MDT_DEVNAME $MOUNT_OPTS_SCRUB > /dev/null || + error "(14) Fail to start MDS!" + + STATUS=$($SHOW_NAMESPACE | awk '/^status/ { print $2 }') + [ "$STATUS" == "crashed" ] || + error "(15) Expect 'crashed', but got '$STATUS'" + + #define OBD_FAIL_LFSCK_DELAY2 0x1601 + do_facet $SINGLEMDS $LCTL set_param fail_loc=0x1601 + $START_NAMESPACE || error "(16) Fail to start LFSCK for namespace!" + + STATUS=$($SHOW_NAMESPACE | awk '/^status/ { print $2 }') + [ "$STATUS" == "scanning-phase1" ] || + error "(17) Expect 'scanning-phase1', but got '$STATUS'" + + echo "stop $SINGLEMDS" + stop $SINGLEMDS > /dev/null || error "(18) Fail to stop MDS!" + + #define OBD_FAIL_LFSCK_NO_AUTO 0x160b + do_facet $SINGLEMDS $LCTL set_param fail_loc=0x160b + + echo "start $SINGLEMDS" + start $SINGLEMDS $MDT_DEVNAME $MOUNT_OPTS_SCRUB > /dev/null || + error "(19) Fail to start MDS!" + + STATUS=$($SHOW_NAMESPACE | awk '/^status/ { print $2 }') + [ "$STATUS" == "paused" ] || + error "(20) Expect 'paused', but got '$STATUS'" + + #define OBD_FAIL_LFSCK_DELAY3 0x1602 + do_facet $SINGLEMDS $LCTL set_param fail_val=2 + do_facet $SINGLEMDS $LCTL set_param fail_loc=0x1602 + + $START_NAMESPACE || error "(21) Fail to start LFSCK for namespace!" + sleep 2 + STATUS=$($SHOW_NAMESPACE | awk '/^status/ { print $2 }') + [ "$STATUS" == "scanning-phase2" ] || + error "(22) Expect 'scanning-phase2', but got '$STATUS'" + + local FLAGS=$($SHOW_NAMESPACE | awk '/^flags/ { print $2 }') + [ "$FLAGS" == "scanned-once,inconsistent" ] || + error "(23) Expect 'scanned-once,inconsistent',but got '$FLAGS'" + + do_facet $SINGLEMDS $LCTL set_param fail_loc=0 + do_facet $SINGLEMDS $LCTL set_param fail_val=0 + sleep 2 + STATUS=$($SHOW_NAMESPACE | awk '/^status/ { print $2 }') + [ "$STATUS" == "completed" ] || + error "(24) Expect 'completed', but got '$STATUS'" + + FLAGS=$($SHOW_NAMESPACE | awk '/^flags/ { print $2 }') + [ -z "$FLAGS" ] || error "(25) Expect empty flags, but got '$FLAGS'" + +} +run_test 8 "LFSCK state machine" + test_9a() { if [ -z "$(grep "processor.*: 1" /proc/cpuinfo)" ]; then skip "Testing on UP system, the speed may be inaccurate." @@ -505,6 +753,157 @@ test_9a() { } run_test 9a "LFSCK speed control (1)" +test_9b() { + if [ -z "$(grep "processor.*: 1" /proc/cpuinfo)" ]; then + skip "Testing on UP system, the speed may be inaccurate." + return 0 + fi + + lfsck_prep 0 0 + echo "start $SINGLEMDS" + start $SINGLEMDS $MDT_DEVNAME $MOUNT_OPTS_SCRUB > /dev/null || + error "(1) Fail to start MDS!" + + mount_client $MOUNT || error "(2) Fail to start client!" + + echo "Another preparing... 50 * 50 files (with error) will be created." + #define OBD_FAIL_LFSCK_LINKEA_MORE 0x1604 + do_facet $SINGLEMDS $LCTL set_param fail_loc=0x1604 + for ((i=0; i<50; i++)); do + mkdir -p $DIR/$tdir/d${i} + touch $DIR/$tdir/f${i} + for ((j=0; j<50; j++)); do + touch $DIR/$tdir/d${i}/f${j} + done + done + + local STATUS=$($SHOW_NAMESPACE | awk '/^status/ { print $2 }') + [ "$STATUS" == "init" ] || + error "(3) Expect 'init', but got '$STATUS'" + + #define OBD_FAIL_LFSCK_NO_DOUBLESCAN 0x160c + do_facet $SINGLEMDS $LCTL set_param fail_loc=0x160c + $START_NAMESPACE || error "(4) Fail to start LFSCK!" + + sleep 10 + STATUS=$($SHOW_NAMESPACE | awk '/^status/ { print $2 }') + [ "$STATUS" == "stopped" ] || + error "(5) Expect 'stopped', but got '$STATUS'" + + do_facet $SINGLEMDS $LCTL set_param fail_loc=0 + $START_NAMESPACE -s 50 || error "(6) Fail to start LFSCK!" + + sleep 10 + STATUS=$($SHOW_NAMESPACE | awk '/^status/ { print $2 }') + [ "$STATUS" == "scanning-phase2" ] || + error "(7) Expect 'scanning-phase2', but got '$STATUS'" + + local SPEED=$($SHOW_NAMESPACE | + awk '/^average_speed_phase2/ { print $2 }') + # (50 * (10 + 1)) / 10 = 55 + [ $SPEED -lt 60 ] || + error "(8) Unexpected speed $SPEED, should not more than 60" + + # adjust speed limit + do_facet $SINGLEMDS \ + $LCTL set_param -n mdd.${MDT_DEV}.lfsck_speed_limit 150 + sleep 10 + + SPEED=$($SHOW_NAMESPACE | awk '/^average_speed_phase2/ { print $2 }') + # (50 * (10 - 1) + 150 * (10 - 1)) / 20 = 90 + [ $SPEED -lt 85 ] && + error "(9) Unexpected speed $SPEED, should not less than 85" + + # (50 * (10 + 1) + 150 * (10 + 1)) / 20 = 110 + [ $SPEED -lt 115 ] || + error "(10) Unexpected speed $SPEED, should not more than 115" + + do_facet $SINGLEMDS \ + $LCTL set_param -n mdd.${MDT_DEV}.lfsck_speed_limit 0 + sleep 5 + STATUS=$($SHOW_NAMESPACE | awk '/^status/ { print $2 }') + [ "$STATUS" == "completed" ] || + error "(11) Expect 'completed', but got '$STATUS'" +} +run_test 9b "LFSCK speed control (2)" + +test_10() +{ + lfsck_prep 1 1 + echo "start $SINGLEMDS" + start $SINGLEMDS $MDT_DEVNAME $MOUNT_OPTS_SCRUB > /dev/null || + error "(1) Fail to start MDS!" + + mount_client $MOUNT || error "(2) Fail to start client!" + + #define OBD_FAIL_LFSCK_LINKEA_CRASH 0x1603 + do_facet $SINGLEMDS $LCTL set_param fail_loc=0x1603 + for ((i=0; i<1000; i=$((i+2)))); do + mkdir -p $DIR/$tdir/d${i} + touch $DIR/$tdir/f${i} + for ((j=0; j<5; j++)); do + touch $DIR/$tdir/d${i}/f${j} + done + done + + #define OBD_FAIL_LFSCK_LINKEA_MORE 0x1604 + do_facet $SINGLEMDS $LCTL set_param fail_loc=0x1604 + for ((i=1; i<1000; i=$((i+2)))); do + mkdir -p $DIR/$tdir/d${i} + touch $DIR/$tdir/f${i} + for ((j=0; j<5; j++)); do + touch $DIR/$tdir/d${i}/f${j} + done + done + + do_facet $SINGLEMDS $LCTL set_param fail_loc=0 + ln $DIR/$tdir/f200 $DIR/$tdir/d200/dummy + + umount_client $MOUNT + mount_client $MOUNT || error "(3) Fail to start client!" + + local STATUS=$($SHOW_NAMESPACE | awk '/^status/ { print $2 }') + [ "$STATUS" == "init" ] || + error "(4) Expect 'init', but got '$STATUS'" + + $START_NAMESPACE -s 100 || error "(5) Fail to start LFSCK!" + + sleep 10 + STATUS=$($SHOW_NAMESPACE | awk '/^status/ { print $2 }') + [ "$STATUS" == "scanning-phase1" ] || + error "(6) Expect 'scanning-phase1', but got '$STATUS'" + + ls -ailR $MOUNT > /dev/null || error "(7) Fail to ls!" + + touch $DIR/$tdir/d198/a0 || error "(8) Fail to touch!" + + mkdir $DIR/$tdir/d199/a1 || error "(9) Fail to mkdir!" + + unlink $DIR/$tdir/f200 || error "(10) Fail to unlink!" + + rm -rf $DIR/$tdir/d201 || error "(11) Fail to rmdir!" + + mv $DIR/$tdir/f202 $DIR/$tdir/d203/ || error "(12) Fail to rename!" + + ln $DIR/$tdir/f204 $DIR/$tdir/d205/a3 || error "(13) Fail to hardlink!" + + ln -s $DIR/$tdir/d206 $DIR/$tdir/d207/a4 || + error "(14) Fail to softlink!" + + STATUS=$($SHOW_NAMESPACE | awk '/^status/ { print $2 }') + [ "$STATUS" == "scanning-phase1" ] || + error "(15) Expect 'scanning-phase1', but got '$STATUS'" + + do_facet $SINGLEMDS \ + $LCTL set_param -n mdd.${MDT_DEV}.lfsck_speed_limit 0 + umount_client $MOUNT + sleep 10 + STATUS=$($SHOW_NAMESPACE | awk '/^status/ { print $2 }') + [ "$STATUS" == "completed" ] || + error "(16) Expect 'completed', but got '$STATUS'" +} +run_test 10 "System is available during LFSCK scanning" + $LCTL set_param debug=-lfsck > /dev/null || true # restore MDS/OST size -- 1.8.3.1