/* LFSCK */
#define OBD_FAIL_LFSCK_DELAY1 0x1600
#define OBD_FAIL_LFSCK_DELAY2 0x1601
+#define OBD_FAIL_LFSCK_DELAY3 0x1602 /* phase 2: wait up to cfs_fail_val seconds before each object */
+#define OBD_FAIL_LFSCK_LINKEA_CRASH 0x1603 /* pack a linkEA entry whose parent FID has f_ver = ~0 */
+#define OBD_FAIL_LFSCK_LINKEA_MORE 0x1604 /* add one extra linkEA entry whose parent FID has f_ver = ~0 */
#define OBD_FAIL_LFSCK_FATAL1 0x1608
#define OBD_FAIL_LFSCK_FATAL2 0x1609
+#define OBD_FAIL_LFSCK_CRASH 0x160a /* scan engine returns early and mdd_lfsck_post() is skipped */
+#define OBD_FAIL_LFSCK_NO_AUTO 0x160b /* auto trigger (start == NULL) returns without resuming LFSCK */
+#define OBD_FAIL_LFSCK_NO_DOUBLESCAN 0x160c /* phase 2 exits with rc = 0 before processing any object */
/* Assign references to moved code to reduce code changes */
#define OBD_FAIL_PRECHECK(id) CFS_FAIL_PRECHECK(id)
int reclen;
fid_cpu_to_be(&tmpfid, pfid);
+ if (OBD_FAIL_CHECK(OBD_FAIL_LFSCK_LINKEA_CRASH))
+ tmpfid.f_ver = ~0;
memcpy(&lee->lee_parent_fid, &tmpfid, sizeof(tmpfid));
memcpy(lee->lee_name, lname->ln_name, lname->ln_namelen);
reclen = sizeof(struct link_ea_entry) + lname->ln_namelen;
return -EEXIST;
}
+ if (OBD_FAIL_CHECK(OBD_FAIL_LFSCK_LINKEA_MORE)) {
+ struct lu_fid *tfid = &mdd_env_info(env)->mti_fid2;
+
+ *tfid = *pfid;
+ tfid->f_ver = ~0;
+ mdd_links_add_buf(env, ldata, lname, tfid);
+ }
+
return mdd_links_add_buf(env, ldata, lname, pfid);
}
struct dt_object *child = com->lc_obj;
struct dt_object *parent;
struct thandle *handle;
+ bool locked = false;
int rc;
ENTRY;
if (IS_ERR(parent))
RETURN(rc = PTR_ERR(parent));
- if (dt_try_as_dir(env, parent))
+ if (!dt_try_as_dir(env, parent))
GOTO(out, rc = -ENOTDIR);
handle = dt_trans_create(env, mdd->mdd_bottom);
if (rc != 0)
GOTO(stop, rc);
+ dt_write_lock(env, child, MOR_TGT_CHILD);
+ locked = true;
rc = dt_delete(env, parent, (struct dt_key *)lfsck_namespace_name,
handle, BYPASS_CAPA);
if (rc != 0)
rc = dt_destroy(env, child, handle);
+
+ GOTO(stop, rc);
+
+stop:
+ if (locked)
+ dt_write_unlock(env, child);
+
if (rc == 0) {
lu_object_put(env, &child->do_lu);
com->lc_obj = NULL;
}
- GOTO(stop, rc);
-
-stop:
dt_trans_stop(env, mdd->mdd_bottom, handle);
out:
return rc;
}
+/**
+ * Delete the record keyed by \a fid from the index object of the LFSCK
+ * component (com->lc_obj), using a transaction created, started and
+ * stopped inside this function.
+ *
+ * \param[in] env execution environment
+ * \param[in] com LFSCK component whose lc_obj index is modified
+ * \param[in] fid FID of the record to remove; converted with
+ *                fid_cpu_to_be() before being used as the index key
+ *
+ * \return the error of the first failing step (transaction create,
+ *         declare or start), otherwise the result of dt_delete()
+ */
+static int mdd_lfsck_namespace_delete(const struct lu_env *env,
+ struct lfsck_component *com,
+ const struct lu_fid *fid)
+{
+ struct mdd_device *mdd = mdd_lfsck2mdd(com->lc_lfsck);
+ struct lu_fid *key = &mdd_env_info(env)->mti_fid;
+ struct thandle *handle;
+ struct dt_object *obj = com->lc_obj;
+ int rc;
+ ENTRY;
+
+ handle = dt_trans_create(env, mdd->mdd_bottom);
+ if (IS_ERR(handle))
+ RETURN(PTR_ERR(handle));
+
+ /* NOTE(review): the declaration is made with the CPU-order \a fid while
+ * the real delete below uses the big-endian key; presumably the declare
+ * step does not look at the key content -- confirm. */
+ rc = dt_declare_delete(env, obj, (const struct dt_key *)fid, handle);
+ if (rc != 0)
+ GOTO(out, rc);
+
+ rc = dt_trans_start_local(env, mdd->mdd_bottom, handle);
+ if (rc != 0)
+ GOTO(out, rc);
+
+ /* The key handed to dt_delete() is the big-endian form of the FID. */
+ fid_cpu_to_be(key, fid);
+ rc = dt_delete(env, obj, (const struct dt_key *)key, handle,
+ BYPASS_CAPA);
+
+ GOTO(out, rc);
+
+out:
+ /* Reached on success and on every failure after dt_trans_create(). */
+ dt_trans_stop(env, mdd->mdd_bottom, handle);
+ return rc;
+}
+
static int mdd_lfsck_namespace_update(const struct lu_env *env,
struct lfsck_component *com,
const struct lu_fid *fid,
return rc;
}
+/**
+ * Verify, during the phase-2 (double) scan, every linkEA entry of \a child
+ * and drop the entries that cannot be matched to a name entry.
+ *
+ * For each linkEA record the (name, parent FID) pair is unpacked and the
+ * name is looked up in the parent directory.  The record is removed when:
+ * - the parent FID is not sane,
+ * - the parent cannot be found, does not exist or is not a directory,
+ * - the name resolves to a FID other than the FID of \a child, or
+ * - the name is missing and the linkEA holds more records than the
+ *   link count of \a child.
+ * Records whose parent is a remote object are left untouched.
+ *
+ * The first pass may run without a transaction (com->lc_journal unset).
+ * As soon as a record has to be removed, lc_journal is set and the whole
+ * check restarts from "again" under a transaction and the write lock of
+ * \a child; the linkEA is read again there before it is written back.
+ *
+ * In dry-run mode (LPF_DRYRUN) nothing is written: 1 is returned for the
+ * first record that would have been removed, or when reading the linkEA
+ * fails with -EINVAL/-ENODATA.
+ *
+ * \param[in] env	execution environment
+ * \param[in] com	namespace LFSCK component
+ * \param[in] child	object whose linkEA is verified
+ * \param[in] flags	flags of the record being processed, as read by the
+ *			caller (not used by this function)
+ *
+ * \retval +ve repaired (in dry-run mode: repair would be needed)
+ * \retval 0 no need to repair
+ * \retval -ve error cases
+ */
+static int mdd_lfsck_namespace_double_scan_one(const struct lu_env *env,
+					       struct lfsck_component *com,
+					       struct mdd_object *child,
+					       __u8 flags)
+{
+	struct mdd_thread_info	*info = mdd_env_info(env);
+	struct lu_attr		*la = &info->mti_la;
+	struct lu_name		*cname = &info->mti_name;
+	struct lu_fid		*pfid = &info->mti_fid;
+	struct lu_fid		*cfid = &info->mti_fid2;
+	struct md_lfsck		*lfsck = com->lc_lfsck;
+	struct mdd_device	*mdd = mdd_lfsck2mdd(lfsck);
+	struct lfsck_bookmark	*bk = &lfsck->ml_bookmark_ram;
+	struct lfsck_namespace	*ns =
+				(struct lfsck_namespace *)com->lc_file_ram;
+	struct mdd_link_data	 ldata = { 0 };
+	struct thandle		*handle = NULL;
+	bool			 locked = false;
+	bool			 update = false;
+	int			 count;
+	int			 rc;
+	ENTRY;
+
+	if (com->lc_journal) {
+
+again:
+		LASSERT(!locked);
+
+		com->lc_journal = 1;
+		handle = mdd_trans_create(env, mdd);
+		if (IS_ERR(handle))
+			RETURN(rc = PTR_ERR(handle));
+
+		rc = mdd_declare_links_add(env, child, handle);
+		if (rc != 0)
+			GOTO(stop, rc);
+
+		rc = mdd_trans_start(env, mdd, handle);
+		if (rc != 0)
+			GOTO(stop, rc);
+
+		mdd_write_lock(env, child, MOR_TGT_CHILD);
+		locked = true;
+	}
+
+	if (unlikely(mdd_is_dead_obj(child)))
+		GOTO(stop, rc = 0);
+
+	rc = mdd_links_read(env, child, &ldata);
+	if (rc != 0) {
+		/* Dry-run: an unreadable/missing linkEA is reported as
+		 * "needs repair" instead of as an error. */
+		if ((bk->lb_param & LPF_DRYRUN) &&
+		    (rc == -EINVAL || rc == -ENODATA))
+			rc = 1;
+
+		GOTO(stop, rc);
+	}
+
+	rc = mdd_la_get(env, child, la, BYPASS_CAPA);
+	if (rc != 0)
+		GOTO(stop, rc);
+
+	/* The first record starts right behind the linkEA header. */
+	ldata.ml_lee = (struct link_ea_entry *)(ldata.ml_leh + 1);
+	count = ldata.ml_leh->leh_reccount;
+	while (count-- > 0) {
+		struct mdd_object *parent = NULL;
+		struct dt_object  *dir;
+
+		mdd_lee_unpack(ldata.ml_lee, &ldata.ml_reclen, cname, pfid);
+		if (!fid_is_sane(pfid))
+			goto shrink;
+
+		parent = mdd_object_find(env, mdd, pfid);
+		if (parent == NULL)
+			goto shrink;
+		else if (IS_ERR(parent))
+			GOTO(stop, rc = PTR_ERR(parent));
+
+		if (!mdd_object_exists(parent))
+			goto shrink;
+
+		/* XXX: need more processing for remote object in the future. */
+		if (mdd_object_remote(parent)) {
+			mdd_object_put(env, parent);
+			ldata.ml_lee = (struct link_ea_entry *)
+				((char *)ldata.ml_lee + ldata.ml_reclen);
+			continue;
+		}
+
+		dir = mdd_object_child(parent);
+		if (unlikely(!dt_try_as_dir(env, dir)))
+			goto shrink;
+
+		/* To guarantee the 'name' is terminated with '0'. */
+		memcpy(info->mti_key, cname->ln_name, cname->ln_namelen);
+		info->mti_key[cname->ln_namelen] = 0;
+		cname->ln_name = info->mti_key;
+		rc = dt_lookup(env, dir, (struct dt_rec *)cfid,
+			       (const struct dt_key *)cname->ln_name,
+			       BYPASS_CAPA);
+		if (rc != 0 && rc != -ENOENT) {
+			mdd_object_put(env, parent);
+			GOTO(stop, rc);
+		}
+
+		if (rc == 0) {
+			/* The name exists: keep the record only if it
+			 * really points back to this object. */
+			if (lu_fid_eq(cfid, mdo2fid(child))) {
+				mdd_object_put(env, parent);
+				ldata.ml_lee = (struct link_ea_entry *)
+					((char *)ldata.ml_lee + ldata.ml_reclen);
+				continue;
+			}
+
+			goto shrink;
+		}
+
+		if (ldata.ml_leh->leh_reccount > la->la_nlink)
+			goto shrink;
+
+		/* XXX: For the case of there is linkea entry, but without name
+		 *	entry pointing to the object, and the object link count
+		 *	isn't less than the count of name entries, then add the
+		 *	name entry back to namespace.
+		 *
+		 *	It is out of LFSCK 1.5 scope, will implement it in the
+		 *	future. Keep the linkEA entry. */
+		mdd_object_put(env, parent);
+		ldata.ml_lee = (struct link_ea_entry *)
+			((char *)ldata.ml_lee + ldata.ml_reclen);
+		continue;
+
+shrink:
+		if (parent != NULL)
+			mdd_object_put(env, parent);
+		/* Dry-run: report without modifying.  Leave through "stop",
+		 * like the dry-run exit after mdd_links_read(), so that the
+		 * write lock and the transaction are released if this call
+		 * was entered with lc_journal already set. */
+		if (bk->lb_param & LPF_DRYRUN)
+			GOTO(stop, rc = 1);
+
+		CDEBUG(D_LFSCK, "Remove linkEA: "DFID"[%.*s], "DFID"\n",
+		       PFID(mdo2fid(child)), cname->ln_namelen, cname->ln_name,
+		       PFID(pfid));
+		mdd_links_del_buf(env, &ldata, cname);
+		update = true;
+	}
+
+	if (update) {
+		/* No transaction yet: redo the check under one. */
+		if (!com->lc_journal) {
+			com->lc_journal = 1;
+			goto again;
+		}
+
+		rc = mdd_links_write(env, child, &ldata, handle);
+	}
+
+	GOTO(stop, rc);
+
+stop:
+	if (locked)
+		mdd_write_unlock(env, child);
+
+	if (handle != NULL)
+		mdd_trans_stop(env, mdd, rc, handle);
+
+	if (rc == 0 && update) {
+		ns->ln_objs_nlink_repaired++;
+		rc = 1;
+	}
+	return rc;
+}
+
/* namespace APIs */
static int mdd_lfsck_namespace_reset(const struct lu_env *env,
return ret;
}
-/* XXX: to be implemented in other patch. */
static int mdd_lfsck_namespace_double_scan(const struct lu_env *env,
struct lfsck_component *com)
{
struct md_lfsck *lfsck = com->lc_lfsck;
+ struct ptlrpc_thread *thread = &lfsck->ml_thread;
+ struct mdd_device *mdd = mdd_lfsck2mdd(lfsck);
struct lfsck_bookmark *bk = &lfsck->ml_bookmark_ram;
struct lfsck_namespace *ns =
(struct lfsck_namespace *)com->lc_file_ram;
+ struct dt_object *obj = com->lc_obj;
+ const struct dt_it_ops *iops = &obj->do_index_ops->dio_it;
+ struct mdd_object *target;
+ struct dt_it *di;
+ struct dt_key *key;
+ struct lu_fid fid;
int rc;
+ __u8 flags;
+ ENTRY;
+
+ lfsck->ml_new_scanned = 0;
+ lfsck->ml_time_last_checkpoint = cfs_time_current();
+ lfsck->ml_time_next_checkpoint = lfsck->ml_time_last_checkpoint +
+ cfs_time_seconds(LFSCK_CHECKPOINT_INTERVAL);
+
+ di = iops->init(env, obj, 0, BYPASS_CAPA);
+ if (IS_ERR(di))
+ RETURN(PTR_ERR(di));
+
+ fid_cpu_to_be(&fid, &ns->ln_fid_latest_scanned_phase2);
+ rc = iops->get(env, di, (const struct dt_key *)&fid);
+ if (rc < 0)
+ GOTO(fini, rc);
+
+ /* Skip the start one, which either has been processed or non-exist. */
+ rc = iops->next(env, di);
+ if (rc != 0)
+ GOTO(put, rc);
+
+ if (OBD_FAIL_CHECK(OBD_FAIL_LFSCK_NO_DOUBLESCAN))
+ GOTO(put, rc = 0);
+
+ do {
+ if (OBD_FAIL_CHECK(OBD_FAIL_LFSCK_DELAY3) &&
+ cfs_fail_val > 0) {
+ struct l_wait_info lwi;
+
+ lwi = LWI_TIMEOUT(cfs_time_seconds(cfs_fail_val),
+ NULL, NULL);
+ l_wait_event(thread->t_ctl_waitq,
+ !thread_is_running(thread),
+ &lwi);
+ }
+
+ key = iops->key(env, di);
+ fid_be_to_cpu(&fid, (const struct lu_fid *)key);
+ target = mdd_object_find(env, mdd, &fid);
+ down_write(&com->lc_sem);
+ if (target == NULL) {
+ rc = 0;
+ goto checkpoint;
+ } else if (IS_ERR(target)) {
+ rc = PTR_ERR(target);
+ goto checkpoint;
+ }
+
+ /* XXX: need more processing for remote object in the future. */
+ if (!mdd_object_exists(target) || mdd_object_remote(target))
+ goto obj_put;
+
+ rc = iops->rec(env, di, (struct dt_rec *)&flags, 0);
+ if (rc == 0)
+ rc = mdd_lfsck_namespace_double_scan_one(env, com,
+ target, flags);
+
+obj_put:
+ mdd_object_put(env, target);
+
+checkpoint:
+ lfsck->ml_new_scanned++;
+ com->lc_new_checked++;
+ ns->ln_fid_latest_scanned_phase2 = fid;
+ if (rc > 0)
+ ns->ln_objs_repaired_phase2++;
+ else if (rc < 0)
+ ns->ln_objs_failed_phase2++;
+ up_write(&com->lc_sem);
+
+ if ((rc == 0) || ((rc > 0) && !(bk->lb_param & LPF_DRYRUN))) {
+ mdd_lfsck_namespace_delete(env, com, &fid);
+ } else if (rc < 0) {
+ flags |= LLF_REPAIR_FAILED;
+ mdd_lfsck_namespace_update(env, com, &fid, flags, true);
+ }
+
+ if (rc < 0 && bk->lb_param & LPF_FAILOUT)
+ GOTO(put, rc);
+
+ if (likely(cfs_time_beforeq(cfs_time_current(),
+ lfsck->ml_time_next_checkpoint)) ||
+ com->lc_new_checked == 0)
+ goto speed;
+
+ down_write(&com->lc_sem);
+ ns->ln_run_time_phase2 += cfs_duration_sec(cfs_time_current() +
+ HALF_SEC - lfsck->ml_time_last_checkpoint);
+ ns->ln_time_last_checkpoint = cfs_time_current_sec();
+ ns->ln_objs_checked_phase2 += com->lc_new_checked;
+ com->lc_new_checked = 0;
+ rc = mdd_lfsck_namespace_store(env, com, false);
+ up_write(&com->lc_sem);
+ if (rc != 0)
+ GOTO(put, rc);
+ lfsck->ml_time_last_checkpoint = cfs_time_current();
+ lfsck->ml_time_next_checkpoint = lfsck->ml_time_last_checkpoint +
+ cfs_time_seconds(LFSCK_CHECKPOINT_INTERVAL);
+
+speed:
+ mdd_lfsck_control_speed(lfsck);
+ if (unlikely(!thread_is_running(thread)))
+ GOTO(put, rc = 0);
+
+ rc = iops->next(env, di);
+ } while (rc == 0);
+
+ GOTO(put, rc);
+
+put:
+ iops->put(env, di);
+
+fini:
+ iops->fini(env, di);
down_write(&com->lc_sem);
+ ns->ln_run_time_phase2 += cfs_duration_sec(cfs_time_current() +
+ HALF_SEC - lfsck->ml_time_last_checkpoint);
ns->ln_time_last_checkpoint = cfs_time_current_sec();
+ ns->ln_objs_checked_phase2 += com->lc_new_checked;
com->lc_new_checked = 0;
- com->lc_journal = 0;
- ns->ln_status = LS_COMPLETED;
- if (!(bk->lb_param & LPF_DRYRUN))
- ns->ln_flags &=
- ~(LF_SCANNED_ONCE | LF_INCONSISTENT | LF_UPGRADE);
- ns->ln_time_last_complete = ns->ln_time_last_checkpoint;
- ns->ln_success_count++;
+ if (rc > 0) {
+ com->lc_journal = 0;
+ ns->ln_status = LS_COMPLETED;
+ if (!(bk->lb_param & LPF_DRYRUN))
+ ns->ln_flags &=
+ ~(LF_SCANNED_ONCE | LF_INCONSISTENT | LF_UPGRADE);
+ ns->ln_time_last_complete = ns->ln_time_last_checkpoint;
+ ns->ln_success_count++;
+ } else if (rc == 0) {
+ if (lfsck->ml_paused)
+ ns->ln_status = LS_PAUSED;
+ else
+ ns->ln_status = LS_STOPPED;
+ } else {
+ ns->ln_status = LS_FAILED;
+ }
- spin_lock(&lfsck->ml_lock);
- cfs_list_del_init(&com->lc_link);
- cfs_list_add_tail(&com->lc_link, &lfsck->ml_list_idle);
- spin_unlock(&lfsck->ml_lock);
+ if (ns->ln_status != LS_PAUSED) {
+ spin_lock(&lfsck->ml_lock);
+ cfs_list_del_init(&com->lc_link);
+ cfs_list_add_tail(&com->lc_link, &lfsck->ml_list_idle);
+ spin_unlock(&lfsck->ml_lock);
+ }
rc = mdd_lfsck_namespace_store(env, com, false);
&lwi);
}
+ if (OBD_FAIL_CHECK(OBD_FAIL_LFSCK_CRASH))
+ RETURN(0);
+
lfsck->ml_new_scanned++;
rc = iops->rec(env, di, (struct dt_rec *)fid, 0);
if (rc != 0) {
if (lfsck->ml_paused && cfs_list_empty(&lfsck->ml_list_scan))
oit_iops->put(&env, oit_di);
- rc = mdd_lfsck_post(&env, lfsck, rc);
+ if (!OBD_FAIL_CHECK(OBD_FAIL_LFSCK_CRASH))
+ rc = mdd_lfsck_post(&env, lfsck, rc);
if (lfsck->ml_di_dir != NULL)
mdd_lfsck_close_dir(&env, lfsck);
RETURN(-ENOTSUPP);
/* start == NULL means auto trigger paused LFSCK. */
- if (start == NULL && cfs_list_empty(&lfsck->ml_list_scan))
+ if ((start == NULL) &&
+ (cfs_list_empty(&lfsck->ml_list_scan) ||
+ OBD_FAIL_CHECK(OBD_FAIL_LFSCK_NO_AUTO)))
RETURN(0);
mutex_lock(&lfsck->ml_mutex);
}
run_test 1b "LFSCK can find out and repair missed FID-in-LMA"
+# test_2a: create a file while OBD_FAIL_LFSCK_LINKEA_CRASH is armed, run the
+# namespace LFSCK, and expect status "completed", updated_phase1 == 1 and a
+# path2fid/fid2path round trip that returns the original path.
+test_2a() {
+ lfsck_prep 1 1
+ echo "start $SINGLEMDS"
+ start $SINGLEMDS $MDT_DEVNAME $MOUNT_OPTS_SCRUB > /dev/null ||
+ error "(1) Fail to start MDS!"
+
+ mount_client $MOUNT || error "(2) Fail to start client!"
+
+ # The fail_loc is active only while 'dummy' is created.
+ #define OBD_FAIL_LFSCK_LINKEA_CRASH 0x1603
+ do_facet $SINGLEMDS $LCTL set_param fail_loc=0x1603
+ touch $DIR/$tdir/dummy
+
+ do_facet $SINGLEMDS $LCTL set_param fail_loc=0
+ umount_client $MOUNT
+ $START_NAMESPACE || error "(3) Fail to start LFSCK for namespace!"
+
+ # Fixed wait; the scan is expected to be finished after it.
+ sleep 3
+ local STATUS=$($SHOW_NAMESPACE | awk '/^status/ { print $2 }')
+ [ "$STATUS" == "completed" ] ||
+ error "(4) Expect 'completed', but got '$STATUS'"
+
+ local repaired=$($SHOW_NAMESPACE |
+ awk '/^updated_phase1/ { print $2 }')
+ [ $repaired -eq 1 ] ||
+ error "(5) Fail to repair crashed linkEA: $repaired"
+
+ mount_client $MOUNT || error "(6) Fail to start client!"
+
+ stat $DIR/$tdir/dummy | grep "Links: 1" > /dev/null ||
+ error "(7) Fail to stat $DIR/$tdir/dummy"
+
+ # fid2path must map the FID back to the path it was obtained from.
+ local dummyfid=$($LFS path2fid $DIR/$tdir/dummy)
+ local dummyname=$($LFS fid2path $DIR $dummyfid)
+ [ "$dummyname" == "$DIR/$tdir/dummy" ] ||
+ error "(8) Fail to repair linkEA: $dummyfid $dummyname"
+}
+run_test 2a "LFSCK can find out and repair crashed linkEA entry"
+
+# test_2b: create a file while OBD_FAIL_LFSCK_LINKEA_MORE is armed, run the
+# namespace LFSCK, and expect status "completed", updated_phase2 == 1 and a
+# path2fid/fid2path round trip that returns the original path.
+test_2b()
+{
+ lfsck_prep 1 1
+ echo "start $SINGLEMDS"
+ start $SINGLEMDS $MDT_DEVNAME $MOUNT_OPTS_SCRUB > /dev/null ||
+ error "(1) Fail to start MDS!"
+
+ mount_client $MOUNT || error "(2) Fail to start client!"
+
+ # The fail_loc is active only while 'dummy' is created.
+ #define OBD_FAIL_LFSCK_LINKEA_MORE 0x1604
+ do_facet $SINGLEMDS $LCTL set_param fail_loc=0x1604
+ touch $DIR/$tdir/dummy
+
+ do_facet $SINGLEMDS $LCTL set_param fail_loc=0
+ umount_client $MOUNT
+ $START_NAMESPACE || error "(3) Fail to start LFSCK for namespace!"
+
+ # Fixed wait; the scan is expected to be finished after it.
+ sleep 3
+ local STATUS=$($SHOW_NAMESPACE | awk '/^status/ { print $2 }')
+ [ "$STATUS" == "completed" ] ||
+ error "(4) Expect 'completed', but got '$STATUS'"
+
+ # Unlike test_2a, the repair is expected to be counted in phase 2.
+ local repaired=$($SHOW_NAMESPACE |
+ awk '/^updated_phase2/ { print $2 }')
+ [ $repaired -eq 1 ] ||
+ error "(5) Fail to repair crashed linkEA: $repaired"
+
+ mount_client $MOUNT || error "(6) Fail to start client!"
+
+ stat $DIR/$tdir/dummy | grep "Links: 1" > /dev/null ||
+ error "(7) Fail to stat $DIR/$tdir/dummy"
+
+ local dummyfid=$($LFS path2fid $DIR/$tdir/dummy)
+ local dummyname=$($LFS fid2path $DIR $dummyfid)
+ [ "$dummyname" == "$DIR/$tdir/dummy" ] ||
+ error "(8) Fail to repair linkEA: $dummyfid $dummyname"
+}
+run_test 2b "LFSCK can find out and remove invalid linkEA entry"
+
test_4()
{
lfsck_prep 3 3
}
run_test 7a "non-stopped LFSCK should auto restarts after MDS remount (1)"
+# test_7b: slow phase 2 down with OBD_FAIL_LFSCK_DELAY3, restart the MDS while
+# the status is "scanning-phase2", and expect the status to be
+# "scanning-phase2" again after the restart and "completed" once the delay is
+# removed.
+test_7b()
+{
+ lfsck_prep 2 2
+ echo "start $SINGLEMDS"
+ start $SINGLEMDS $MDT_DEVNAME $MOUNT_OPTS_SCRUB > /dev/null ||
+ error "(1) Fail to start MDS!"
+
+ mount_client $MOUNT || error "(2) Fail to start client!"
+
+ # Ten files are created while the fail_loc is armed.
+ #define OBD_FAIL_LFSCK_LINKEA_MORE 0x1604
+ do_facet $SINGLEMDS $LCTL set_param fail_loc=0x1604
+ for ((i=0; i<10; i++)); do
+ touch $DIR/$tdir/dummy${i}
+ done
+
+ # fail_val is the per-object delay, in seconds, used by DELAY3.
+ #define OBD_FAIL_LFSCK_DELAY3 0x1602
+ do_facet $SINGLEMDS $LCTL set_param fail_val=1
+ do_facet $SINGLEMDS $LCTL set_param fail_loc=0x1602
+ $START_NAMESPACE || error "(3) Fail to start LFSCK for namespace!"
+
+ sleep 3
+ local STATUS=$($SHOW_NAMESPACE | awk '/^status/ { print $2 }')
+ [ "$STATUS" == "scanning-phase2" ] ||
+ error "(4) Expect 'scanning-phase2', but got '$STATUS'"
+
+ echo "stop $SINGLEMDS"
+ stop $SINGLEMDS > /dev/null || error "(5) Fail to stop MDS!"
+
+ echo "start $SINGLEMDS"
+ start $SINGLEMDS $MDT_DEVNAME $MOUNT_OPTS_SCRUB > /dev/null ||
+ error "(6) Fail to start MDS!"
+
+ # No explicit $START_NAMESPACE here: the scan must resume by itself.
+ STATUS=$($SHOW_NAMESPACE | awk '/^status/ { print $2 }')
+ [ "$STATUS" == "scanning-phase2" ] ||
+ error "(7) Expect 'scanning-phase2', but got '$STATUS'"
+
+ # Remove the delay and let phase 2 run to the end.
+ do_facet $SINGLEMDS $LCTL set_param fail_loc=0
+ do_facet $SINGLEMDS $LCTL set_param fail_val=0
+ sleep 3
+ STATUS=$($SHOW_NAMESPACE | awk '/^status/ { print $2 }')
+ [ "$STATUS" == "completed" ] ||
+ error "(8) Expect 'completed', but got '$STATUS'"
+}
+run_test 7b "non-stopped LFSCK should auto restarts after MDS remount (2)"
+
+# test_8: walk the namespace LFSCK through the statuses checked below, in
+# this order: init, scanning-phase1, stopped, scanning-phase1, failed,
+# scanning-phase1, crashed, scanning-phase1, paused, scanning-phase2,
+# completed.  Each transition is provoked with a fail_loc, a stop command or
+# an MDS restart.
+test_8()
+{
+ lfsck_prep 20 20
+ echo "start $SINGLEMDS"
+ start $SINGLEMDS $MDT_DEVNAME $MOUNT_OPTS_SCRUB > /dev/null ||
+ error "(1) Fail to start MDS!"
+
+ local STATUS=$($SHOW_NAMESPACE | awk '/^status/ { print $2 }')
+ [ "$STATUS" == "init" ] ||
+ error "(2) Expect 'init', but got '$STATUS'"
+
+ mount_client $MOUNT || error "(3) Fail to start client!"
+
+ # Create one directory and five files while linkEA fail_locs are armed.
+ #define OBD_FAIL_LFSCK_LINKEA_CRASH 0x1603
+ do_facet $SINGLEMDS $LCTL set_param fail_loc=0x1603
+ mkdir $DIR/$tdir/crashed
+
+ #define OBD_FAIL_LFSCK_LINKEA_MORE 0x1604
+ do_facet $SINGLEMDS $LCTL set_param fail_loc=0x1604
+ for ((i=0; i<5; i++)); do
+ touch $DIR/$tdir/dummy${i}
+ done
+
+ # init -> scanning-phase1
+ #define OBD_FAIL_LFSCK_DELAY2 0x1601
+ do_facet $SINGLEMDS $LCTL set_param fail_val=2
+ do_facet $SINGLEMDS $LCTL set_param fail_loc=0x1601
+ $START_NAMESPACE || error "(4) Fail to start LFSCK for namespace!"
+
+ STATUS=$($SHOW_NAMESPACE | awk '/^status/ { print $2 }')
+ [ "$STATUS" == "scanning-phase1" ] ||
+ error "(5) Expect 'scanning-phase1', but got '$STATUS'"
+
+ # scanning-phase1 -> stopped (explicit stop)
+ $STOP_LFSCK || error "(6) Fail to stop LFSCK!"
+
+ STATUS=$($SHOW_NAMESPACE | awk '/^status/ { print $2 }')
+ [ "$STATUS" == "stopped" ] ||
+ error "(7) Expect 'stopped', but got '$STATUS'"
+
+ # stopped -> scanning-phase1 (explicit start)
+ $START_NAMESPACE || error "(8) Fail to start LFSCK for namespace!"
+
+ STATUS=$($SHOW_NAMESPACE | awk '/^status/ { print $2 }')
+ [ "$STATUS" == "scanning-phase1" ] ||
+ error "(9) Expect 'scanning-phase1', but got '$STATUS'"
+
+ # scanning-phase1 -> failed
+ #define OBD_FAIL_LFSCK_FATAL2 0x1609
+ do_facet $SINGLEMDS $LCTL set_param fail_loc=0x80001609
+ sleep 3
+ STATUS=$($SHOW_NAMESPACE | awk '/^status/ { print $2 }')
+ [ "$STATUS" == "failed" ] ||
+ error "(10) Expect 'failed', but got '$STATUS'"
+
+ # failed -> scanning-phase1 (explicit start)
+ #define OBD_FAIL_LFSCK_DELAY1 0x1600
+ do_facet $SINGLEMDS $LCTL set_param fail_loc=0x1600
+ $START_NAMESPACE || error "(11) Fail to start LFSCK for namespace!"
+
+ STATUS=$($SHOW_NAMESPACE | awk '/^status/ { print $2 }')
+ [ "$STATUS" == "scanning-phase1" ] ||
+ error "(12) Expect 'scanning-phase1', but got '$STATUS'"
+
+ # scanning-phase1 -> crashed: arm CRASH, stop the MDS, and restart it
+ # with NO_AUTO armed so that the status can be read back.
+ #define OBD_FAIL_LFSCK_CRASH 0x160a
+ do_facet $SINGLEMDS $LCTL set_param fail_loc=0x160a
+ sleep 5
+
+ echo "stop $SINGLEMDS"
+ stop $SINGLEMDS > /dev/null || error "(13) Fail to stop MDS!"
+
+ #define OBD_FAIL_LFSCK_NO_AUTO 0x160b
+ do_facet $SINGLEMDS $LCTL set_param fail_loc=0x160b
+
+ echo "start $SINGLEMDS"
+ start $SINGLEMDS $MDT_DEVNAME $MOUNT_OPTS_SCRUB > /dev/null ||
+ error "(14) Fail to start MDS!"
+
+ STATUS=$($SHOW_NAMESPACE | awk '/^status/ { print $2 }')
+ [ "$STATUS" == "crashed" ] ||
+ error "(15) Expect 'crashed', but got '$STATUS'"
+
+ # crashed -> scanning-phase1 (explicit start)
+ #define OBD_FAIL_LFSCK_DELAY2 0x1601
+ do_facet $SINGLEMDS $LCTL set_param fail_loc=0x1601
+ $START_NAMESPACE || error "(16) Fail to start LFSCK for namespace!"
+
+ STATUS=$($SHOW_NAMESPACE | awk '/^status/ { print $2 }')
+ [ "$STATUS" == "scanning-phase1" ] ||
+ error "(17) Expect 'scanning-phase1', but got '$STATUS'"
+
+ # scanning-phase1 -> paused: plain MDS stop, restart with NO_AUTO armed.
+ echo "stop $SINGLEMDS"
+ stop $SINGLEMDS > /dev/null || error "(18) Fail to stop MDS!"
+
+ #define OBD_FAIL_LFSCK_NO_AUTO 0x160b
+ do_facet $SINGLEMDS $LCTL set_param fail_loc=0x160b
+
+ echo "start $SINGLEMDS"
+ start $SINGLEMDS $MDT_DEVNAME $MOUNT_OPTS_SCRUB > /dev/null ||
+ error "(19) Fail to start MDS!"
+
+ STATUS=$($SHOW_NAMESPACE | awk '/^status/ { print $2 }')
+ [ "$STATUS" == "paused" ] ||
+ error "(20) Expect 'paused', but got '$STATUS'"
+
+ # paused -> scanning-phase2; DELAY3 keeps phase 2 running long enough
+ # for the status and flags to be sampled.
+ #define OBD_FAIL_LFSCK_DELAY3 0x1602
+ do_facet $SINGLEMDS $LCTL set_param fail_val=2
+ do_facet $SINGLEMDS $LCTL set_param fail_loc=0x1602
+
+ $START_NAMESPACE || error "(21) Fail to start LFSCK for namespace!"
+ sleep 2
+ STATUS=$($SHOW_NAMESPACE | awk '/^status/ { print $2 }')
+ [ "$STATUS" == "scanning-phase2" ] ||
+ error "(22) Expect 'scanning-phase2', but got '$STATUS'"
+
+ local FLAGS=$($SHOW_NAMESPACE | awk '/^flags/ { print $2 }')
+ [ "$FLAGS" == "scanned-once,inconsistent" ] ||
+ error "(23) Expect 'scanned-once,inconsistent',but got '$FLAGS'"
+
+ # scanning-phase2 -> completed; the flags must be empty afterwards.
+ do_facet $SINGLEMDS $LCTL set_param fail_loc=0
+ do_facet $SINGLEMDS $LCTL set_param fail_val=0
+ sleep 2
+ STATUS=$($SHOW_NAMESPACE | awk '/^status/ { print $2 }')
+ [ "$STATUS" == "completed" ] ||
+ error "(24) Expect 'completed', but got '$STATUS'"
+
+ FLAGS=$($SHOW_NAMESPACE | awk '/^flags/ { print $2 }')
+ [ -z "$FLAGS" ] || error "(25) Expect empty flags, but got '$FLAGS'"
+
+}
+run_test 8 "LFSCK state machine"
+
test_9a() {
if [ -z "$(grep "processor.*: 1" /proc/cpuinfo)" ]; then
skip "Testing on UP system, the speed may be inaccurate."
}
run_test 9a "LFSCK speed control (1)"
+# test_9b: check the speed limit during phase 2.  Phase 1 is run first with
+# phase 2 disabled (OBD_FAIL_LFSCK_NO_DOUBLESCAN), then LFSCK is restarted
+# with "-s 50" and average_speed_phase2 is sampled before and after the limit
+# is raised to 150.
+test_9b() {
+ # Skipped when /proc/cpuinfo lists no processor numbered 1.
+ if [ -z "$(grep "processor.*: 1" /proc/cpuinfo)" ]; then
+ skip "Testing on UP system, the speed may be inaccurate."
+ return 0
+ fi
+
+ lfsck_prep 0 0
+ echo "start $SINGLEMDS"
+ start $SINGLEMDS $MDT_DEVNAME $MOUNT_OPTS_SCRUB > /dev/null ||
+ error "(1) Fail to start MDS!"
+
+ mount_client $MOUNT || error "(2) Fail to start client!"
+
+ echo "Another preparing... 50 * 50 files (with error) will be created."
+ #define OBD_FAIL_LFSCK_LINKEA_MORE 0x1604
+ do_facet $SINGLEMDS $LCTL set_param fail_loc=0x1604
+ for ((i=0; i<50; i++)); do
+ mkdir -p $DIR/$tdir/d${i}
+ touch $DIR/$tdir/f${i}
+ for ((j=0; j<50; j++)); do
+ touch $DIR/$tdir/d${i}/f${j}
+ done
+ done
+
+ local STATUS=$($SHOW_NAMESPACE | awk '/^status/ { print $2 }')
+ [ "$STATUS" == "init" ] ||
+ error "(3) Expect 'init', but got '$STATUS'"
+
+ # With this fail_loc armed the run is expected to end as "stopped".
+ #define OBD_FAIL_LFSCK_NO_DOUBLESCAN 0x160c
+ do_facet $SINGLEMDS $LCTL set_param fail_loc=0x160c
+ $START_NAMESPACE || error "(4) Fail to start LFSCK!"
+
+ sleep 10
+ STATUS=$($SHOW_NAMESPACE | awk '/^status/ { print $2 }')
+ [ "$STATUS" == "stopped" ] ||
+ error "(5) Expect 'stopped', but got '$STATUS'"
+
+ # Restart with a speed limit of 50; the scan is expected in phase 2.
+ do_facet $SINGLEMDS $LCTL set_param fail_loc=0
+ $START_NAMESPACE -s 50 || error "(6) Fail to start LFSCK!"
+
+ sleep 10
+ STATUS=$($SHOW_NAMESPACE | awk '/^status/ { print $2 }')
+ [ "$STATUS" == "scanning-phase2" ] ||
+ error "(7) Expect 'scanning-phase2', but got '$STATUS'"
+
+ local SPEED=$($SHOW_NAMESPACE |
+ awk '/^average_speed_phase2/ { print $2 }')
+ # (50 * (10 + 1)) / 10 = 55
+ [ $SPEED -lt 60 ] ||
+ error "(8) Unexpected speed $SPEED, should not more than 60"
+
+ # adjust speed limit
+ do_facet $SINGLEMDS \
+ $LCTL set_param -n mdd.${MDT_DEV}.lfsck_speed_limit 150
+ sleep 10
+
+ SPEED=$($SHOW_NAMESPACE | awk '/^average_speed_phase2/ { print $2 }')
+ # (50 * (10 - 1) + 150 * (10 - 1)) / 20 = 90
+ [ $SPEED -lt 85 ] &&
+ error "(9) Unexpected speed $SPEED, should not less than 85"
+
+ # (50 * (10 + 1) + 150 * (10 + 1)) / 20 = 110
+ [ $SPEED -lt 115 ] ||
+ error "(10) Unexpected speed $SPEED, should not more than 115"
+
+ # A limit of 0 is set here and the scan is expected to complete
+ # within the following 5 seconds.
+ do_facet $SINGLEMDS \
+ $LCTL set_param -n mdd.${MDT_DEV}.lfsck_speed_limit 0
+ sleep 5
+ STATUS=$($SHOW_NAMESPACE | awk '/^status/ { print $2 }')
+ [ "$STATUS" == "completed" ] ||
+ error "(11) Expect 'completed', but got '$STATUS'"
+}
+run_test 9b "LFSCK speed control (2)"
+
+# test_10: run ordinary namespace operations (ls, touch, mkdir, unlink, rm,
+# mv, hard link, symlink) from a client while LFSCK is in "scanning-phase1"
+# at a limited speed, then lift the limit and expect "completed".
+test_10()
+{
+ lfsck_prep 1 1
+ echo "start $SINGLEMDS"
+ start $SINGLEMDS $MDT_DEVNAME $MOUNT_OPTS_SCRUB > /dev/null ||
+ error "(1) Fail to start MDS!"
+
+ mount_client $MOUNT || error "(2) Fail to start client!"
+
+ # Even-numbered d<i>/f<i> are created with LINKEA_CRASH armed ...
+ #define OBD_FAIL_LFSCK_LINKEA_CRASH 0x1603
+ do_facet $SINGLEMDS $LCTL set_param fail_loc=0x1603
+ for ((i=0; i<1000; i=$((i+2)))); do
+ mkdir -p $DIR/$tdir/d${i}
+ touch $DIR/$tdir/f${i}
+ for ((j=0; j<5; j++)); do
+ touch $DIR/$tdir/d${i}/f${j}
+ done
+ done
+
+ # ... and odd-numbered ones with LINKEA_MORE armed.
+ #define OBD_FAIL_LFSCK_LINKEA_MORE 0x1604
+ do_facet $SINGLEMDS $LCTL set_param fail_loc=0x1604
+ for ((i=1; i<1000; i=$((i+2)))); do
+ mkdir -p $DIR/$tdir/d${i}
+ touch $DIR/$tdir/f${i}
+ for ((j=0; j<5; j++)); do
+ touch $DIR/$tdir/d${i}/f${j}
+ done
+ done
+
+ do_facet $SINGLEMDS $LCTL set_param fail_loc=0
+ ln $DIR/$tdir/f200 $DIR/$tdir/d200/dummy
+
+ umount_client $MOUNT
+ mount_client $MOUNT || error "(3) Fail to start client!"
+
+ local STATUS=$($SHOW_NAMESPACE | awk '/^status/ { print $2 }')
+ [ "$STATUS" == "init" ] ||
+ error "(4) Expect 'init', but got '$STATUS'"
+
+ # Speed limit 100, so that phase 1 is still running during the
+ # client operations below.
+ $START_NAMESPACE -s 100 || error "(5) Fail to start LFSCK!"
+
+ sleep 10
+ STATUS=$($SHOW_NAMESPACE | awk '/^status/ { print $2 }')
+ [ "$STATUS" == "scanning-phase1" ] ||
+ error "(6) Expect 'scanning-phase1', but got '$STATUS'"
+
+ ls -ailR $MOUNT > /dev/null || error "(7) Fail to ls!"
+
+ touch $DIR/$tdir/d198/a0 || error "(8) Fail to touch!"
+
+ mkdir $DIR/$tdir/d199/a1 || error "(9) Fail to mkdir!"
+
+ unlink $DIR/$tdir/f200 || error "(10) Fail to unlink!"
+
+ rm -rf $DIR/$tdir/d201 || error "(11) Fail to rmdir!"
+
+ mv $DIR/$tdir/f202 $DIR/$tdir/d203/ || error "(12) Fail to rename!"
+
+ ln $DIR/$tdir/f204 $DIR/$tdir/d205/a3 || error "(13) Fail to hardlink!"
+
+ ln -s $DIR/$tdir/d206 $DIR/$tdir/d207/a4 ||
+ error "(14) Fail to softlink!"
+
+ # The scan must still be in phase 1 after all of the operations.
+ STATUS=$($SHOW_NAMESPACE | awk '/^status/ { print $2 }')
+ [ "$STATUS" == "scanning-phase1" ] ||
+ error "(15) Expect 'scanning-phase1', but got '$STATUS'"
+
+ # A limit of 0 is set here and the scan is expected to complete
+ # within the following 10 seconds.
+ do_facet $SINGLEMDS \
+ $LCTL set_param -n mdd.${MDT_DEV}.lfsck_speed_limit 0
+ umount_client $MOUNT
+ sleep 10
+ STATUS=$($SHOW_NAMESPACE | awk '/^status/ { print $2 }')
+ [ "$STATUS" == "completed" ] ||
+ error "(16) Expect 'completed', but got '$STATUS'"
+}
+run_test 10 "System is available during LFSCK scanning"
+
$LCTL set_param debug=-lfsck > /dev/null || true
# restore MDS/OST size