le64_to_cpu(src->ln_objs_repaired_phase2);
dst->ln_objs_failed_phase2 = le64_to_cpu(src->ln_objs_failed_phase2);
dst->ln_objs_nlink_repaired = le64_to_cpu(src->ln_objs_nlink_repaired);
- dst->ln_objs_lost_found = le64_to_cpu(src->ln_objs_lost_found);
fid_le_to_cpu(&dst->ln_fid_latest_scanned_phase2,
&src->ln_fid_latest_scanned_phase2);
dst->ln_dirent_repaired = le64_to_cpu(src->ln_dirent_repaired);
dst->ln_dangling_repaired = le64_to_cpu(src->ln_dangling_repaired);
dst->ln_mul_ref_repaired = le64_to_cpu(src->ln_mul_ref_repaired);
dst->ln_bad_type_repaired = le64_to_cpu(src->ln_bad_type_repaired);
+ dst->ln_lost_dirent_repaired =
+ le64_to_cpu(src->ln_lost_dirent_repaired);
}
static void lfsck_namespace_cpu_to_le(struct lfsck_namespace *dst,
cpu_to_le64(src->ln_objs_repaired_phase2);
dst->ln_objs_failed_phase2 = cpu_to_le64(src->ln_objs_failed_phase2);
dst->ln_objs_nlink_repaired = cpu_to_le64(src->ln_objs_nlink_repaired);
- dst->ln_objs_lost_found = cpu_to_le64(src->ln_objs_lost_found);
fid_cpu_to_le(&dst->ln_fid_latest_scanned_phase2,
&src->ln_fid_latest_scanned_phase2);
dst->ln_dirent_repaired = cpu_to_le64(src->ln_dirent_repaired);
dst->ln_dangling_repaired = cpu_to_le64(src->ln_dangling_repaired);
dst->ln_mul_ref_repaired = cpu_to_le64(src->ln_mul_ref_repaired);
dst->ln_bad_type_repaired = cpu_to_le64(src->ln_bad_type_repaired);
+ dst->ln_lost_dirent_repaired =
+ cpu_to_le64(src->ln_lost_dirent_repaired);
}
static void lfsck_namespace_record_failure(const struct lu_env *env,
return rc;
}
+/**
+ * Add the specified name entry back to namespace.
+ *
+ * If there is a linkEA entry that back references a name entry under
+ * some parent directory, but such parent directory does not have the
+ * claimed name entry. On the other hand, the linkEA entries count is
+ * not larger than the MDT-object's hard link count. Under such case,
+ * it is quite possible that the name entry is lost. Then the LFSCK
+ * should add the name entry back to the namespace.
+ *
+ * \param[in] env pointer to the thread context
+ * \param[in] com pointer to the lfsck component
+ * \param[in] parent pointer to the directory under which the name entry
+ * will be inserted into
+ * \param[in] child pointer to the object referenced by the name entry
+ * that to be inserted into the parent
+ * \param[in] name the name for the child in the parent directory
+ *
+ * \retval positive number for repaired cases
+ * \retval 0 if nothing to be repaired
+ * \retval negative error number on failure
+ */
static int lfsck_namespace_insert_normal(const struct lu_env *env,
struct lfsck_component *com,
struct dt_object *parent,
struct dt_object *child,
const char *name)
{
- /* XXX: TBD */
- return 0;
+ struct lfsck_thread_info *info = lfsck_env_info(env);
+ struct lu_attr *la = &info->lti_la;
+ struct dt_insert_rec *rec = &info->lti_dt_rec;
+ struct lfsck_instance *lfsck = com->lc_lfsck;
+ struct dt_device *dev = lfsck->li_next;
+ struct thandle *th = NULL;
+ struct lustre_handle lh = { 0 };
+ int rc = 0;
+ ENTRY;
+
+ if (unlikely(!dt_try_as_dir(env, parent)))
+ GOTO(log, rc = -ENOTDIR);
+
+ if (lfsck->li_bookmark_ram.lb_param & LPF_DRYRUN)
+ GOTO(log, rc = 1);
+
+ /* Hold update lock on the parent to prevent others to access. */
+ rc = lfsck_ibits_lock(env, lfsck, parent, &lh,
+ MDS_INODELOCK_UPDATE, LCK_EX);
+ if (rc != 0)
+ GOTO(log, rc);
+
+ th = dt_trans_create(env, dev);
+ if (IS_ERR(th))
+ GOTO(unlock, rc = PTR_ERR(th));
+
+ rec->rec_type = lfsck_object_type(child) & S_IFMT;
+ rec->rec_fid = lfsck_dto2fid(child);
+ rc = dt_declare_insert(env, parent, (const struct dt_rec *)rec,
+ (const struct dt_key *)name, th);
+ if (rc != 0)
+ GOTO(stop, rc);
+
+ if (S_ISDIR(rec->rec_type)) {
+ rc = dt_declare_ref_add(env, parent, th);
+ if (rc != 0)
+ GOTO(stop, rc);
+ }
+
+ memset(la, 0, sizeof(*la));
+ la->la_ctime = cfs_time_current_sec();
+ la->la_valid = LA_CTIME;
+ rc = dt_declare_attr_set(env, parent, la, th);
+ if (rc != 0)
+ GOTO(stop, rc);
+
+ rc = dt_trans_start_local(env, dev, th);
+ if (rc != 0)
+ GOTO(stop, rc);
+
+ rc = dt_insert(env, parent, (const struct dt_rec *)rec,
+ (const struct dt_key *)name, th, BYPASS_CAPA, 1);
+ if (rc != 0)
+ GOTO(stop, rc);
+
+ if (S_ISDIR(rec->rec_type)) {
+ dt_write_lock(env, parent, 0);
+ rc = dt_ref_add(env, parent, th);
+ dt_write_unlock(env, parent);
+ if (rc != 0)
+ GOTO(stop, rc);
+ }
+
+ la->la_ctime = cfs_time_current_sec();
+ rc = dt_attr_set(env, parent, la, th, BYPASS_CAPA);
+
+ GOTO(stop, rc = (rc == 0 ? 1 : rc));
+
+stop:
+ dt_trans_stop(env, dev, th);
+
+unlock:
+ lfsck_ibits_unlock(&lh, LCK_EX);
+
+log:
+ CDEBUG(D_LFSCK, "%s: namespace LFSCK insert object "DFID" with "
+ "the name %s and type %o to the parent "DFID": rc = %d\n",
+ lfsck_lfsck2name(lfsck), PFID(lfsck_dto2fid(child)), name,
+ lfsck_object_type(child) & S_IFMT,
+ PFID(lfsck_dto2fid(parent)), rc);
+
+ if (rc != 0) {
+ struct lfsck_namespace *ns = com->lc_file_ram;
+
+ ns->ln_flags |= LF_INCONSISTENT;
+ if (rc > 0)
+ ns->ln_lost_dirent_repaired++;
+ }
+
+ return rc;
}
static int lfsck_namespace_create_orphan(const struct lu_env *env,
if (rc != 0)
GOTO(unlock2, rc);
- /* Someone has removed the bad name entry and reuse it for other
+ /* Someone has removed the bad name entry and reused it for other
* object by race. */
if (!lu_fid_eq(tfid, cfid))
GOTO(unlock2, rc = 0);
lfsck_ibits_unlock(lh, LCK_EX);
/* Create the lost parent as an orphan. */
rc = lfsck_namespace_create_orphan(env, com, parent);
- if (rc >= 0)
+ if (rc >= 0) {
/* Add the missing name entry to the parent. */
rc = lfsck_namespace_insert_normal(env, com, parent,
child, cname->ln_name);
+ if (unlikely(rc == -EEXIST)) {
+ /* Unfortunately, someone reused the name
+ * under the parent by race. So we have
+ * to remove the linkEA entry from
+ * current child object. It means that the
+ * LFSCK cannot recover the system
+ * totally back to its original status,
+ * but it is necessary to make the
+ * current system to be consistent. */
+ rc = lfsck_namespace_shrink_linkea(env,
+ com, child, ldata,
+ cname, tfid, true);
+ if (rc >= 0) {
+ snprintf(info->lti_tmpbuf,
+ sizeof(info->lti_tmpbuf),
+ "-"DFID, PFID(pfid));
+ rc = lfsck_namespace_insert_orphan(env,
+ com, child, info->lti_tmpbuf,
+ "D", NULL);
+ }
+ }
+ }
GOTO(out, rc);
}
/* Add the missing name entry back to the namespace. */
rc = lfsck_namespace_insert_normal(env, com, parent, child,
cname->ln_name);
+ if (unlikely(rc == -EEXIST)) {
+ /* Unfortunately, someone reused the name under the
+ * parent by race. So we have to remove the linkEA
+ * entry from current child object. It means that the
+ * LFSCK cannot recover the system totally back to
+ * its original status, but it is necessary to make
+ * the current system to be consistent.
+ *
+ * It also may be because of the LFSCK found some
+ * internal status of create operation. Under such
+ * case, nothing to be done. */
+ rc = lfsck_namespace_shrink_linkea_cond(env, com,
+ parent, child, ldata, cname, tfid);
+ if (rc >= 0) {
+ snprintf(info->lti_tmpbuf,
+ sizeof(info->lti_tmpbuf),
+ "-"DFID, PFID(pfid));
+ rc = lfsck_namespace_insert_orphan(env, com,
+ child, info->lti_tmpbuf, "D", NULL);
+ }
+ }
GOTO(out, rc);
}
/* Add the missing name entry to the parent. */
rc = lfsck_namespace_insert_normal(env, com,
parent, child, cname->ln_name);
- linkea_next_entry(&ldata);
+ if (unlikely(rc == -EEXIST))
+ /* Unfortunately, someone reused the
+ * name under the parent by race. So we
+ * have to remove the linkEA entry from
+ * current child object. It means that
+ * the LFSCK cannot recover the system
+ * totally back to its original status,
+ * but it is necessary to make the
+ * current system to be consistent. */
+ rc = lfsck_namespace_shrink_linkea(env,
+ com, child, &ldata,
+ cname, pfid, true);
+ else
+ linkea_next_entry(&ldata);
}
lfsck_object_put(env, parent);
/* Add the missing name entry back to the namespace. */
rc = lfsck_namespace_insert_normal(env, com, parent, child,
cname->ln_name);
+ if (unlikely(rc == -EEXIST))
+ /* Unfortunately, someone reused the name under the
+ * parent by race. So we have to remove the linkEA
+ * entry from current child object. It means that the
+ * LFSCK cannot recover the system totally back to
+ * its original status, but it is necessary to make
+ * the current system to be consistent.
+ *
+ * It also may be because of the LFSCK found some
+ * internal status of create operation. Under such
+ * case, nothing to be done. */
+ rc = lfsck_namespace_shrink_linkea_cond(env, com,
+ parent, child, &ldata, cname, pfid);
+ else
+ linkea_next_entry(&ldata);
+
lfsck_object_put(env, parent);
if (rc < 0)
GOTO(out, rc);
if (rc > 0)
repaired = true;
-
- linkea_next_entry(&ldata);
}
GOTO(out, rc = 0);
"dirent_repaired: "LPU64"\n"
"linkea_repaired: "LPU64"\n"
"nlinks_repaired: "LPU64"\n"
- "lost_found: "LPU64"\n"
"multiple_linked_checked: "LPU64"\n"
"multiple_linked_repaired: "LPU64"\n"
"unknown_inconsistency: "LPU64"\n"
"dangling_repaired: "LPU64"\n"
"multiple_referenced_repaired: "LPU64"\n"
"bad_file_type_repaired: "LPU64"\n"
+ "lost_dirent_repaired: "LPU64"\n"
"success_count: %u\n"
"run_time_phase1: %u seconds\n"
"run_time_phase2: %u seconds\n",
ns->ln_dirent_repaired,
ns->ln_linkea_repaired,
ns->ln_objs_nlink_repaired,
- ns->ln_objs_lost_found,
ns->ln_mul_linked_checked,
ns->ln_mul_linked_repaired,
ns->ln_unknown_inconsistency,
ns->ln_dangling_repaired,
ns->ln_mul_ref_repaired,
ns->ln_bad_type_repaired,
+ ns->ln_lost_dirent_repaired,
ns->ln_success_count,
time_phase1,
time_phase2);
ns->ln_objs_repaired_phase2 = 0;
ns->ln_objs_failed_phase2 = 0;
ns->ln_objs_nlink_repaired = 0;
- ns->ln_objs_lost_found = 0;
ns->ln_dirent_repaired = 0;
ns->ln_linkea_repaired = 0;
ns->ln_mul_linked_checked = 0;
ns->ln_dangling_repaired = 0;
ns->ln_mul_ref_repaired = 0;
ns->ln_bad_type_repaired = 0;
+ ns->ln_lost_dirent_repaired = 0;
fid_zero(&ns->ln_fid_latest_scanned_phase2);
if (list_empty(&com->lc_link_dir))
list_add_tail(&com->lc_link_dir,
ALWAYS_EXCEPT="$ALWAYS_EXCEPT 11 12 13 14 15 16 17 18 19 20 21"
[[ $(lustre_version_code $SINGLEMDS) -lt $(version_code 2.6.50) ]] &&
- ALWAYS_EXCEPT="$ALWAYS_EXCEPT 2d 2e 3 22 23 24 25"
+ ALWAYS_EXCEPT="$ALWAYS_EXCEPT 2d 2e 3 22 23 24 25 26"
build_test_filter
awk '/^updated_phase1/ { print $2 }')
[ $repaired -eq 1 ] ||
- error "(5) Fail to repair missed FID-in-LMA: $repaired"
+ error "(5) Fail to repair the missing FID-in-LMA: $repaired"
do_facet $SINGLEMDS $LCTL set_param fail_loc=0
mount_client $MOUNT || error "(6) Fail to start client!"
do_facet $SINGLEMDS $LCTL set_param fail_loc=0
}
-run_test 1b "LFSCK can find out and repair missed FID-in-LMA"
+run_test 1b "LFSCK can find out and repair the missing FID-in-LMA"
test_2a() {
lfsck_prep 1 1
[ "$dummyname" == "$DIR/$tdir/dummy" ] ||
error "(8) Fail to repair linkEA: $dummyfid $dummyname"
}
-run_test 2d "LFSCK can recover the missed linkEA entry"
+run_test 2d "LFSCK can recover the missing linkEA entry"
test_2e()
{
test_14() {
echo "#####"
echo "The OST-object referenced by the MDT-object should be there;"
- echo "otherwise, the LFSCK should re-create the missed OST-object."
+ echo "otherwise, the LFSCK should re-create the missing OST-object."
echo "#####"
check_mount_and_prep
test_18a() {
echo "#####"
echo "The target MDT-object is there, but related stripe information"
- echo "is lost or partly lost. The LFSCK should regenerate the missed"
+ echo "is lost or partly lost. The LFSCK should regenerate the missing"
echo "layout EA entries."
echo "#####"
}
run_test 25 "LFSCK can repair bad file type in the name entry"
+test_26a() {
+ echo "#####"
+ echo "The local name entry back referenced by the MDT-object is lost."
+ echo "The namespace LFSCK will add the missing local name entry back"
+ echo "to the normal namespace."
+ echo "#####"
+
+ check_mount_and_prep
+
+ $LFS mkdir -i 0 $DIR/$tdir/d0 || error "(1) Fail to mkdir d0"
+ touch $DIR/$tdir/d0/foo || error "(2) Fail to create foo"
+ local foofid=$($LFS path2fid $DIR/$tdir/d0/foo)
+
+ ln $DIR/$tdir/d0/foo $DIR/$tdir/d0/dummy ||
+ error "(3) Fail to hard link to $DIR/$tdir/d0/foo"
+
+ echo "Inject failure stub on MDT0 to simulate the case that"
+ echo "foo's name entry will be removed, but the foo's object"
+ echo "and its linkEA are kept in the system."
+
+ #define OBD_FAIL_LFSCK_NO_NAMEENTRY 0x1624
+ do_facet $SINGLEMDS $LCTL set_param fail_loc=0x1624
+ rm -f $DIR/$tdir/d0/foo || error "(4) Fail to unlink $DIR/$tdir/d0/foo"
+ do_facet $SINGLEMDS $LCTL set_param fail_loc=0
+
+ ls -ail $DIR/$tdir/d0/foo > /dev/null 2>&1 && "(5) 'ls' should fail"
+
+ echo "Trigger namespace LFSCK to repair the missing remote name entry"
+ $START_NAMESPACE -r -A ||
+ error "(6) Fail to start LFSCK for namespace"
+
+ wait_update_facet $SINGLEMDS "$LCTL get_param -n \
+ mdd.${MDT_DEV}.lfsck_namespace |
+ awk '/^status/ { print \\\$2 }'" "completed" 32 || {
+ $SHOW_NAMESPACE
+ error "(7) unexpected status"
+ }
+
+ local repaired=$($SHOW_NAMESPACE |
+ awk '/^lost_dirent_repaired/ { print $2 }')
+ [ $repaired -eq 1 ] ||
+ error "(8) Fail to repair lost dirent: $repaired"
+
+ ls -ail $DIR/$tdir/d0/foo ||
+ error "(9) Fail to 'ls' $DIR/$tdir/d0/foo"
+
+ local foofid2=$($LFS path2fid $DIR/$tdir/d0/foo)
+ [ "$foofid" == "$foofid2" ] ||
+ error "(10) foo's FID changed: $foofid, $foofid2"
+}
+run_test 26a "LFSCK can add the missing local name entry back to the namespace"
+
+test_26b() {
+ [ $MDSCOUNT -lt 2 ] &&
+ skip "We need at least 2 MDSes for this test" && return
+
+ echo "#####"
+ echo "The remote name entry back referenced by the MDT-object is lost."
+ echo "The namespace LFSCK will add the missing remote name entry back"
+ echo "to the normal namespace."
+ echo "#####"
+
+ check_mount_and_prep
+
+ $LFS mkdir -i 1 $DIR/$tdir/d0 || error "(1) Fail to mkdir d0"
+ $LFS mkdir -i 0 $DIR/$tdir/d0/foo || error "(2) Fail to mkdir foo"
+ local foofid=$($LFS path2fid $DIR/$tdir/d0/foo)
+
+ echo "Inject failure stub on MDT0 to simulate the case that"
+ echo "foo's name entry will be removed, but the foo's object"
+ echo "and its linkEA are kept in the system."
+
+ #define OBD_FAIL_LFSCK_NO_NAMEENTRY 0x1624
+ do_facet $SINGLEMDS $LCTL set_param fail_loc=0x1624
+ rmdir $DIR/$tdir/d0/foo || error "(3) Fail to rmdir $DIR/$tdir/d0/foo"
+ do_facet $SINGLEMDS $LCTL set_param fail_loc=0
+
+ ls -ail $DIR/$tdir/d0/foo > /dev/null 2>&1 && "(4) 'ls' should fail"
+
+ echo "Trigger namespace LFSCK to repair the missing remote name entry"
+ $START_NAMESPACE -r -A ||
+ error "(5) Fail to start LFSCK for namespace"
+
+ wait_update_facet $SINGLEMDS "$LCTL get_param -n \
+ mdd.${MDT_DEV}.lfsck_namespace |
+ awk '/^status/ { print \\\$2 }'" "completed" 32 || {
+ $SHOW_NAMESPACE
+ error "(6) unexpected status"
+ }
+
+ local repaired=$($SHOW_NAMESPACE |
+ awk '/^lost_dirent_repaired/ { print $2 }')
+ [ $repaired -eq 1 ] ||
+ error "(7) Fail to repair lost dirent: $repaired"
+
+ ls -ail $DIR/$tdir/d0/foo ||
+ error "(8) Fail to 'ls' $DIR/$tdir/d0/foo"
+
+ local foofid2=$($LFS path2fid $DIR/$tdir/d0/foo)
+ [ "$foofid" == "$foofid2" ] ||
+ error "(9) foo's FID changed: $foofid, $foofid2"
+}
+run_test 26b "LFSCK can add the missing remote name entry back to the namespace"
+
$LCTL set_param debug=-lfsck > /dev/null || true
# restore MDS/OST size