* Generally, when the device is mounted, it will
* auto check whether the system is restored from
* file-level backup or not. We trust such detect
- * to distinguish the 1st case from the 2nd case. */
- if (rc == 0) {
- if (!IS_ERR(inode) && inode->i_generation != 0 &&
- inode->i_generation == id->oii_gen)
- /* "id->oii_gen != OSD_OII_NOGEN" is for
- * "@cached == false" case. */
- rc = -ENOENT;
- else
- rc = -EREMCHG;
- } else {
+ * to distinguish the 1st case from the 2nd case:
+ * if the OI files are consistent but may contain
+ * stale OI mappings because of case 2, if iget()
+ * returns -ENOENT or -ESTALE, then it should be
+ * the case 2. */
+ if (rc != 0)
/* If the OI mapping was in OI file before the
* osd_iget_check(), but now, it is disappear,
* then it must be removed by race. That is a
* normal race case. */
- }
+ GOTO(put, rc);
+
+ if ((!IS_ERR(inode) && inode->i_generation != 0 &&
+ inode->i_generation == id->oii_gen) ||
+ (IS_ERR(inode) && !(dev->od_scrub.os_file.sf_flags &
+ SF_INCONSISTENT)))
+ rc = -ENOENT;
+ else
+ rc = -EREMCHG;
} else {
if (id->oii_gen == OSD_OII_NOGEN)
osd_id_gen(id, inode->i_ino, inode->i_generation);
struct osd_device *dev;
struct osd_idmap_cache *oic;
struct osd_inode_id *id;
+ struct osd_inode_id *tid;
struct inode *inode = NULL;
struct osd_scrub *scrub;
struct scrub_file *sf;
__u32 flags = SS_CLEAR_DRYRUN | SS_CLEAR_FAILOUT |
SS_AUTO_FULL;
+ __u32 saved_ino;
+ __u32 saved_gen;
int result = 0;
int rc1 = 0;
bool cached = true;
if (IS_ERR(inode)) {
result = PTR_ERR(inode);
if (result == -ENOENT || result == -ESTALE)
- GOTO(out, result = -ENOENT);
+ GOTO(out, result = 0);
if (result == -EREMCHG) {
LASSERT(obj->oo_inode->i_sb == osd_sb(dev));
result = osd_check_lma(env, obj);
- if (result != 0) {
- if (result == -ENODATA) {
- if (cached) {
- result = osd_oi_lookup(info, dev, fid, id,
- OI_CHECK_FLD);
- if (result != 0) {
- /* result == -ENOENT means that the OI
- * mapping has been removed by race,
- * the target inode belongs to other
- * object.
- *
- * Others error also can be returned
- * directly. */
- iput(inode);
- obj->oo_inode = NULL;
- GOTO(out, result);
- } else {
- /* result == 0 means the cached OI
- * mapping is still in the OI file,
- * the target the inode is valid. */
- }
- } else {
- /* The current OI mapping is from the OI file,
- * since the inode has been found via
- * osd_iget_check(), no need recheck OI. */
- }
-
- goto found;
- }
-
- iput(inode);
- inode = NULL;
- obj->oo_inode = NULL;
- if (result != -EREMCHG)
- GOTO(out, result);
+ if (result == 0)
+ goto found;
- if (cached) {
- result = osd_oi_lookup(info, dev, fid, id,
- OI_CHECK_FLD);
- /* result == -ENOENT means the cached OI mapping
- * has been removed from the OI file by race,
- * above target inode belongs to other object.
- *
- * Others error also can be returned directly. */
- if (result != 0)
- GOTO(out, result);
+ tid = &info->oti_id3;
+ LASSERT(tid != id);
- /* result == 0, goto trigger */
- } else {
+ if (result == -ENODATA) {
+ if (!cached)
/* The current OI mapping is from the OI file,
* since the inode has been found via
* osd_iget_check(), no need recheck OI. */
+ goto found;
+
+ result = osd_oi_lookup(info, dev, fid, tid, OI_CHECK_FLD);
+ if (result == 0) {
+ LASSERTF(tid->oii_ino == id->oii_ino &&
+ tid->oii_gen == id->oii_gen,
+ "OI mapping changed(1): %u/%u => %u/%u",
+ tid->oii_ino, tid->oii_gen,
+ id->oii_ino, id->oii_gen);
+
+ LASSERTF(tid->oii_ino == inode->i_ino &&
+ tid->oii_gen == inode->i_generation,
+ "locate wrong inode(1): %u/%u => %ld/%u",
+ tid->oii_ino, tid->oii_gen,
+ inode->i_ino, inode->i_generation);
+
+ /* "result == 0" means the cached OI mapping is still in
+ * the OI file, so the target the inode is valid. */
+ goto found;
}
- goto trigger;
+ /* "result == -ENOENT" means that the OI mappinghas been removed
+ * by race, the target inode belongs to other object.
+ *
+ * Others error can be returned directly. */
+ if (result == -ENOENT)
+ result = 0;
}
+ saved_ino = inode->i_ino;
+ saved_gen = inode->i_generation;
+ iput(inode);
+ inode = NULL;
+ obj->oo_inode = NULL;
+
+ if (result != -EREMCHG)
+ GOTO(out, result);
+
+ if (!cached)
+ /* The current OI mapping is from the OI file,
+ * since the inode has been found via
+ * osd_iget_check(), no need recheck OI. */
+ goto trigger;
+
+ result = osd_oi_lookup(info, dev, fid, tid, OI_CHECK_FLD);
+ /* "result == -ENOENT" means the cached OI mapping has been removed from
+ * the OI file by race, above target inode belongs to other object.
+ *
+ * Others error can be returned directly. */
+ if (result != 0)
+ GOTO(out, result = (result == -ENOENT ? 0 : result));
+
+ LASSERTF(tid->oii_ino == id->oii_ino && tid->oii_gen == id->oii_gen,
+ "OI mapping changed(2): %u/%u => %u/%u",
+ tid->oii_ino, tid->oii_gen, id->oii_ino, id->oii_gen);
+
+ LASSERTF(tid->oii_ino == saved_ino && tid->oii_gen == saved_gen,
+ "locate wrong inode(2): %u/%u => %u/%u",
+ tid->oii_ino, tid->oii_gen, saved_ino, saved_gen);
+
+ goto trigger;
+
found:
obj->oo_compat_dot_created = 1;
obj->oo_compat_dotdot_created = 1;
osd_dto_credits_noquota[DTO_OBJECT_DELETE]);
/* Recycle idle OI leaf may cause additional three OI blocks
* to be changed. */
- osd_trans_declare_op(env, oh, OSD_OT_DELETE,
+ if (!OBD_FAIL_CHECK(OBD_FAIL_LFSCK_LOST_MDTOBJ2))
+ osd_trans_declare_op(env, oh, OSD_OT_DELETE,
osd_dto_credits_noquota[DTO_INDEX_DELETE] + 3);
/* one less inode */
rc = osd_declare_inode_qid(env, i_uid_read(inode), i_gid_read(inode),
osd_trans_exec_op(env, th, OSD_OT_DESTROY);
ldiskfs_set_inode_state(inode, LDISKFS_STATE_LUSTRE_DESTROY);
- result = osd_oi_delete(osd_oti_get(env), osd, fid, oh->ot_handle,
- OI_CHECK_FLD);
+
+ if (!OBD_FAIL_CHECK(OBD_FAIL_LFSCK_LOST_MDTOBJ2))
+ result = osd_oi_delete(osd_oti_get(env), osd, fid,
+ oh->ot_handle, OI_CHECK_FLD);
osd_trans_exec_check(env, th, OSD_OT_DESTROY);
/* XXX: add to ext3 orphan list */
}
run_test 18f "Skip the failed OST(s) when handle orphan OST-objects"
+test_18g() {
+ echo "#####"
+ echo "The target MDT-object is lost, but related OI mapping is there"
+ echo "The LFSCK should recreate the lost MDT-object without affected"
+ echo "by the stale OI mapping."
+ echo "#####"
+
+ check_mount_and_prep
+ $LFS mkdir -i 0 $DIR/$tdir/a1
+ $LFS setstripe -c -1 -i 0 -S 1M $DIR/$tdir/a1
+ dd if=/dev/zero of=$DIR/$tdir/a1/f1 bs=1M count=$OSTCOUNT
+ local fid1=$($LFS path2fid $DIR/$tdir/a1/f1)
+ echo ${fid1}
+ $LFS getstripe $DIR/$tdir/a1/f1
+ cancel_lru_locks osc
+
+ echo "Inject failure to simulate lost MDT-object but keep OI mapping"
+ #define OBD_FAIL_LFSCK_LOST_MDTOBJ2 0x162e
+ do_facet mds1 $LCTL set_param fail_loc=0x162e
+ rm -f $DIR/$tdir/a1/f1
+
+ do_facet mds1 $LCTL set_param fail_loc=0
+ cancel_lru_locks mdc
+ cancel_lru_locks osc
+
+ echo "Trigger layout LFSCK on all devices to find out orphan OST-object"
+ $START_LAYOUT -r -o || error "(1) Fail to start LFSCK for layout!"
+
+ for k in $(seq $MDSCOUNT); do
+ # The LFSCK status query internal is 30 seconds. For the case
+ # of some LFSCK_NOTIFY RPCs failure/lost, we will wait enough
+ # time to guarantee the status sync up.
+ wait_update_facet mds${k} "$LCTL get_param -n \
+ mdd.$(facet_svc mds${k}).lfsck_layout |
+ awk '/^status/ { print \\\$2 }'" "completed" $LTIME ||
+ error "(2) MDS${k} is not the expected 'completed'"
+ done
+
+ for k in $(seq $OSTCOUNT); do
+ local cur_status=$(do_facet ost${k} $LCTL get_param -n \
+ obdfilter.$(facet_svc ost${k}).lfsck_layout |
+ awk '/^status/ { print $2 }')
+ [ "$cur_status" == "completed" ] ||
+ error "(3) OST${k} Expect 'completed', but got '$cur_status'"
+ done
+
+ local repaired=$(do_facet mds1 $LCTL get_param -n \
+ mdd.$(facet_svc mds1).lfsck_layout |
+ awk '/^repaired_orphan/ { print $2 }')
+ [ $repaired -eq $OSTCOUNT ] ||
+ error "(4) Expect $OSTCOUNT fixed, but got: $repaired"
+
+ echo "Move the files from ./lustre/lost+found/MDTxxxx to namespace"
+ mv $MOUNT/.lustre/lost+found/MDT0000/${fid1}-R-0 $DIR/$tdir/a1/f1 ||
+ error "(5) Fail to move $MOUNT/.lustre/lost+found/MDT0000/${fid1}-R-0"
+
+ $LFS path2fid $DIR/$tdir/a1/f1
+ $LFS getstripe $DIR/$tdir/a1/f1
+}
+run_test 18g "Find out orphan OST-object and repair it (7)"
+
$LCTL set_param debug=-cache > /dev/null
test_19a() {