#define OBD_FAIL_MDS_XATTR_REP 0x161
#define OBD_FAIL_MDS_TRACK_OVERFLOW 0x162
#define OBD_FAIL_MDS_LOV_CREATE_RACE 0x163
+#define OBD_FAIL_MDS_HSM_CDT_DELAY 0x164
/* layout lock */
#define OBD_FAIL_MDS_NO_LL_GETATTR 0x170
mdt_object_put(mti->mti_env, obj);
mutex_lock(&cdt->cdt_restore_lock);
+ if (unlikely((cdt->cdt_state == CDT_STOPPED) ||
+ (cdt->cdt_state == CDT_STOPPING))) {
+ mutex_unlock(&cdt->cdt_restore_lock);
+ mdt_object_unlock(mti, NULL, &crh->crh_lh, 1);
+ OBD_SLAB_FREE_PTR(crh, mdt_hsm_cdt_kmem);
+ GOTO(out, rc = -EAGAIN);
+ }
list_add_tail(&crh->crh_list, &cdt->cdt_restore_hdl);
mutex_unlock(&cdt->cdt_restore_lock);
}
record:
+ /*
+ * Wait here to catch the 2nd RESTORE request to the same FID.
+ * Normally layout lock protects against adding such request.
+ * But when cdt is stopping it cancel all locks via
+ * ldlm_resource_clean and protections may not work.
+ * See LU-9266 and sanity-hsm_407 for details.
+ */
+ OBD_FAIL_TIMEOUT(OBD_FAIL_MDS_HSM_CDT_DELAY, cfs_fail_val);
/* record request */
rc = mdt_agent_record_add(mti->mti_env, mdt, compound_id,
archive_id, flags, hai);
}
run_test 406 "attempting to migrate HSM archived files is safe"
+test_407() {
+ needclients 2 || return 0
+ # test needs a running copytool
+ copytool_setup
+
+ mkdir -p $DIR/$tdir
+
+ local f=$DIR/$tdir/$tfile
+ local f2=$DIR2/$tdir/$tfile
+ local fid
+ fid=$(make_custom_file_for_progress $f 39 1000000)
+ [ $? != 0 ] && skip "not enough free space" && return
+
+ $LFS hsm_archive --archive $HSM_ARCHIVE_NUMBER $f
+ wait_request_state $fid ARCHIVE SUCCEED
+ $LFS hsm_release $f
+
+#define OBD_FAIL_MDS_HSM_CDT_DELAY 0x164
+ do_facet $SINGLEMDS $LCTL set_param fail_val=5 fail_loc=0x164
+
+ md5sum $f &
+ # 1st request holds layout lock while appropriate
+ # RESTORE record is still not added to llog
+ md5sum $f2 &
+ sleep 2
+
+ # after umount hsm_actions->O/x/x log shouldn't have
+ # double RESTORE records like below
+ #[0x200000401:0x1:0x0]...0x58d03a0d/0x58d03a0c action=RESTORE...WAITING
+ #[0x200000401:0x1:0x0]...0x58d03a0c/0x58d03a0d action=RESTORE...WAITING
+ sleep 30 &&
+ do_facet $SINGLEMDS "$LCTL get_param $HSM_PARAM.actions"&
+ fail $SINGLEMDS
+
+ wait_request_state $fid RESTORE SUCCEED
+ copytool_cleanup
+}
+run_test 407 "Check for double RESTORE records in llog"
+
test_500()
{
[ $(lustre_version_code $SINGLEMDS) -lt $(version_code 2.6.92) ] &&