instead do this in cleanup phase so that all OSPs have a chance
to abort in-flight RPCs which can block MDT threads holding
LDLM locks.
Fixes:
226fd401f9 ("LU-7660 dne: support fs default stripe")
Signed-off-by: Alex Zhuravlev <bzzz@whamcloud.com>
Change-Id: Ib3714b29c514a7fa938d47717dc36525654407d6
Reviewed-on: https://review.whamcloud.com/c/fs/lustre-release/+/49925
Tested-by: jenkins <devops@whamcloud.com>
Tested-by: Maloo <maloo@whamcloud.com>
Reviewed-by: Andreas Dilger <adilger@whamcloud.com>
Reviewed-by: Lai Siyao <lai.siyao@whamcloud.com>
Reviewed-by: Oleg Drokin <green@whamcloud.com>
#define OBD_FAIL_OUT_OBJECT_MISS 0x1708
#define OBD_FAIL_OUT_EIO 0x1709
#define OBD_FAIL_BUT_UPDATE_NET_REP 0x170a
+#define OBD_FAIL_OUT_DROP_DESTROY 0x170b
/* MIGRATE */
#define OBD_FAIL_MIGRATE_ENTRIES 0x1801
case LCFG_PRE_CLEANUP:
rc = osp_disconnect(d);
osp_update_fini(env, d);
- if (obd->obd_namespace != NULL)
- ldlm_namespace_free_prior(obd->obd_namespace, NULL, 1);
break;
case LCFG_CLEANUP:
+ /*
+ * clean up ldlm here so that the PRE_CLEANUP phase doesn't block
+ * waiting for locks held by MDT threads that are themselves
+ * waiting for all OSPs to interrupt their in-flight RPCs
+ */
+ if (obd->obd_namespace != NULL)
+ ldlm_namespace_free_prior(obd->obd_namespace, NULL, 1);
lu_dev_del_linkage(dev->ld_site, dev);
rc = osp_shutdown(env, d);
break;
tti->tti_u.update.tti_update_reply,
tti->tti_u.update.tti_update_reply_index);
+ if (OBD_FAIL_CHECK(OBD_FAIL_OUT_DROP_DESTROY))
+ tsi->tsi_pill->rc_req->rq_no_reply = 1;
+
RETURN(rc);
}
}
run_test 135 "Server failure in lock replay phase"
+test_136() {
+ (( $MDSCOUNT >= 3 )) || skip "needs > 2 MDTs"
+ (( MDS1_VERSION >= $(version_code 2.15.53) )) ||
+ skip "need MDS version >= 2.15.53 for LU-16536 fix"
+
+ $LFS mkdir -i0 -c3 $DIR/$tdir || error "can't mkdir"
+ $LFS getdirstripe $DIR/$tdir
+ sync;sync;sync
+
+#define OBD_FAIL_OUT_DROP_DESTROY 0x170b
+ local mdts=$(comma_list $(mdts_nodes))
+ do_nodes $mdts $LCTL set_param fail_loc=0x170b
+ rmdir $DIR/$tdir &
+ sleep 0.5
+ stop mds2
+ stop mds3
+ stop mds1
+ start mds1 $(mdsdevname 1) $MDS_MOUNT_OPTS || error "MDT1 start failed"
+ start mds2 $(mdsdevname 2) $MDS_MOUNT_OPTS || error "MDT2 start failed"
+ start mds3 $(mdsdevname 3) $MDS_MOUNT_OPTS || error "MDT3 star"
+}
+run_test 136 "MDS to disconnect all OSPs first, then cleanup ldlm"
+
complete $SECONDS
check_and_cleanup_lustre
exit_status