From: Sebastien Buisson Date: Tue, 28 Jan 2020 12:00:49 +0000 (+0900) Subject: LU-13116 mgc: do not lose sptlrpc config lock X-Git-Tag: 2.13.53~77 X-Git-Url: https://git.whamcloud.com/?p=fs%2Flustre-release.git;a=commitdiff_plain;h=26e8f1137b82b26dca0219f58543ea6edd9a30ae LU-13116 mgc: do not lose sptlrpc config lock Multiple targets on the same node can make use of the same lock on sptlrpc config log. So if a target is being stopped, sptlrpc config log should not be directly marked as stopping. Fixes: 0ad54d5977 ("LU-11185 mgc: config lock leak") Signed-off-by: Sebastien Buisson Change-Id: I09c2b0c276daadb66721df88c33f734ebba86114 Reviewed-on: https://review.whamcloud.com/37344 Reviewed-by: Andreas Dilger Tested-by: Andreas Dilger Tested-by: Maloo Reviewed-by: Alexander Boyko Reviewed-by: Oleg Drokin --- diff --git a/lustre/mgc/mgc_request.c b/lustre/mgc/mgc_request.c index b042c7f..f827fba 100644 --- a/lustre/mgc/mgc_request.c +++ b/lustre/mgc/mgc_request.c @@ -156,8 +156,10 @@ static void config_log_put(struct config_llog_data *cld) config_log_put(cld->cld_params); config_log_put(cld->cld_nodemap); config_log_put(cld->cld_sptlrpc); - if (cld_is_sptlrpc(cld)) + if (cld_is_sptlrpc(cld)) { + cld->cld_stopping = 1; sptlrpc_conf_log_stop(cld->cld_logname); + } class_export_put(cld->cld_mgcexp); OBD_FREE(cld, sizeof(*cld) + strlen(cld->cld_logname) + 1); @@ -519,16 +521,17 @@ static int config_log_end(char *logname, struct config_llog_instance *cfg) mutex_unlock(&cld->cld_lock); config_mark_cld_stop(cld_recover); + config_log_put(cld_recover); config_mark_cld_stop(cld_params); - config_mark_cld_stop(cld_barrier); - config_mark_cld_stop(cld_sptlrpc); - config_log_put(cld_params); - config_log_put(cld_recover); - /* don't set cld_stopping on nm lock as other targets may be active */ - config_log_put(cld_nodemap); + config_mark_cld_stop(cld_barrier); config_log_put(cld_barrier); + /* don't explicitly set cld_stopping on sptlrpc lock here, as other + * targets may be active, it will be done in config_log_put if necessary + */ config_log_put(cld_sptlrpc); + /* don't set cld_stopping on nm lock as other targets may be active */ + config_log_put(cld_nodemap); /* drop the ref from the find */ config_log_put(cld); diff --git a/lustre/tests/recovery-small.sh b/lustre/tests/recovery-small.sh index 0b9285f..972441c 100755 --- a/lustre/tests/recovery-small.sh +++ b/lustre/tests/recovery-small.sh @@ -2978,6 +2978,29 @@ test_140b() { } run_test 140b "local mount is excluded from recovery" +test_141() { + local oldc + local newc + + [ $PARALLEL == "yes" ] && skip "skip parallel run" + combined_mgs_mds || skip "needs combined MGS/MDT" + ( local_mode || from_build_tree ) && + skip "cannot run in local mode or from build tree" + + # some get_param have a bug to handle dot in param name + do_rpc_nodes $(facet_active_host $SINGLEMDS) cancel_lru_locks MGC + oldc=$(do_facet $SINGLEMDS $LCTL get_param -n \ + 'ldlm.namespaces.MGC*.lock_count') + fail $SINGLEMDS + do_rpc_nodes $(facet_active_host $SINGLEMDS) cancel_lru_locks MGC + newc=$(do_facet $SINGLEMDS $LCTL get_param -n \ + 'ldlm.namespaces.MGC*.lock_count') + + [ $oldc -eq $newc ] || error "mgc lost locks ($oldc != $newc)" + return 0 +} +run_test 141 "do not lose locks on MGS restart" + complete $SECONDS check_and_cleanup_lustre exit_status diff --git a/lustre/tests/test-framework.sh b/lustre/tests/test-framework.sh index 0ff2a81..ba23673 100755 --- a/lustre/tests/test-framework.sh +++ b/lustre/tests/test-framework.sh @@ -972,6 +972,22 @@ add_sk_mntflag() { echo -n $mt_opts } +from_build_tree() { + local from_tree + + case $LUSTRE in + /usr/lib/lustre/* | /usr/lib64/lustre/* | /usr/lib/lustre | \ + /usr/lib64/lustre ) + from_tree=false + ;; + *) + from_tree=true + ;; + esac + + [ $from_tree = true ] +} + init_gss() { if $SHARED_KEY; then GSS=true @@ -982,16 +998,6 @@ init_gss() { return fi - case $LUSTRE in - /usr/lib/lustre/* | /usr/lib64/lustre/* | /usr/lib/lustre | \ - /usr/lib64/lustre ) - from_build_tree=false - ;; - *) - from_build_tree=true - ;; - esac - if ! module_loaded ptlrpc_gss; then load_module ptlrpc/gss/ptlrpc_gss module_loaded ptlrpc_gss || @@ -1010,7 +1016,7 @@ init_gss() { SK_NO_KEY=false local lgssc_conf_file="/etc/request-key.d/lgssc.conf" - if $from_build_tree; then + if from_build_tree; then mkdir -p $SK_OM_PATH if grep -q request-key /proc/mounts > /dev/null; then echo "SSK: Request key already mounted." @@ -1029,7 +1035,7 @@ init_gss() { cat $lgssc_conf_file if ! local_mode; then - if $from_build_tree; then + if from_build_tree; then do_nodes $(comma_list $(all_nodes)) "mkdir -p \ $SK_OM_PATH" do_nodes $(comma_list $(all_nodes)) "mount \ @@ -1115,7 +1121,7 @@ init_gss() { fi if [ -n "$LGSS_KEYRING_DEBUG" ] && \ - ( local_mode || $from_build_tree ); then + ( local_mode || from_build_tree ); then lctl set_param -n \ sptlrpc.gss.lgss_keyring.debug_level=$LGSS_KEYRING_DEBUG elif [ -n "$LGSS_KEYRING_DEBUG" ]; then @@ -1134,16 +1140,6 @@ cleanup_gss() { cleanup_sk() { if $GSS_SK; then - case $LUSTRE in - /usr/lib/lustre/* | /usr/lib64/lustre/* | /usr/lib/lustre | \ - /usr/lib64/lustre ) - from_build_tree=false - ;; - *) - from_build_tree=true - ;; - esac - if $SK_S2S; then do_node $(mgs_node) "$LCTL nodemap_del $SK_S2SNM" do_node $(mgs_node) "$LCTL nodemap_del $SK_S2SNMCLI" @@ -1156,7 +1152,7 @@ cleanup_sk() { $SK_PATH/$FSNAME*.key $SK_PATH/nodemap/$FSNAME*.key" do_nodes $(comma_list $(all_nodes)) "keyctl show | \ awk '/lustre/ { print \\\$1 }' | xargs -IX keyctl unlink X" - if $from_build_tree; then + if from_build_tree; then # Remove the mount and clean up the files we added to # SK_PATH do_nodes $(comma_list $(all_nodes)) "while grep -q \