From c8777fda0edbd1ae5e8f32a1c4413e333ad3b3fa Mon Sep 17 00:00:00 2001 From: Vladimir Saveliev Date: Tue, 27 Aug 2024 11:17:30 +0300 Subject: [PATCH] LU-17305 obdclass: do not link all exports to obd's timed list Osp device may get class_cleanup()-ed in course of evictor process where obd->u.cli.cl_import gets zeroed: ping_evictor_main class_fail_export obd_disconnect osp_obd_disconnect class_manual_cleanup class_process_config(LCFG_CLEANUP) class_cleanup obd_precleanup osp_device_fini client_obd_cleanup obd_cleanup_client_import obd->u.cli.cl_import = NULL; osp-pre threads are not stopped by the evictor, which leads to assertion on: osp_precreate_thread osp_statfs_update imp = d->opd_obd->u.cli.cl_import; LASSERT(imp); Test to illustrate the issue is added. Add to obd's timed list only evictable exports HPE-bug-id: LUS-11933 Signed-off-by: Vladimir Saveliev Change-Id: I1aab4a573fc4ef33d53d822bec1a28dd3bcece74 Reviewed-on: https://review.whamcloud.com/c/fs/lustre-release/+/53192 Tested-by: jenkins Tested-by: Maloo Reviewed-by: Vitaly Fertman Reviewed-by: Oleg Drokin --- lustre/include/lustre_export.h | 3 ++- lustre/include/obd_support.h | 1 + lustre/ldlm/ldlm_lib.c | 26 +++++++++++++++++++++++--- lustre/mdt/mdt_handler.c | 18 ++++++++++++------ lustre/obdclass/genops.c | 4 +--- lustre/obdclass/obd_config.c | 4 +++- lustre/obdecho/echo_client.c | 6 +++--- lustre/ofd/ofd_obd.c | 6 +++--- lustre/ptlrpc/pinger.c | 4 ++++ lustre/target/tgt_lastrcvd.c | 6 ------ lustre/tests/recovery-small.sh | 18 ++++++++++++++++++ 11 files changed, 70 insertions(+), 26 deletions(-) diff --git a/lustre/include/lustre_export.h b/lustre/include/lustre_export.h index 251408f..844fc4f 100644 --- a/lustre/include/lustre_export.h +++ b/lustre/include/lustre_export.h @@ -259,7 +259,8 @@ struct obd_export { * set as 0 (false) */ exp_old_falloc:1, - exp_hashed:1; + exp_hashed:1, + exp_not_timed:1; /* also protected by exp_lock */ enum lustre_sec_part exp_sp_peer; struct sptlrpc_flavor exp_flvr; /* current */ diff --git a/lustre/include/obd_support.h b/lustre/include/obd_support.h index 894f6da..589f052 100644 --- a/lustre/include/obd_support.h +++ b/lustre/include/obd_support.h @@ -489,6 +489,7 @@ extern bool obd_enable_fname_encoding; #define OBD_FAIL_OBD_STOP_MDS_RACE 0x60c #define OBD_FAIL_OBD_SETUP 0x60d #define OBD_FAIL_OBD_CLEANUP 0x60e +#define OBD_FAIL_OBD_PAUSE_EVICTOR 0x60f #define OBD_FAIL_TGT_REPLY_NET 0x700 #define OBD_FAIL_TGT_CONN_RACE 0x701 diff --git a/lustre/ldlm/ldlm_lib.c b/lustre/ldlm/ldlm_lib.c index 0cfe2c4..c24d892 100644 --- a/lustre/ldlm/ldlm_lib.c +++ b/lustre/ldlm/ldlm_lib.c @@ -850,8 +850,9 @@ static int target_handle_reconnect(struct lustre_handle *conn, int rc = 0; ENTRY; - hdl = &exp->exp_imp_reverse->imp_remote_handle; - if (!exp->exp_connection || !lustre_handle_is_used(hdl)) { + + if (!exp->exp_connection || + !lustre_handle_is_used(&exp->exp_imp_reverse->imp_remote_handle)) { conn->cookie = exp->exp_handle.h_cookie; CDEBUG(D_HA, "connect export for UUID '%s' at %p, cookie %#llx\n", @@ -860,6 +861,7 @@ static int target_handle_reconnect(struct lustre_handle *conn, } target = exp->exp_obd; + hdl = &exp->exp_imp_reverse->imp_remote_handle; /* Might be a re-connect after a partition. */ if (memcmp(&conn->cookie, &hdl->cookie, sizeof(conn->cookie))) { @@ -995,6 +997,12 @@ int rev_import_init(struct obd_export *export) spin_unlock(&export->exp_lock); class_import_put(revimp); + if (!export->exp_not_timed) { + spin_lock(&obd->obd_dev_lock); + list_add_tail(&export->exp_obd_chain_timed, + &obd->obd_exports_timed); + spin_unlock(&obd->obd_dev_lock); + } return 0; } EXPORT_SYMBOL(rev_import_init); @@ -1431,8 +1439,16 @@ dont_check_exports: rc = obd_reconnect(req->rq_svc_thread->t_env, export, target, &cluuid, data, &req->rq_peer.nid); - if (rc == 0) + if (rc == 0) { reconnected = true; + /* + * In case of recovery, + * tgt_clients_data_init() created the export, + * exp_imp_reverse is still needed. + */ + if (export->exp_imp_reverse == NULL) + rc = rev_import_init(export); + } } if (rc) GOTO(out, rc); @@ -1480,6 +1496,10 @@ dont_check_exports: * for each connect called disconnect * should be called to cleanup stuff */ + spin_lock(&target->obd_dev_lock); + list_del_init(&export->exp_obd_chain_timed); + spin_unlock(&target->obd_dev_lock); + class_export_get(export); obd_disconnect(export); } diff --git a/lustre/mdt/mdt_handler.c b/lustre/mdt/mdt_handler.c index 5b91ff4..b205a45 100644 --- a/lustre/mdt/mdt_handler.c +++ b/lustre/mdt/mdt_handler.c @@ -7174,9 +7174,9 @@ static int mdt_connect_internal(const struct lu_env *env, if (OCD_HAS_FLAG(data, PINGLESS)) { if (ptlrpc_pinger_suppress_pings()) { - spin_lock(&exp->exp_obd->obd_dev_lock); - list_del_init(&exp->exp_obd_chain_timed); - spin_unlock(&exp->exp_obd->obd_dev_lock); + spin_lock(&exp->exp_lock); + exp->exp_not_timed = 1; + spin_unlock(&exp->exp_lock); } else { data->ocd_connect_flags &= ~OBD_CONNECT_PINGLESS; } @@ -7469,9 +7469,9 @@ out: * let's not add this export to the timed chain list. */ if (data->ocd_connect_flags & OBD_CONNECT_MDS_MDS) { - spin_lock(&lexp->exp_obd->obd_dev_lock); - list_del_init(&lexp->exp_obd_chain_timed); - spin_unlock(&lexp->exp_obd->obd_dev_lock); + spin_lock(&lexp->exp_lock); + lexp->exp_not_timed = 1; + spin_unlock(&lexp->exp_lock); } } @@ -7503,6 +7503,12 @@ static int mdt_obd_reconnect(const struct lu_env *env, else nodemap_del_member(exp); + if (data->ocd_connect_flags & OBD_CONNECT_MDS_MDS) { + spin_lock(&exp->exp_lock); + exp->exp_not_timed = 1; + spin_unlock(&exp->exp_lock); + } + RETURN(rc); } diff --git a/lustre/obdclass/genops.c b/lustre/obdclass/genops.c index 93b1fe8..e65ef9f 100644 --- a/lustre/obdclass/genops.c +++ b/lustre/obdclass/genops.c @@ -982,6 +982,7 @@ static struct obd_export *__class_new_export(struct obd_device *obd, spin_lock_init(&export->exp_bl_list_lock); INIT_LIST_HEAD(&export->exp_bl_list); INIT_LIST_HEAD(&export->exp_stale_list); + INIT_LIST_HEAD(&export->exp_obd_chain_timed); INIT_WORK(&export->exp_zombie_work, obd_zombie_exp_cull); export->exp_sp_peer = LUSTRE_SP_ANY; @@ -1010,12 +1011,9 @@ static struct obd_export *__class_new_export(struct obd_device *obd, if (!is_self) { class_incref(obd, "export", export); - list_add_tail(&export->exp_obd_chain_timed, - &obd->obd_exports_timed); list_add(&export->exp_obd_chain, &obd->obd_exports); obd->obd_num_exports++; } else { - INIT_LIST_HEAD(&export->exp_obd_chain_timed); INIT_LIST_HEAD(&export->exp_obd_chain); } spin_unlock(&obd->obd_dev_lock); diff --git a/lustre/obdclass/obd_config.c b/lustre/obdclass/obd_config.c index e72b1a7..9ab855c 100644 --- a/lustre/obdclass/obd_config.c +++ b/lustre/obdclass/obd_config.c @@ -640,7 +640,9 @@ int class_attach(struct lustre_cfg *lcfg) } obd->obd_self_export = exp; - list_del_init(&exp->exp_obd_chain_timed); + spin_lock(&exp->exp_lock); + exp->exp_not_timed = 1; + spin_unlock(&exp->exp_lock); class_export_put(exp); rc = class_register_device(obd); diff --git a/lustre/obdecho/echo_client.c b/lustre/obdecho/echo_client.c index df4dfec..4760b58 100644 --- a/lustre/obdecho/echo_client.c +++ b/lustre/obdecho/echo_client.c @@ -2477,9 +2477,9 @@ static int echo_client_setup(const struct lu_env *env, rc = obd_connect(env, &ec->ec_exp, tgt, &echo_uuid, ocd, NULL); if (rc == 0) { /* Turn off pinger because it connects to tgt obd directly. */ - spin_lock(&tgt->obd_dev_lock); - list_del_init(&ec->ec_exp->exp_obd_chain_timed); - spin_unlock(&tgt->obd_dev_lock); + spin_lock(&ec->ec_exp->exp_lock); + ec->ec_exp->exp_not_timed = 1; + spin_unlock(&ec->ec_exp->exp_lock); } OBD_FREE(ocd, sizeof(*ocd)); diff --git a/lustre/ofd/ofd_obd.c b/lustre/ofd/ofd_obd.c index 5c8a7c1..3fe0b4f 100644 --- a/lustre/ofd/ofd_obd.c +++ b/lustre/ofd/ofd_obd.c @@ -237,9 +237,9 @@ static int ofd_parse_connect_data(const struct lu_env *env, if (OCD_HAS_FLAG(data, PINGLESS)) { if (ptlrpc_pinger_suppress_pings()) { - spin_lock(&exp->exp_obd->obd_dev_lock); - list_del_init(&exp->exp_obd_chain_timed); - spin_unlock(&exp->exp_obd->obd_dev_lock); + spin_lock(&exp->exp_lock); + exp->exp_not_timed = 1; + spin_unlock(&exp->exp_lock); } else { data->ocd_connect_flags &= ~OBD_CONNECT_PINGLESS; } diff --git a/lustre/ptlrpc/pinger.c b/lustre/ptlrpc/pinger.c index 3d0f8ba..a04b889 100644 --- a/lustre/ptlrpc/pinger.c +++ b/lustre/ptlrpc/pinger.c @@ -481,6 +481,10 @@ static int ping_evictor_main(void *arg) obd_evict_list); spin_unlock(&pet_lock); + if (!strcmp(obd->obd_type->typ_name, LUSTRE_OSP_NAME)) + CFS_FAIL_TIMEOUT(OBD_FAIL_OBD_PAUSE_EVICTOR, + PING_INTERVAL + PING_EVICT_TIMEOUT); + expire_time = ktime_get_real_seconds() - PING_EVICT_TIMEOUT; CDEBUG(D_HA, "evicting all exports of obd %s older than %lld\n", diff --git a/lustre/target/tgt_lastrcvd.c b/lustre/target/tgt_lastrcvd.c index 5e3821b..2e5bc0b 100644 --- a/lustre/target/tgt_lastrcvd.c +++ b/lustre/target/tgt_lastrcvd.c @@ -1760,12 +1760,6 @@ static int tgt_clients_data_init(const struct lu_env *env, class_export_put(exp); - rc = rev_import_init(exp); - if (rc != 0) { - class_unlink_export(exp); - GOTO(err_out, rc); - } - /* Need to check last_rcvd even for duplicated exports. */ CDEBUG(D_OTHER, "client at idx %d has last_transno = %llu\n", cl_idx, last_transno); diff --git a/lustre/tests/recovery-small.sh b/lustre/tests/recovery-small.sh index 7981998..4c8f61d 100755 --- a/lustre/tests/recovery-small.sh +++ b/lustre/tests/recovery-small.sh @@ -3774,6 +3774,24 @@ test_160() { } run_test 160 "MDT destroys are blocked by grouplocks" +test_161() { + [[ $MDSCOUNT -lt 2 ]] && skip_env "needs >= 2 MDTs" + local ping_interval=$($LCTL get_param -n ping_interval) + local evict_multiplier=$($LCTL get_param -n evict_multiplier) + local pause=$((ping_interval * (evict_multiplier + 2))) + + #define OBD_FAIL_OBD_PAUSE_EVICTOR 0x60f + do_facet mds1 $LCTL set_param fail_loc=0x8000060f + + $LFS mkdir -i 1 $DIR/$tdir || error "mkdir $tdir" + $LFS mkdir -i 0 $DIR/$tdir/remote || error "mkdir $tdir/remote" + + echo sleep $pause seconds + sleep $pause + rmdir $DIR/$tdir/remote || error "rmdir $tdir/remote" +} +run_test 161 "evict osp by ping evictor" + complete_test $SECONDS check_and_cleanup_lustre exit_status -- 1.8.3.1