From 218cce8b70e562c8bcdd4833e10e9f0da594c4a6 Mon Sep 17 00:00:00 2001 From: Shaun Tancheff Date: Thu, 3 Apr 2025 12:53:33 +0700 Subject: [PATCH] LU-18506 ptlrpc: improve lu_env_add error handling Improve error handling for cases when lu_env_add() could fail. HPE-bug-id: LUS-12726 Fixes: d891becebb ("LU-17668 ptlrpc: create env in few more threads") Signed-off-by: Shaun Tancheff Change-Id: Ie6627438340196ef50355893c52eacf3322fb2ef Reviewed-on: https://review.whamcloud.com/c/fs/lustre-release/+/57272 Tested-by: jenkins Tested-by: Maloo Reviewed-by: Andreas Dilger Reviewed-by: Timothy Day Reviewed-by: Oleg Drokin --- lustre/include/obd_class.h | 11 +++++------ lustre/ldlm/ldlm_lockd.c | 26 +++++++++++++++++--------- lustre/mgs/mgs_handler.c | 4 +++- lustre/obdclass/genops.c | 36 ++++++++++++++++++++---------------- lustre/obdclass/llog.c | 5 +++-- lustre/obdecho/echo_client.c | 5 ++++- lustre/osp/osp_sync.c | 15 ++++++++++----- lustre/ptlrpc/pinger.c | 27 ++++++++++++++++++--------- lustre/target/tgt_mount.c | 4 +++- 9 files changed, 83 insertions(+), 50 deletions(-) diff --git a/lustre/include/obd_class.h b/lustre/include/obd_class.h index f04f318..fb156d3 100644 --- a/lustre/include/obd_class.h +++ b/lustre/include/obd_class.h @@ -614,9 +614,9 @@ static inline int obd_setup(struct obd_device *obd, struct lustre_cfg *cfg) static inline int obd_precleanup(struct obd_device *obd) { - int rc; struct lu_device_type *ldt = obd->obd_type->typ_lu; struct lu_device *d = obd->obd_lu_dev; + int rc = -ENOMEM; ENTRY; @@ -624,15 +624,14 @@ static inline int obd_precleanup(struct obd_device *obd) struct lu_env *env = lu_env_find(); struct lu_env _env; - if (!env) { + if (!env && lu_env_init(&_env, ldt->ldt_ctx_tags) == 0) { env = &_env; - rc = lu_env_init(env, ldt->ldt_ctx_tags); - LASSERT(rc == 0); - lu_env_add(env); + rc = lu_env_add(env); } ldt->ldt_ops->ldto_device_fini(env, d); if (env == &_env) { - lu_env_remove(env); + if (rc == 0) + lu_env_remove(env); lu_env_fini(env); } } diff --git a/lustre/ldlm/ldlm_lockd.c b/lustre/ldlm/ldlm_lockd.c index 3d3c1c3..697ad4f 100644 --- a/lustre/ldlm/ldlm_lockd.c +++ b/lustre/ldlm/ldlm_lockd.c @@ -160,6 +160,13 @@ static int expired_lock_main(void *arg) ENTRY; + rc = lu_env_init(&env, LCT_DT_THREAD | LCT_MD_THREAD); + if (rc) + RETURN(rc); + rc = lu_env_add(&env); + if (unlikely(rc)) + GOTO(out_fini, rc); + expired_lock_thread_state = ELT_READY; wake_up(&expired_lock_wait_queue); @@ -168,14 +175,12 @@ static int expired_lock_main(void *arg) have_expired_locks() || expired_lock_thread_state == ELT_TERMINATE); - rc = lu_env_init(&env, LCT_DT_THREAD | LCT_MD_THREAD); - if (rc) { - CERROR("can't init env: rc=%d\n", rc); + rc = lu_env_refill(&env); + if (unlikely(rc)) { + CERROR("can't refill env context: rc=%d\n", rc); schedule_timeout(HZ * 3); continue; } - rc = lu_env_add(&env); - LASSERT(rc == 0); spin_lock_bh(&waiting_locks_spinlock); @@ -266,9 +271,6 @@ static int expired_lock_main(void *arg) } spin_unlock_bh(&waiting_locks_spinlock); - lu_env_remove(&env); - lu_env_fini(&env); - if (do_dump) { CERROR("dump the log upon eviction\n"); libcfs_debug_dumplog(); @@ -280,7 +282,13 @@ static int expired_lock_main(void *arg) expired_lock_thread_state = ELT_STOPPED; wake_up(&expired_lock_wait_queue); - RETURN(0); + rc = 0; + + lu_env_remove(&env); +out_fini: + lu_env_fini(&env); + + RETURN(rc); } /** diff --git a/lustre/mgs/mgs_handler.c b/lustre/mgs/mgs_handler.c index d4b945c..5a1799f 100644 --- a/lustre/mgs/mgs_handler.c +++ b/lustre/mgs/mgs_handler.c @@ -877,7 +877,8 @@ static int mgs_iocontrol(unsigned int cmd, struct obd_export *exp, int len, if (rc) RETURN(rc); rc = lu_env_add(&env); - LASSERT(rc == 0); + if (unlikely(rc)) + GOTO(out_fini, rc); rc = -EINVAL; switch (cmd) { @@ -1018,6 +1019,7 @@ out_free: } out: lu_env_remove(&env); +out_fini: lu_env_fini(&env); RETURN(rc); } diff --git a/lustre/obdclass/genops.c b/lustre/obdclass/genops.c index b9f1557..2f751df 100644 --- a/lustre/obdclass/genops.c +++ b/lustre/obdclass/genops.c @@ -1695,7 +1695,6 @@ int obd_export_evict_by_nid(struct obd_device *obd, const char *nid) struct obd_export *doomed_exp; int exports_evicted = 0; struct lu_env *env = NULL, _env; - int rc; libcfs_strnid(&nid_key, nid); @@ -1705,19 +1704,21 @@ int obd_export_evict_by_nid(struct obd_device *obd, const char *nid) */ if (obd->obd_stopping) { spin_unlock(&obd->obd_dev_lock); - return exports_evicted; + return 0; } spin_unlock(&obd->obd_dev_lock); /* can be called via procfs and from ptlrpc */ env = lu_env_find(); if (env == NULL) { - rc = lu_env_init(&_env, LCT_DT_THREAD | LCT_MD_THREAD); + int rc = lu_env_init(&_env, LCT_DT_THREAD | LCT_MD_THREAD); + if (rc) return rc; - rc = lu_env_add(&_env); - LASSERT(rc == 0); env = &_env; + rc = lu_env_add(env); + if (unlikely(rc)) + GOTO(out_fini, exports_evicted = rc); } doomed_exp = NULL; @@ -1738,15 +1739,17 @@ int obd_export_evict_by_nid(struct obd_device *obd, const char *nid) doomed_exp = NULL; } + if (!exports_evicted) + CDEBUG(D_HA, + "%s: can't disconnect NID '%s': no exports found\n", + obd->obd_name, nid); + if (env == &_env) { lu_env_remove(&_env); +out_fini: lu_env_fini(&_env); } - if (!exports_evicted) - CDEBUG(D_HA, - "%s: can't disconnect NID '%s': no exports found\n", - obd->obd_name, nid); return exports_evicted; } EXPORT_SYMBOL(obd_export_evict_by_nid); @@ -1755,28 +1758,28 @@ int obd_export_evict_by_uuid(struct obd_device *obd, const char *uuid) { struct obd_export *doomed_exp = NULL; struct obd_uuid doomed_uuid; - int exports_evicted = 0; struct lu_env env; - int rc; + int rc = 0; spin_lock(&obd->obd_dev_lock); if (obd->obd_stopping) { spin_unlock(&obd->obd_dev_lock); - return exports_evicted; + return 0; } spin_unlock(&obd->obd_dev_lock); obd_str2uuid(&doomed_uuid, uuid); if (obd_uuid_equals(&doomed_uuid, &obd->obd_uuid)) { CERROR("%s: can't evict myself\n", obd->obd_name); - return exports_evicted; + return 0; } rc = lu_env_init(&env, LCT_DT_THREAD | LCT_MD_THREAD); if (rc) return rc; rc = lu_env_add(&env); - LASSERT(rc == 0); + if (unlikely(rc)) + goto out_fini; doomed_exp = obd_uuid_lookup(obd, &doomed_uuid); @@ -1789,13 +1792,14 @@ int obd_export_evict_by_uuid(struct obd_device *obd, const char *uuid) class_fail_export(doomed_exp); class_export_put(doomed_exp); obd_uuid_del(obd, doomed_exp); - exports_evicted++; + rc = 1; } lu_env_remove(&env); +out_fini: lu_env_fini(&env); - return exports_evicted; + return rc; } #endif /* HAVE_SERVER_SUPPORT */ diff --git a/lustre/obdclass/llog.c b/lustre/obdclass/llog.c index d6509e0..556b4c2 100644 --- a/lustre/obdclass/llog.c +++ b/lustre/obdclass/llog.c @@ -518,10 +518,10 @@ static int llog_process_thread(void *arg) rc = lu_env_init(&_env, LCT_DT_THREAD | LCT_MD_THREAD); if (rc) RETURN(rc); + env = &_env; rc = lu_env_add(&_env); if (unlikely(rc)) - RETURN(rc); - env = &_env; + GOTO(out_fini, rc); } lti = llog_info(env); @@ -833,6 +833,7 @@ out: out_env: if (env == &_env) { lu_env_remove(&_env); +out_fini: lu_env_fini(&_env); } diff --git a/lustre/obdecho/echo_client.c b/lustre/obdecho/echo_client.c index c0710e7..2589edc 100644 --- a/lustre/obdecho/echo_client.c +++ b/lustre/obdecho/echo_client.c @@ -2280,7 +2280,9 @@ echo_client_iocontrol(unsigned int cmd, struct obd_export *exp, int len, if (IS_ERR(env)) RETURN(PTR_ERR(env)); - lu_env_add(env); + rc = lu_env_add(env); + if (rc) + GOTO(out_put, rc); #ifdef HAVE_SERVER_SUPPORT if (cmd == OBD_IOC_ECHO_MD || cmd == OBD_IOC_ECHO_ALLOC_SEQ) @@ -2410,6 +2412,7 @@ echo_client_iocontrol(unsigned int cmd, struct obd_export *exp, int len, EXIT; out: lu_env_remove(env); +out_put: cl_env_put(env, &refcheck); return rc; diff --git a/lustre/osp/osp_sync.c b/lustre/osp/osp_sync.c index b65db08..bdf6e19 100644 --- a/lustre/osp/osp_sync.c +++ b/lustre/osp/osp_sync.c @@ -1349,7 +1349,8 @@ static int osp_sync_thread(void *_args) RETURN(rc); } rc = lu_env_add(&env); - LASSERT(rc == 0); + if (unlikely(rc)) + GOTO(out_fini, rc); again: ctxt = llog_get_context(obd, LLOG_MDS_OST_ORIG_CTXT); @@ -1466,16 +1467,20 @@ out: atomic_read(&d->opd_sync_rpcs_in_flight), list_empty(&d->opd_sync_committed_there) ? "" : "!"); - lu_env_remove(&env); - lu_env_fini(&env); - if (xchg(&d->opd_sync_task, NULL) == NULL) /* already being waited for */ wait_event_interruptible(d->opd_sync_waitq, kthread_should_stop()); + + /* caller will delete args when non-zero is returned here */ OBD_FREE_PTR(args); + rc = 0; - RETURN(0); + lu_env_remove(&env); +out_fini: + lu_env_fini(&env); + + RETURN(rc); } /** diff --git a/lustre/ptlrpc/pinger.c b/lustre/ptlrpc/pinger.c index c758d58..01eb712 100644 --- a/lustre/ptlrpc/pinger.c +++ b/lustre/ptlrpc/pinger.c @@ -450,6 +450,16 @@ static int ping_evictor_main(void *arg) int rc; ENTRY; + + rc = lu_env_init(&env, LCT_DT_THREAD | LCT_MD_THREAD); + if (rc) { + CERROR("can't init env: rc=%d\n", rc); + RETURN(rc); + } + rc = lu_env_add(&env); + if (unlikely(rc)) + GOTO(out_fini, rc); + unshare_fs_struct(); CDEBUG(D_HA, "Starting Ping Evictor\n"); pet_state = PET_READY; @@ -462,14 +472,12 @@ static int ping_evictor_main(void *arg) if ((pet_state == PET_TERMINATE) && list_empty(&pet_list)) break; - rc = lu_env_init(&env, LCT_DT_THREAD | LCT_MD_THREAD); - if (rc) { - CERROR("can't init env: rc=%d\n", rc); + rc = lu_env_refill(&env); + if (unlikely(rc)) { + CERROR("can't refill env context: rc=%d\n", rc); schedule_timeout(HZ * 3); continue; } - rc = lu_env_add(&env); - LASSERT(rc == 0); /* * we only get here if pet_exp != NULL, and the end of this @@ -524,9 +532,6 @@ static int ping_evictor_main(void *arg) } spin_unlock(&obd->obd_dev_lock); - lu_env_remove(&env); - lu_env_fini(&env); - spin_lock(&pet_lock); list_del_init(&obd->obd_evict_list); spin_unlock(&pet_lock); @@ -535,7 +540,11 @@ static int ping_evictor_main(void *arg) } CDEBUG(D_HA, "Exiting Ping Evictor\n"); - RETURN(0); + lu_env_remove(&env); +out_fini: + lu_env_fini(&env); + + RETURN(rc); } void ping_evictor_start(void) diff --git a/lustre/target/tgt_mount.c b/lustre/target/tgt_mount.c index 97757f7..8861b6b 100644 --- a/lustre/target/tgt_mount.c +++ b/lustre/target/tgt_mount.c @@ -1712,7 +1712,8 @@ static void server_put_super(struct super_block *sb) GOTO(out, rc); } rc = lu_env_add(&env); - LASSERT(rc == 0); + if (unlikely(rc)) + GOTO(out_fini, rc); /* Stop the target */ if (!test_bit(LMD_FLG_NOSVC, lsi->lsi_lmd->lmd_flags) && @@ -1803,6 +1804,7 @@ static void server_put_super(struct super_block *sb) } lu_env_remove(&env); +out_fini: lu_env_fini(&env); out: -- 1.8.3.1