From: wang di Date: Tue, 9 Jun 2015 18:49:45 +0000 (-0700) Subject: LU-6705 lod: re-order lodsub recovery cleanup X-Git-Tag: 2.7.57~68 X-Git-Url: https://git.whamcloud.com/gitweb?a=commitdiff_plain;h=8299bdd484ad44d3ed25dcc68e8440242c155c80;p=fs%2Flustre-release.git LU-6705 lod: re-order lodsub recovery cleanup Before the MDT stops the recovery thread, it should disconnect the OSP and stop the sub_recovery thread, otherwise the recovery thread will be blocked by the sub_recovery thread. The right steps to stop the MDT recovery thread are (see mdt_device_fini()): 1. Do stack pre_cleanup (by LCFG_PRE_CLEANUP), to disconnect OSP and stop sub_recovery thread (used for collecting update records). 2. Stop the recovery thread. 3. Clean up the update llog ctxt in mdt_stack_fini() (by LCFG_CLEANUP). Signed-off-by: wang di Change-Id: I4e938cb87ab02d388bce16bd15d181fd7fcc783b Reviewed-on: http://review.whamcloud.com/15210 Tested-by: Jenkins Tested-by: Maloo Reviewed-by: Alex Zhuravlev Reviewed-by: Fan Yong Reviewed-by: Lai Siyao Reviewed-by: Oleg Drokin --- diff --git a/lustre/lod/lod_dev.c b/lustre/lod/lod_dev.c index 63dfca9..ff75841 100644 --- a/lustre/lod/lod_dev.c +++ b/lustre/lod/lod_dev.c @@ -631,6 +631,50 @@ free_lrd: } /** + * Stop sub recovery thread + * + * Stop sub recovery thread on all subs. 
+ * + * \param[in] env execution environment + * \param[in] lod lod device to do update recovery + */ +static void lod_sub_stop_recovery_threads(const struct lu_env *env, + struct lod_device *lod) +{ + struct lod_tgt_descs *ltd = &lod->lod_mdt_descs; + struct ptlrpc_thread *thread; + unsigned int i; + + /* Stop the update log commit cancel threads and finish master + * llog ctxt */ + thread = &lod->lod_child_recovery_thread; + /* Stop recovery thread first */ + if (thread != NULL && thread->t_flags & SVC_RUNNING) { + thread->t_flags = SVC_STOPPING; + wake_up(&thread->t_ctl_waitq); + wait_event(thread->t_ctl_waitq, thread->t_flags & SVC_STOPPED); + } + + lod_getref(ltd); + cfs_foreach_bit(ltd->ltd_tgt_bitmap, i) { + struct lod_tgt_desc *tgt; + + tgt = LTD_TGT(ltd, i); + thread = tgt->ltd_recovery_thread; + if (thread != NULL && thread->t_flags & SVC_RUNNING) { + thread->t_flags = SVC_STOPPING; + wake_up(&thread->t_ctl_waitq); + wait_event(thread->t_ctl_waitq, + thread->t_flags & SVC_STOPPED); + OBD_FREE_PTR(tgt->ltd_recovery_thread); + tgt->ltd_recovery_thread = NULL; + } + } + + lod_putref(lod, ltd); +} + +/** * finish all sub llog * * cleanup all of sub llog ctxt on the LOD. 
@@ -653,12 +697,8 @@ static void lod_sub_fini_all_llogs(const struct lu_env *env, struct lod_tgt_desc *tgt; tgt = LTD_TGT(ltd, i); - if (tgt->ltd_recovery_thread != NULL) { - lod_sub_fini_llog(env, tgt->ltd_tgt, - tgt->ltd_recovery_thread); - OBD_FREE_PTR(tgt->ltd_recovery_thread); - tgt->ltd_recovery_thread = NULL; - } + lod_sub_fini_llog(env, tgt->ltd_tgt, + tgt->ltd_recovery_thread); } lod_putref(lod, ltd); @@ -843,6 +883,7 @@ static int lod_process_config(const struct lu_env *env, CDEBUG(D_HA, "%s: can't process %u: %d\n", lod2obd(lod)->obd_name, lcfg->lcfg_command, rc); + lod_sub_stop_recovery_threads(env, lod); lod_fini_distribute_txn(env, lod); lod_sub_fini_all_llogs(env, lod); break; diff --git a/lustre/mdt/mdt_handler.c b/lustre/mdt/mdt_handler.c index a8b9c48..48d916b 100644 --- a/lustre/mdt/mdt_handler.c +++ b/lustre/mdt/mdt_handler.c @@ -4232,9 +4232,9 @@ static void mdt_fini(const struct lu_env *env, struct mdt_device *m) stop.ls_flags = 0; next->md_ops->mdo_iocontrol(env, next, OBD_IOC_STOP_LFSCK, 0, &stop); + mdt_stack_pre_fini(env, m, md2lu_dev(m->mdt_child)); target_recovery_fini(obd); ping_evictor_stop(); - mdt_stack_pre_fini(env, m, md2lu_dev(m->mdt_child)); if (m->mdt_opts.mo_coordinator) mdt_hsm_cdt_stop(m);