Whamcloud - gitweb
LU-6705 lod: re-order lodsub recovery cleanup 10/15210/3
author wang di <di.wang@intel.com>
Tue, 9 Jun 2015 18:49:45 +0000 (11:49 -0700)
committer Oleg Drokin <oleg.drokin@intel.com>
Wed, 8 Jul 2015 17:30:39 +0000 (17:30 +0000)
Before the MDT stops its recovery thread, it should disconnect
the OSP and stop the sub_recovery thread; otherwise
the recovery thread will be blocked by the sub_recovery
thread.

The right steps to stop the MDT recovery thread are
(see mdt_device_fini()):

1. Do the stack pre_cleanup (via LCFG_PRE_CLEANUP) to disconnect the
OSP and stop the sub_recovery thread (used for collecting update
records).
2. Stop the recovery thread.
3. Clean up the update llog ctxt in mdt_stack_fini()
(via LCFG_CLEANUP).

Signed-off-by: wang di <di.wang@intel.com>
Change-Id: I4e938cb87ab02d388bce16bd15d181fd7fcc783b
Reviewed-on: http://review.whamcloud.com/15210
Tested-by: Jenkins
Tested-by: Maloo <hpdd-maloo@intel.com>
Reviewed-by: Alex Zhuravlev <alexey.zhuravlev@intel.com>
Reviewed-by: Fan Yong <fan.yong@intel.com>
Reviewed-by: Lai Siyao <lai.siyao@intel.com>
Reviewed-by: Oleg Drokin <oleg.drokin@intel.com>
lustre/lod/lod_dev.c
lustre/mdt/mdt_handler.c

index 63dfca9..ff75841 100644 (file)
@@ -631,6 +631,50 @@ free_lrd:
 }
 
 /**
+ * Stop sub recovery threads
+ *
+ * Stop lod_child_recovery_thread, then each lod_mdt_descs target's thread.
+ *
+ * \param[in] env      execution environment (not referenced in this body)
+ * \param[in] lod      lod device to do update recovery
+ */
+static void lod_sub_stop_recovery_threads(const struct lu_env *env,
+                                         struct lod_device *lod)
+{
+       struct lod_tgt_descs *ltd = &lod->lod_mdt_descs;
+       struct ptlrpc_thread    *thread;
+       unsigned int i;
+
+       /* Handshake: set SVC_STOPPING, wake the thread, then wait until it
+        * reports SVC_STOPPED. Only threads flagged SVC_RUNNING are stopped. */
+       thread = &lod->lod_child_recovery_thread;
+       /* Stop lod_child_recovery_thread first; its address is never NULL */
+       if (thread != NULL && thread->t_flags & SVC_RUNNING) {
+               thread->t_flags = SVC_STOPPING;
+               wake_up(&thread->t_ctl_waitq);
+               wait_event(thread->t_ctl_waitq, thread->t_flags & SVC_STOPPED);
+       }
+
+       lod_getref(ltd);
+       cfs_foreach_bit(ltd->ltd_tgt_bitmap, i) {
+               struct lod_tgt_desc     *tgt;
+               /* NOTE(review): desc is freed only if SVC_RUNNING -- confirm */
+               tgt = LTD_TGT(ltd, i);
+               thread = tgt->ltd_recovery_thread;
+               if (thread != NULL && thread->t_flags & SVC_RUNNING) {
+                       thread->t_flags = SVC_STOPPING;
+                       wake_up(&thread->t_ctl_waitq);
+                       wait_event(thread->t_ctl_waitq,
+                                  thread->t_flags & SVC_STOPPED);
+                       OBD_FREE_PTR(tgt->ltd_recovery_thread);
+                       tgt->ltd_recovery_thread = NULL;
+               }
+       }
+
+       lod_putref(lod, ltd);
+}
+
+/**
  * finish all sub llog
  *
  * cleanup all of sub llog ctxt on the LOD.
@@ -653,12 +697,8 @@ static void lod_sub_fini_all_llogs(const struct lu_env *env,
                struct lod_tgt_desc     *tgt;
 
                tgt = LTD_TGT(ltd, i);
-               if (tgt->ltd_recovery_thread != NULL) {
-                       lod_sub_fini_llog(env, tgt->ltd_tgt,
-                                         tgt->ltd_recovery_thread);
-                       OBD_FREE_PTR(tgt->ltd_recovery_thread);
-                       tgt->ltd_recovery_thread = NULL;
-               }
+               lod_sub_fini_llog(env, tgt->ltd_tgt,
+                                 tgt->ltd_recovery_thread);
        }
 
        lod_putref(lod, ltd);
@@ -843,6 +883,7 @@ static int lod_process_config(const struct lu_env *env,
                        CDEBUG(D_HA, "%s: can't process %u: %d\n",
                               lod2obd(lod)->obd_name, lcfg->lcfg_command, rc);
 
+               lod_sub_stop_recovery_threads(env, lod);
                lod_fini_distribute_txn(env, lod);
                lod_sub_fini_all_llogs(env, lod);
                break;
index a8b9c48..48d916b 100644 (file)
@@ -4232,9 +4232,9 @@ static void mdt_fini(const struct lu_env *env, struct mdt_device *m)
        stop.ls_flags = 0;
        next->md_ops->mdo_iocontrol(env, next, OBD_IOC_STOP_LFSCK, 0, &stop);
 
+       mdt_stack_pre_fini(env, m, md2lu_dev(m->mdt_child));
        target_recovery_fini(obd);
        ping_evictor_stop();
-       mdt_stack_pre_fini(env, m, md2lu_dev(m->mdt_child));
 
        if (m->mdt_opts.mo_coordinator)
                mdt_hsm_cdt_stop(m);