From: Mr NeilBrown Date: Wed, 23 Oct 2019 00:30:49 +0000 (+1100) Subject: LU-12780 lod: don't use ptlrpc_thread for recovery thread X-Git-Tag: 2.13.53~85 X-Git-Url: https://git.whamcloud.com/?p=fs%2Flustre-release.git;a=commitdiff_plain;h=3b0094779b58c162e5dc553f6d7cdff1910d0c3b LU-12780 lod: don't use ptlrpc_thread for recovery thread rather than ptlrpc_thread, use native kthreads functionality. - This saves a memory allocation - and associated error handling and memory freeing - for a 'struct ptlrpc_thread' - startup-up code is moved out of the thread to before the thread is started. This make error handling clearer, though it does require putting a 'struct lu_env' in struct lod_recovery_data. - As freeing lod_recovery_data and the env is most easily done in the thread, add a completion to ensure the thread actually starts. Without this it is possible for a kthread_stop() to stop the thread before it calls the thread function at all. - As the thread exits independently but is also waited for, we need a handshake around kthread_stop(). Both thread_exit and code that stops the thread use xchg() to store a NULL in the task pointer ltd_recovery_task or lod_child_recovery_thread. If the thread reads out a non-NULL pointer, it can exit because nothing will wait for it. If it gets NULL, it must wait for kthread_should_stop(). The waiter will call kthread_shop() immediately after the xchg, so the wait will be short. We use wait_var_event() for this wait even though there is no wake_up_var(). wait_var_event() is just a convenient macro that handles thread state change for us - the wait queue is irrelevant. Signed-off-by: Mr NeilBrown Change-Id: I95403fc38bac99f21a4353ec7814816b813184d0 Reviewed-on: https://review.whamcloud.com/36261 Tested-by: jenkins Tested-by: Maloo Reviewed-by: Andreas Dilger Reviewed-by: Shaun Tancheff Reviewed-by: James Simmons Reviewed-by: Oleg Drokin --- diff --git a/lustre/include/lu_object.h b/lustre/include/lu_object.h index e617d03..88e25c5 100644 --- a/lustre/include/lu_object.h +++ b/lustre/include/lu_object.h @@ -1476,7 +1476,7 @@ struct lu_tgt_desc { __u32 ltd_index; __u32 ltd_gen; struct list_head ltd_kill; - struct ptlrpc_thread *ltd_recovery_thread; + struct task_struct *ltd_recovery_task; struct mutex ltd_fid_mutex; struct lu_tgt_qos ltd_qos; /* qos info per target */ struct obd_statfs ltd_statfs; diff --git a/lustre/lod/lod_dev.c b/lustre/lod/lod_dev.c index e36190f..170fff3 100644 --- a/lustre/lod/lod_dev.c +++ b/lustre/lod/lod_dev.c @@ -269,8 +269,10 @@ static int lod_sub_process_config(const struct lu_env *env, struct lod_recovery_data { struct lod_device *lrd_lod; struct lod_tgt_desc *lrd_ltd; - struct ptlrpc_thread *lrd_thread; + struct task_struct **lrd_task; u32 lrd_idx; + struct lu_env lrd_env; + struct completion *lrd_started; }; @@ -354,9 +356,8 @@ static int lod_sub_recovery_thread(void *arg) struct lod_recovery_data *lrd = arg; struct lod_device *lod = lrd->lrd_lod; struct dt_device *dt; - struct ptlrpc_thread *thread = lrd->lrd_thread; struct llog_ctxt *ctxt = NULL; - struct lu_env env; + struct lu_env *env = &lrd->lrd_env; struct lu_target *lut; struct lu_tgt_desc *mdt = NULL; time64_t start; @@ -365,17 +366,6 @@ static int lod_sub_recovery_thread(void *arg) ENTRY; - thread->t_flags = SVC_RUNNING; - wake_up(&thread->t_ctl_waitq); - - rc = lu_env_init(&env, LCT_LOCAL | LCT_MD_THREAD); - if (rc != 0) { - OBD_FREE_PTR(lrd); - CERROR("%s: can't initialize env: rc = %d\n", - lod2obd(lod)->obd_name, rc); - RETURN(rc); - } - lut = lod2lu_dev(lod)->ld_site->ls_tgt; atomic_inc(&lut->lut_tdtd->tdtd_recovery_threads_count); if (!lrd->lrd_ltd) @@ -384,6 +374,7 @@ static int lod_sub_recovery_thread(void *arg) dt = lrd->lrd_ltd->ltd_tgt; start = ktime_get_real_seconds(); + complete(lrd->lrd_started); again: @@ -392,7 +383,7 @@ again: OBD_FAIL_TIMEOUT(OBD_FAIL_TGT_RECOVERY_CONNECT, cfs_fail_val); rc = -EIO; } else { - rc = lod_sub_prep_llog(&env, lod, dt, lrd->lrd_idx); + rc = lod_sub_prep_llog(env, lod, dt, lrd->lrd_idx); } if (!rc && !lod->lod_child->dd_rdonly) { /* Process the recovery record */ @@ -401,7 +392,7 @@ again: LASSERT(ctxt != NULL); LASSERT(ctxt->loc_handle != NULL); - rc = llog_cat_process(&env, ctxt->loc_handle, + rc = llog_cat_process(env, ctxt->loc_handle, lod_process_recovery_updates, lrd, 0, 0); } @@ -419,7 +410,7 @@ again: !top_device->ld_obd->obd_stopping) { if (ctxt) { if (ctxt->loc_handle) - llog_cat_close(&env, + llog_cat_close(env, ctxt->loc_handle); llog_ctxt_put(ctxt); } @@ -473,13 +464,16 @@ again: EXIT; out: - OBD_FREE_PTR(lrd); - thread->t_flags = SVC_STOPPED; atomic_dec(&lut->lut_tdtd->tdtd_recovery_threads_count); wake_up(&lut->lut_tdtd->tdtd_recovery_threads_waitq); - wake_up(&thread->t_ctl_waitq); - lu_env_fini(&env); - return rc; + if (xchg(lrd->lrd_task, NULL) == NULL) + /* Someone is waiting for us to finish, need + * to synchronize cleanly. + */ + wait_var_event(lrd, kthread_should_stop()); + lu_env_fini(env); + OBD_FREE_PTR(lrd); + return 0; } /** @@ -493,21 +487,21 @@ out: * \param[in] thread recovery thread on this sub device */ void lod_sub_fini_llog(const struct lu_env *env, - struct dt_device *dt, struct ptlrpc_thread *thread) + struct dt_device *dt, struct task_struct **thread) { struct obd_device *obd; struct llog_ctxt *ctxt; + struct task_struct *task = NULL; ENTRY; obd = dt->dd_lu_dev.ld_obd; CDEBUG(D_INFO, "%s: finish sub llog\n", obd->obd_name); - /* Stop recovery thread first */ - if (thread && thread->t_flags & SVC_RUNNING) { - thread->t_flags = SVC_STOPPING; - wake_up(&thread->t_ctl_waitq); - wait_event(thread->t_ctl_waitq, thread->t_flags & SVC_STOPPED); - } + /* Wait for recovery thread to complete */ + if (thread) + task = xchg(thread, NULL); + if (task) + kthread_stop(task); ctxt = llog_get_context(obd, LLOG_UPDATELOG_ORIG_CTXT); if (!ctxt) @@ -600,7 +594,8 @@ int lod_sub_init_llog(const struct lu_env *env, struct lod_device *lod, { struct obd_device *obd; struct lod_recovery_data *lrd = NULL; - struct ptlrpc_thread *thread; + DECLARE_COMPLETION_ONSTACK(started); + struct task_struct **taskp; struct task_struct *task; struct lod_tgt_desc *subtgt = NULL; u32 index; @@ -618,7 +613,7 @@ int lod_sub_init_llog(const struct lu_env *env, struct lod_device *lod, RETURN(-ENOMEM); if (lod->lod_child == dt) { - thread = &lod->lod_child_recovery_thread; + taskp = &lod->lod_child_recovery_task; index = master_index; } else { struct lu_tgt_desc *mdt; @@ -631,20 +626,16 @@ int lod_sub_init_llog(const struct lu_env *env, struct lod_device *lod, } } LASSERT(subtgt != NULL); - OBD_ALLOC_PTR(subtgt->ltd_recovery_thread); - if (!subtgt->ltd_recovery_thread) - GOTO(free_lrd, rc = -ENOMEM); - - thread = subtgt->ltd_recovery_thread; + taskp = &subtgt->ltd_recovery_task; } CDEBUG(D_INFO, "%s init sub log %s\n", lod2obd(lod)->obd_name, dt->dd_lu_dev.ld_obd->obd_name); lrd->lrd_lod = lod; lrd->lrd_ltd = subtgt; - lrd->lrd_thread = thread; + lrd->lrd_task = taskp; lrd->lrd_idx = index; - init_waitqueue_head(&thread->t_ctl_waitq); + lrd->lrd_started = &started; obd = dt->dd_lu_dev.ld_obd; obd->obd_lvfs_ctxt.dt = dt; @@ -653,30 +644,33 @@ int lod_sub_init_llog(const struct lu_env *env, struct lod_device *lod, if (rc < 0) { CERROR("%s: cannot setup updatelog llog: rc = %d\n", obd->obd_name, rc); - GOTO(free_thread, rc); + GOTO(free_lrd, rc); + } + + rc = lu_env_init(&lrd->lrd_env, LCT_LOCAL | LCT_MD_THREAD); + if (rc != 0) { + CERROR("%s: can't initialize env: rc = %d\n", + lod2obd(lod)->obd_name, rc); + GOTO(free_lrd, rc); } /* Start the recovery thread */ - task = kthread_run(lod_sub_recovery_thread, lrd, "lod%04x_rec%04x", - master_index, index); + task = kthread_create(lod_sub_recovery_thread, lrd, "lod%04x_rec%04x", + master_index, index); if (IS_ERR(task)) { rc = PTR_ERR(task); CERROR("%s: cannot start recovery thread: rc = %d\n", obd->obd_name, rc); + lu_env_fini(&lrd->lrd_env); GOTO(out_llog, rc); } - - wait_event_idle(thread->t_ctl_waitq, thread->t_flags & SVC_RUNNING || - thread->t_flags & SVC_STOPPED); + *taskp = task; + wake_up_process(task); + wait_for_completion(&started); RETURN(0); out_llog: - lod_sub_fini_llog(env, dt, thread); -free_thread: - if (lod->lod_child != dt) { - OBD_FREE_PTR(subtgt->ltd_recovery_thread); - subtgt->ltd_recovery_thread = NULL; - } + lod_sub_fini_llog(env, dt, taskp); free_lrd: OBD_FREE_PTR(lrd); RETURN(rc); @@ -693,32 +687,22 @@ free_lrd: static void lod_sub_stop_recovery_threads(const struct lu_env *env, struct lod_device *lod) { - struct ptlrpc_thread *thread; + struct task_struct *task; struct lu_tgt_desc *mdt; /* * Stop the update log commit cancel threads and finish master * llog ctxt */ - thread = &lod->lod_child_recovery_thread; - /* Stop recovery thread first */ - if (thread && thread->t_flags & SVC_RUNNING) { - thread->t_flags = SVC_STOPPING; - wake_up(&thread->t_ctl_waitq); - wait_event(thread->t_ctl_waitq, thread->t_flags & SVC_STOPPED); - } + task = xchg(&lod->lod_child_recovery_task, NULL); + if (task) + kthread_stop(task); lod_getref(&lod->lod_mdt_descs); lod_foreach_mdt(lod, mdt) { - thread = mdt->ltd_recovery_thread; - if (thread && thread->t_flags & SVC_RUNNING) { - thread->t_flags = SVC_STOPPING; - wake_up(&thread->t_ctl_waitq); - wait_event(thread->t_ctl_waitq, - thread->t_flags & SVC_STOPPED); - OBD_FREE_PTR(mdt->ltd_recovery_thread); - mdt->ltd_recovery_thread = NULL; - } + task = xchg(&mdt->ltd_recovery_task, NULL); + if (task) + kthread_stop(task); } lod_putref(lod, &lod->lod_mdt_descs); } @@ -741,11 +725,11 @@ static void lod_sub_fini_all_llogs(const struct lu_env *env, * llog ctxt */ lod_sub_fini_llog(env, lod->lod_child, - &lod->lod_child_recovery_thread); + &lod->lod_child_recovery_task); lod_getref(&lod->lod_mdt_descs); lod_foreach_mdt(lod, mdt) lod_sub_fini_llog(env, mdt->ltd_tgt, - mdt->ltd_recovery_thread); + &mdt->ltd_recovery_task); lod_putref(lod, &lod->lod_mdt_descs); } diff --git a/lustre/lod/lod_internal.h b/lustre/lod/lod_internal.h index 4aa2b10..f474541 100644 --- a/lustre/lod/lod_internal.h +++ b/lustre/lod/lod_internal.h @@ -113,7 +113,7 @@ struct lod_device { struct lod_tgt_descs lod_mdt_descs; /* Recovery thread for lod_child */ - struct ptlrpc_thread lod_child_recovery_thread; + struct task_struct *lod_child_recovery_task; /* maximum EA size underlied OSD may have */ unsigned int lod_osd_max_easize; @@ -500,7 +500,7 @@ int lod_fld_lookup(const struct lu_env *env, struct lod_device *lod, int lod_sub_init_llog(const struct lu_env *env, struct lod_device *lod, struct dt_device *dt); void lod_sub_fini_llog(const struct lu_env *env, - struct dt_device *dt, struct ptlrpc_thread *thread); + struct dt_device *dt, struct task_struct **taskp); int lodname2mdt_index(char *lodname, __u32 *mdt_index); extern void target_recovery_fini(struct obd_device *obd); diff --git a/lustre/lod/lod_lov.c b/lustre/lod/lod_lov.c index 37f2b2c..d70f46f 100644 --- a/lustre/lod/lod_lov.c +++ b/lustre/lod/lod_lov.c @@ -284,16 +284,10 @@ int lod_add_device(const struct lu_env *env, struct lod_device *lod, RETURN(rc); out_fini_llog: lod_sub_fini_llog(env, tgt_desc->ltd_tgt, - tgt_desc->ltd_recovery_thread); + &tgt_desc->ltd_recovery_task); out_ltd: down_write(<d->ltd_rw_sem); mutex_lock(<d->ltd_mutex); - if (!for_ost && LTD_TGT(ltd, index)->ltd_recovery_thread != NULL) { - struct ptlrpc_thread *thread; - - thread = LTD_TGT(ltd, index)->ltd_recovery_thread; - OBD_FREE_PTR(thread); - } lod_tgt_pool_remove(<d->ltd_tgt_pool, index); out_del_tgt: ltd_del_tgt(ltd, tgt_desc); @@ -334,9 +328,6 @@ static void __lod_del_device(const struct lu_env *env, struct lod_device *lod, lfsck_del_target(env, lod->lod_child, tgt->ltd_tgt, tgt->ltd_index, !ltd->ltd_is_mdt); - if (ltd->ltd_is_mdt && tgt->ltd_recovery_thread) - OBD_FREE_PTR(tgt->ltd_recovery_thread); - if (!tgt->ltd_reap) { tgt->ltd_reap = 1; ltd->ltd_death_row++;