From: Fan Yong Date: Fri, 15 Jul 2016 05:33:01 +0000 (+0800) Subject: LU-8637 obdclass: LWP callback hold export reference X-Git-Tag: 2.8.59~6 X-Git-Url: https://git.whamcloud.com/?p=fs%2Flustre-release.git;a=commitdiff_plain;h=acf46c8846d6c3893a52f5caba1eabea67c1bdba LU-8637 obdclass: LWP callback hold export reference The lwp_notify_main thread itself needs to hold the 'exp' reference to prevent 'exp' being freed during LWP callback. On the other hand, there is race condition between the lustre_register_lwp_item() and lustre_notify_lwp_list(): When lustre_register_lwp_item() (call it as thread1) adds 'lwp_register_item' into the 'lwp_register_list', the "*lri_exp" is NULL. After that, the thread1 releases the 'lwp_register_list_lock', then the LWP notify thread (call it as thread2) assigns the "lri::lri_exp" via the 'lustre_notify_lwp_list()' by race. That will cause thread1 to trigger the 'lri::lri_cb_func'. At the same time, thread2 will also trigger the 'lri::lri_cb_func'. Then the callback are called repeatedly. Signed-off-by: Fan Yong Change-Id: I9eb5407931149a818b620a393fdc70fec0d0d099 Reviewed-on: http://review.whamcloud.com/22724 Tested-by: Jenkins Tested-by: Maloo Reviewed-by: wangdi Reviewed-by: Niu Yawei Reviewed-by: Oleg Drokin --- diff --git a/lustre/obdclass/obd_mount_server.c b/lustre/obdclass/obd_mount_server.c index 55a9306..cf30df6 100644 --- a/lustre/obdclass/obd_mount_server.c +++ b/lustre/obdclass/obd_mount_server.c @@ -407,6 +407,7 @@ int lustre_register_lwp_item(const char *lwpname, struct obd_export **exp, { struct obd_device *lwp; struct lwp_register_item *lri; + bool cb = false; ENTRY; LASSERTF(strlen(lwpname) < MTI_NAME_MAXLEN, "lwpname is too long %s\n", @@ -445,9 +446,11 @@ int lustre_register_lwp_item(const char *lwpname, struct obd_export **exp, spin_lock(&lwp_register_list_lock); list_add(&lri->lri_list, &lwp_register_list); + if (*exp != NULL) + cb = true; spin_unlock(&lwp_register_list_lock); - if (*exp != NULL && cb_func != NULL) + if (cb && cb_func != NULL) cb_func(cb_data); lustre_put_lwp_item(lri); @@ -527,11 +530,12 @@ again: if (*lri->lri_exp != NULL) continue; *lri->lri_exp = class_export_get(exp); + if (lri->lri_cb_func == NULL) + continue; atomic_inc(&lri->lri_ref); spin_unlock(&lwp_register_list_lock); - if (lri->lri_cb_func != NULL) - lri->lri_cb_func(lri->lri_cb_data); + lri->lri_cb_func(lri->lri_cb_data); lustre_put_lwp_item(lri); /* Others may have changed the list after we unlock, we have diff --git a/lustre/osp/lwp_dev.c b/lustre/osp/lwp_dev.c index b28f32c..8f33435 100644 --- a/lustre/osp/lwp_dev.c +++ b/lustre/osp/lwp_dev.c @@ -403,6 +403,8 @@ static int lwp_notify_main(void *args) struct ptlrpc_thread *thread; LASSERT(exp != NULL); + class_export_get(exp); + lwp = lu2lwp_dev(exp->exp_obd->obd_lu_dev); thread = &lwp->lpd_notify_thread; @@ -411,6 +413,7 @@ static int lwp_notify_main(void *args) lustre_notify_lwp_list(exp); + class_export_put(exp); thread_set_flags(thread, SVC_STOPPED); wake_up(&thread->t_ctl_waitq); return 0;