Whamcloud - gitweb
LU-8637 obdclass: LWP callback hold export reference 24/22724/2
authorFan Yong <fan.yong@intel.com>
Fri, 15 Jul 2016 05:33:01 +0000 (13:33 +0800)
committerOleg Drokin <oleg.drokin@intel.com>
Wed, 5 Oct 2016 03:51:25 +0000 (03:51 +0000)
The lwp_notify_main thread itself needs to hold the 'exp'
reference to prevent 'exp' being freed during LWP callback.

On the other hand, there is race condition between the
lustre_register_lwp_item() and lustre_notify_lwp_list():

When lustre_register_lwp_item() (call it as thread1) adds
'lwp_register_item' into the 'lwp_register_list', the
"*lri_exp" is NULL. After that, the thread1 releases the
'lwp_register_list_lock', then the LWP notify thread (call
it as thread2) assigns the "lri::lri_exp" via the
'lustre_notify_lwp_list()' by race. That will cause thread1
to trigger the 'lri::lri_cb_func'. At the same time, thread2
will also trigger the 'lri::lri_cb_func'. Then the callback
are called repeatedly.

Signed-off-by: Fan Yong <fan.yong@intel.com>
Change-Id: I9eb5407931149a818b620a393fdc70fec0d0d099
Reviewed-on: http://review.whamcloud.com/22724
Tested-by: Jenkins
Tested-by: Maloo <hpdd-maloo@intel.com>
Reviewed-by: wangdi <di.wang@intel.com>
Reviewed-by: Niu Yawei <yawei.niu@intel.com>
Reviewed-by: Oleg Drokin <oleg.drokin@intel.com>
lustre/obdclass/obd_mount_server.c
lustre/osp/lwp_dev.c

index 55a9306..cf30df6 100644 (file)
@@ -407,6 +407,7 @@ int lustre_register_lwp_item(const char *lwpname, struct obd_export **exp,
 {
        struct obd_device        *lwp;
        struct lwp_register_item *lri;
+       bool cb = false;
        ENTRY;
 
        LASSERTF(strlen(lwpname) < MTI_NAME_MAXLEN, "lwpname is too long %s\n",
@@ -445,9 +446,11 @@ int lustre_register_lwp_item(const char *lwpname, struct obd_export **exp,
 
        spin_lock(&lwp_register_list_lock);
        list_add(&lri->lri_list, &lwp_register_list);
+       if (*exp != NULL)
+               cb = true;
        spin_unlock(&lwp_register_list_lock);
 
-       if (*exp != NULL && cb_func != NULL)
+       if (cb && cb_func != NULL)
                cb_func(cb_data);
        lustre_put_lwp_item(lri);
 
@@ -527,11 +530,12 @@ again:
                if (*lri->lri_exp != NULL)
                        continue;
                *lri->lri_exp = class_export_get(exp);
+               if (lri->lri_cb_func == NULL)
+                       continue;
                atomic_inc(&lri->lri_ref);
                spin_unlock(&lwp_register_list_lock);
 
-               if (lri->lri_cb_func != NULL)
-                       lri->lri_cb_func(lri->lri_cb_data);
+               lri->lri_cb_func(lri->lri_cb_data);
                lustre_put_lwp_item(lri);
 
                /* Others may have changed the list after we unlock, we have
index b28f32c..8f33435 100644 (file)
@@ -403,6 +403,8 @@ static int lwp_notify_main(void *args)
        struct ptlrpc_thread    *thread;
 
        LASSERT(exp != NULL);
+       class_export_get(exp);
+
        lwp = lu2lwp_dev(exp->exp_obd->obd_lu_dev);
        thread = &lwp->lpd_notify_thread;
 
@@ -411,6 +413,7 @@ static int lwp_notify_main(void *args)
 
        lustre_notify_lwp_list(exp);
 
+       class_export_put(exp);
        thread_set_flags(thread, SVC_STOPPED);
        wake_up(&thread->t_ctl_waitq);
        return 0;