Whamcloud - gitweb
LU-4624 llite: Avoid statahead thread start/stop deadlocks 82/12182/2
author: Christopher J. Morrone <morrone2@llnl.gov>
Fri, 21 Feb 2014 22:46:47 +0000 (14:46 -0800)
committer: Oleg Drokin <oleg.drokin@intel.com>
Sat, 11 Oct 2014 04:43:52 +0000 (04:43 +0000)
The statahead and statahead agl threads blindly set their
thread state to SVC_RUNNING without checking the state first.  If, for
instance, another thread has already set the state to SVC_STOPPING, that
stop signal is overwritten and lost.  Deadlock ensues.

We also partly improve the sai reference counting, because a race exists
where a thread calling ll_stop_statahead() can drop the default reference, and
the statahead thread can exit and drop its reference as well.  With no
references left on the sai, the final put will poison and free the buffer.  The
original do_statahead_enter() function may then continue to access
the buffer after it is freed, because it did not take a reference of its
own.  We add a local reference to address that.

Lustre-commit: 373d937086e5cede6fc1a3b3f859622c0173c1de
Lustre-change: http://review.whamcloud.com/9358

Change-Id: I531f17966d49ba1e6ebd99abe9cb8b128eeac4f9
Signed-off-by: Christopher J. Morrone <morrone2@llnl.gov>
Signed-off-by: Bob Glossman <bob.glossman@intel.com>
Reviewed-by: Lai Siyao <lai.siyao@intel.com>
Reviewed-by: Fan Yong <fan.yong@intel.com>
Reviewed-by: Oleg Drokin <oleg.drokin@intel.com>
Reviewed-on: http://review.whamcloud.com/12182
Tested-by: Jenkins
Tested-by: Maloo <hpdd-maloo@intel.com>
lustre/llite/statahead.c

index 26fb4e7..e473ee7 100644 (file)
@@ -984,7 +984,11 @@ static int ll_agl_thread(void *arg)
        atomic_inc(&sbi->ll_agl_total);
        spin_lock(&plli->lli_agl_lock);
        sai->sai_agl_valid = 1;
-       thread_set_flags(thread, SVC_RUNNING);
+       if (thread_is_init(thread))
+               /* If someone else has changed the thread state
+                * (e.g. already changed to SVC_STOPPING), we can't just
+                * blindly overwrite that setting. */
+               thread_set_flags(thread, SVC_RUNNING);
        spin_unlock(&plli->lli_agl_lock);
        wake_up(&thread->t_ctl_waitq);
 
@@ -1081,7 +1085,11 @@ static int ll_statahead_thread(void *arg)
 
        atomic_inc(&sbi->ll_sa_total);
        spin_lock(&plli->lli_sa_lock);
-       thread_set_flags(thread, SVC_RUNNING);
+       if (thread_is_init(thread))
+               /* If someone else has changed the thread state
+                * (e.g. already changed to SVC_STOPPING), we can't just
+                * blindly overwrite that setting. */
+               thread_set_flags(thread, SVC_RUNNING);
        spin_unlock(&plli->lli_sa_lock);
        wake_up(&thread->t_ctl_waitq);
 
@@ -1689,6 +1697,12 @@ int do_statahead_enter(struct inode *dir, struct dentry **dentryp,
        CDEBUG(D_READA, "start statahead thread: [pid %d] [parent %.*s]\n",
               current_pid(), parent->d_name.len, parent->d_name.name);
 
+       /* The sai buffer already has one reference taken at allocation time,
+        * but as soon as we expose the sai by attaching it to the lli that default
+        * reference can be dropped by another thread calling ll_stop_statahead.
+        * We need to take a local reference to protect the sai buffer while we intend
+        * to access it. */
+        ll_sai_get(sai);
         lli->lli_sai = sai;
 
        plli = ll_i2info(parent->d_inode);
@@ -1702,7 +1716,10 @@ int do_statahead_enter(struct inode *dir, struct dentry **dentryp,
                 lli->lli_opendir_key = NULL;
                 thread_set_flags(thread, SVC_STOPPED);
                 thread_set_flags(&sai->sai_agl_thread, SVC_STOPPED);
+               /* Drop both our own local reference and the default
+                * reference from allocation time. */
                 ll_sai_put(sai);
+               ll_sai_put(sai);
                 LASSERT(lli->lli_sai == NULL);
                 RETURN(-EAGAIN);
         }
@@ -1710,6 +1727,7 @@ int do_statahead_enter(struct inode *dir, struct dentry **dentryp,
         l_wait_event(thread->t_ctl_waitq,
                      thread_is_running(thread) || thread_is_stopped(thread),
                      &lwi);
+       ll_sai_put(sai);
 
         /*
          * We don't stat-ahead for the first dirent since we are already in