LU-7330 ldlm: fix race of starting bl threads

author Niu Yawei <yawei.niu@intel.com>

Tue, 3 Nov 2015 06:59:32 +0000 (01:59 -0500)

committer Oleg Drokin <oleg.drokin@intel.com>

Wed, 11 Nov 2015 15:53:51 +0000 (15:53 +0000)
author Niu Yawei <yawei.niu@intel.com>
Tue, 3 Nov 2015 06:59:32 +0000 (01:59 -0500)
committer Oleg Drokin <oleg.drokin@intel.com>
Wed, 11 Nov 2015 15:53:51 +0000 (15:53 +0000)
diff --git a/lustre/ldlm/ldlm_lockd.c b/lustre/ldlm/ldlm_lockd.c

index 651451a..72a6fb2 100644 (file)
--- a/lustre/ldlm/ldlm_lockd.c
+++ b/lustre/ldlm/ldlm_lockd.c
@@ -2600,7 +2600,6 @@ static int ldlm_bl_get_work(struct ldlm_bl_pool *blp,
  
  /* This only contains temporary data until the thread starts */
  struct ldlm_bl_thread_data {
-       char                    bltd_name[CFS_CURPROC_COMM_MAX];
         struct ldlm_bl_pool     *bltd_blp;
         struct completion       bltd_comp;
         int                     bltd_num;
@@ -2608,19 +2607,32 @@ struct ldlm_bl_thread_data {
  
  static int ldlm_bl_thread_main(void *arg);
  
-static int ldlm_bl_thread_start(struct ldlm_bl_pool *blp)
+static int ldlm_bl_thread_start(struct ldlm_bl_pool *blp, bool check_busy)
  {
         struct ldlm_bl_thread_data bltd = { .bltd_blp = blp };
         struct task_struct *task;
  
         init_completion(&bltd.bltd_comp);
-       bltd.bltd_num = atomic_read(&blp->blp_num_threads);
-       snprintf(bltd.bltd_name, sizeof(bltd.bltd_name) - 1,
-               "ldlm_bl_%02d", bltd.bltd_num);
-       task = kthread_run(ldlm_bl_thread_main, &bltd, bltd.bltd_name);
+
+       bltd.bltd_num = atomic_inc_return(&blp->blp_num_threads);
+       if (bltd.bltd_num >= blp->blp_max_threads) {
+               atomic_dec(&blp->blp_num_threads);
+               return 0;
+       }
+
+       LASSERTF(bltd.bltd_num > 0, "thread num:%d\n", bltd.bltd_num);
+       if (check_busy &&
+           atomic_read(&blp->blp_busy_threads) < (bltd.bltd_num - 1)) {
+               atomic_dec(&blp->blp_num_threads);
+               return 0;
+       }
+
+       task = kthread_run(ldlm_bl_thread_main, &bltd, "ldlm_bl_%02d",
+                          bltd.bltd_num);
         if (IS_ERR(task)) {
                 CERROR("cannot start LDLM thread ldlm_bl_%02d: rc %ld\n",
-                      atomic_read(&blp->blp_num_threads), PTR_ERR(task));
+                      bltd.bltd_num, PTR_ERR(task));
+               atomic_dec(&blp->blp_num_threads);
                 return PTR_ERR(task);
         }
         wait_for_completion(&bltd.bltd_comp);
@@ -2632,12 +2644,11 @@ static int ldlm_bl_thread_start(struct ldlm_bl_pool *blp)
  static int ldlm_bl_thread_need_create(struct ldlm_bl_pool *blp,
                                       struct ldlm_bl_work_item *blwi)
  {
-       int busy = atomic_read(&blp->blp_busy_threads);
-
-       if (busy >= blp->blp_max_threads)
+       if (atomic_read(&blp->blp_num_threads) >= blp->blp_max_threads)
                 return 0;
  
-       if (busy < atomic_read(&blp->blp_num_threads))
+       if (atomic_read(&blp->blp_busy_threads) <
+           atomic_read(&blp->blp_num_threads))
                 return 0;
  
         if (blwi != NULL && (blwi->blwi_ns == NULL ||
@@ -2726,9 +2737,6 @@ static int ldlm_bl_thread_main(void *arg)
  
         blp = bltd->bltd_blp;
  
-       atomic_inc(&blp->blp_num_threads);
-       atomic_inc(&blp->blp_busy_threads);
-
         complete(&bltd->bltd_comp);
         /* cannot use bltd after this, it is only on caller's stack */
  
@@ -2740,29 +2748,28 @@ static int ldlm_bl_thread_main(void *arg)
  
                 rc = ldlm_bl_get_work(blp, &blwi, &exp);
  
-               if (rc == 0) {
-                       atomic_dec(&blp->blp_busy_threads);
+               if (rc == 0)
                         l_wait_event_exclusive(blp->blp_waitq,
                                                ldlm_bl_get_work(blp, &blwi,
                                                                 &exp),
                                                &lwi);
-                       atomic_inc(&blp->blp_busy_threads);
-               }
+               atomic_inc(&blp->blp_busy_threads);
  
                 if (ldlm_bl_thread_need_create(blp, blwi))
                         /* discard the return value, we tried */
-                       ldlm_bl_thread_start(blp);
+                       ldlm_bl_thread_start(blp, true);
  
                 if (exp)
                         rc = ldlm_bl_thread_exports(blp, exp);
                 else if (blwi)
                         rc = ldlm_bl_thread_blwi(blp, blwi);
  
+               atomic_dec(&blp->blp_busy_threads);
+
                 if (rc == LDLM_ITER_STOP)
                         break;
         }
  
-       atomic_dec(&blp->blp_busy_threads);
         atomic_dec(&blp->blp_num_threads);
         complete(&blp->blp_comp);
         RETURN(0);
@@ -3042,7 +3049,7 @@ static int ldlm_setup(void)
         }
  
         for (i = 0; i < blp->blp_min_threads; i++) {
-               rc = ldlm_bl_thread_start(blp);
+               rc = ldlm_bl_thread_start(blp, false);
                 if (rc < 0)
                         GOTO(out, rc);
         }
author	Niu Yawei <yawei.niu@intel.com>
	Tue, 3 Nov 2015 06:59:32 +0000 (01:59 -0500)
committer	Oleg Drokin <oleg.drokin@intel.com>
	Wed, 11 Nov 2015 15:53:51 +0000 (15:53 +0000)