Whamcloud - gitweb
Branch b1_6
author bwzhou <bwzhou>
Sat, 24 May 2008 07:51:17 +0000 (07:51 +0000)
committer bwzhou <bwzhou>
Sat, 24 May 2008 07:51:17 +0000 (07:51 +0000)
b=15759
r=rread, green

Don't consider -EMFILE (too many threads) a fatal startup error

lustre/include/obd_support.h
lustre/ptlrpc/service.c
lustre/tests/conf-sanity.sh

index e97b58d..792a6d4 100644 (file)
@@ -253,6 +253,7 @@ extern unsigned int obd_alloc_fail_rate;
 #define OBD_FAIL_TGT_DELAY_CONNECT       0x703
 #define OBD_FAIL_TGT_DELAY_RECONNECT     0x704
 #define OBD_FAIL_TGT_DELAY_PRECREATE     0x705
+#define OBD_FAIL_TGT_TOOMANY_THREADS     0x706
 
 #define OBD_FAIL_MDC_REVALIDATE_PAUSE    0x800
 #define OBD_FAIL_MDC_ENQUEUE_PAUSE       0x801
index 2915102..fb23933 100644 (file)
@@ -1500,11 +1500,14 @@ int ptlrpc_start_threads(struct obd_device *dev, struct ptlrpc_service *svc)
         int i, rc = 0;
         ENTRY;
 
-        /* We require 2 threads min - see note in 
-           ptlrpc_server_handle_request */
+        /* We require 2 threads min - see note in
+         * ptlrpc_server_handle_request() */
         LASSERT(svc->srv_threads_min >= 2);
         for (i = 0; i < svc->srv_threads_min; i++) {
                 rc = ptlrpc_start_thread(dev, svc);
+                /* We have enough threads, don't start more.  b=15759 */
+                if (rc == -EMFILE)
+                        break;
                 if (rc) {
                         CERROR("cannot start %s thread #%d: rc %d\n",
                                svc->srv_thread_name, i, rc);
@@ -1526,7 +1529,9 @@ int ptlrpc_start_thread(struct obd_device *dev, struct ptlrpc_service *svc)
         CDEBUG(D_RPCTRACE, "%s started %d min %d max %d running %d\n",
                svc->srv_name, svc->srv_threads_started, svc->srv_threads_min,
                svc->srv_threads_max, svc->srv_threads_running);
-        if (svc->srv_threads_started >= svc->srv_threads_max)
+        if (unlikely(svc->srv_threads_started >= svc->srv_threads_max) ||
+            (OBD_FAIL_CHECK(OBD_FAIL_TGT_TOOMANY_THREADS) &&
+             svc->srv_threads_started == svc->srv_threads_min - 1))
                 RETURN(-EMFILE);
 
         OBD_ALLOC(thread, sizeof(*thread));
index 8ab40b9..36a7c17 100644 (file)
@@ -1568,5 +1568,14 @@ test_39() { #bug 14413
 }
 run_test 39 "leak_finder recognizes both LUSTRE and LNET malloc messages"
 
+test_40() { # bug 15759
+       start_ost
+       #define OBD_FAIL_TGT_TOOMANY_THREADS     0x706
+       do_facet mds "sysctl -w lustre.fail_loc=0x80000706"
+       start_mds
+       cleanup
+}
+run_test 40 "race during service thread startup"
+
 equals_msg `basename $0`: test complete
 [ -f "$TESTSUITELOG" ] && cat $TESTSUITELOG || true