From c02e565656ad806e1390b3054c2bc73828220040 Mon Sep 17 00:00:00 2001 From: bwzhou Date: Tue, 10 Jun 2008 07:42:25 +0000 Subject: [PATCH] Branch HEAD b=15759 r=rread, green Description: MDS or OSS service threads fail to start with -24 (-EMFILE) Details : During startup under recovery, service thread startup can fail in ptlrpc_start_threads() if one of the threads it has already started begins processing a request and then starts an additional thread itself. This causes ptlrpc_start_threads() to try to start one thread too many and get an -EMFILE error. --- lustre/include/obd_support.h | 1 + lustre/ptlrpc/service.c | 7 ++++++- lustre/tests/conf-sanity.sh | 13 +++++++++++-- 3 files changed, 18 insertions(+), 3 deletions(-) diff --git a/lustre/include/obd_support.h b/lustre/include/obd_support.h index 26e5eb7..853e7a8 100644 --- a/lustre/include/obd_support.h +++ b/lustre/include/obd_support.h @@ -243,6 +243,7 @@ int obd_alloc_fail(const void *ptr, const char *name, const char *type, #define OBD_FAIL_TGT_DELAY_CONNECT 0x703 #define OBD_FAIL_TGT_DELAY_RECONNECT 0x704 #define OBD_FAIL_TGT_DELAY_PRECREATE 0x705 +#define OBD_FAIL_TGT_TOOMANY_THREADS 0x706 #define OBD_FAIL_MDC_REVALIDATE_PAUSE 0x800 #define OBD_FAIL_MDC_ENQUEUE_PAUSE 0x801 diff --git a/lustre/ptlrpc/service.c b/lustre/ptlrpc/service.c index 4cda095..46cb740 100644 --- a/lustre/ptlrpc/service.c +++ b/lustre/ptlrpc/service.c @@ -1165,6 +1165,9 @@ int ptlrpc_start_threads(struct obd_device *dev, struct ptlrpc_service *svc) LASSERT(svc->srv_threads_min > 0); for (i = 0; i < svc->srv_threads_min; i++) { rc = ptlrpc_start_thread(dev, svc); + /* We have enough threads, don't start more. 
b=15759 */ + if (rc == -EMFILE) + break; if (rc) { CERROR("cannot start %s thread #%d: rc %d\n", svc->srv_thread_name, i, rc); @@ -1186,7 +1189,9 @@ int ptlrpc_start_thread(struct obd_device *dev, struct ptlrpc_service *svc) CDEBUG(D_RPCTRACE, "%s started %d min %d max %d running %d\n", svc->srv_name, svc->srv_threads_started, svc->srv_threads_min, svc->srv_threads_max, svc->srv_threads_running); - if (svc->srv_threads_started >= svc->srv_threads_max) + if (unlikely(svc->srv_threads_started >= svc->srv_threads_max) || + (OBD_FAIL_CHECK(OBD_FAIL_TGT_TOOMANY_THREADS) && + svc->srv_threads_started == svc->srv_threads_min - 1)) RETURN(-EMFILE); OBD_ALLOC_PTR(thread); diff --git a/lustre/tests/conf-sanity.sh b/lustre/tests/conf-sanity.sh index 06b00ec..b51b187 100644 --- a/lustre/tests/conf-sanity.sh +++ b/lustre/tests/conf-sanity.sh @@ -1559,7 +1559,16 @@ test_39() { } run_test 39 "leak_finder recognizes both LUSTRE and LNET malloc messages" -test_40() { #bug 14134 +test_40() { # bug 15759 + start_ost + #define OBD_FAIL_TGT_TOOMANY_THREADS 0x706 + do_facet mds "sysctl -w lustre.fail_loc=0x80000706" + start_mds + cleanup +} +run_test 40 "race during service thread startup" + +test_41() { #bug 14134 local rc start mds $MDSDEV $MDS_MOUNT_OPTS -o nosvc start ost `ostdevname 1` $OST_MOUNT_OPTS @@ -1578,7 +1587,7 @@ test_40() { #bug 14134 unload_modules || return 204 return $rc } -run_test 40 "mount mds with --nosvc and --nomgs" +run_test 41 "mount mds with --nosvc and --nomgs" umount_client $MOUNT cleanup_nocli -- 1.8.3.1