b=21411 Avoid infinite loop when bulk IO delayed.
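
The main loop in ptlrpcd() becomes a do/while that takes one extra pass after
LIOD_STOP is observed, so requests that race onto the new-request queue around
stop time are still moved into the set (and aborted under LIOD_FORCE) before
the thread drains and exits.  To support that, ptlrpcd_check() no longer bails
out early on LIOD_STOP, and the l_wait_event() timeout falls back to one
second when ptlrpc_set_next_timeout() returns zero.  ptlrpcd_add_req() now
returns the result of ptlrpc_set_add_new_req(), and a new exported helper,
ptlrpcd_add_rqset(), moves every RQ_PHASE_NEW request from a caller's set onto
the ptlrpcd queue.
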
[fs/lustre-release.git] lustre/ptlrpc/ptlrpcd.c
index 8699e7d..dde226c 100644
@@ -16,8 +16,8 @@
  * in the LICENSE file that accompanied this code).
  *
  * You should have received a copy of the GNU General Public License
- * version 2 along with this program; If not, see [sun.com URL with a
- * copy of GPLv2].
+ * version 2 along with this program; If not, see
+ * http://www.sun.com/software/products/lustre/docs/GPLv2.pdf
  *
  * Please contact Sun Microsystems, Inc., 4150 Network Circle, Santa Clara,
  * CA 95054 USA or visit www.sun.com if you need additional information or
@@ -45,6 +45,7 @@
 # include <ctype.h>
 #endif
 
+#include <libcfs/kp30.h>
 #include <lustre_net.h>
 # include <lustre_lib.h>
 
@@ -68,11 +69,33 @@ void ptlrpcd_wake(struct ptlrpc_request *req)
         cfs_waitq_signal(&rq_set->set_waitq);
 }
 
-/* 
+/*
+ * Move all requests from an existing request set to the ptlrpcd queue.
+ * All requests from the set must be in phase RQ_PHASE_NEW.
+ */
+void ptlrpcd_add_rqset(struct ptlrpc_request_set *set)
+{
+        struct list_head *tmp, *pos;
+
+        list_for_each_safe(pos, tmp, &set->set_requests) {
+                struct ptlrpc_request *req =
+                        list_entry(pos, struct ptlrpc_request, rq_set_chain);
+
+                LASSERT(req->rq_phase == RQ_PHASE_NEW);
+                list_del_init(&req->rq_set_chain);
+                req->rq_set = NULL;
+                ptlrpcd_add_req(req);
+                set->set_remaining--;
+        }
+        LASSERT(set->set_remaining == 0);
+}
+EXPORT_SYMBOL(ptlrpcd_add_rqset);
+
+/*
  * Requests that are added to the ptlrpcd queue are sent via
  * ptlrpcd_check->ptlrpc_check_set().
  */
-void ptlrpcd_add_req(struct ptlrpc_request *req)
+int ptlrpcd_add_req(struct ptlrpc_request *req)
 {
         struct ptlrpcd_ctl *pc;
         int rc;
@@ -81,27 +104,21 @@ void ptlrpcd_add_req(struct ptlrpc_request *req)
                 pc = &ptlrpcd_pc;
         else
                 pc = &ptlrpcd_recovery_pc;
-
         rc = ptlrpc_set_add_new_req(pc, req);
         if (rc) {
-                int (*interpreter)(struct ptlrpc_request *,
-                                   void *, int);
-                                   
-                interpreter = req->rq_interpret_reply;
-
                 /*
                  * Thread is probably in stop now so we need to
                  * kill this rpc as it was not added. Let's call
                  * interpret for it to let it know we're killing it
                  * so that higher levels might free associated
                  * resources.
-                 */
-                req->rq_status = -EBADR;
-                interpreter(req, &req->rq_async_args,
-                            req->rq_status);
+                 */
+
+                ptlrpc_req_interpret(req, -EBADR);
                 req->rq_set = NULL;
                 ptlrpc_req_finished(req);
         }
+        return rc;
 }
 
 static int ptlrpcd_check(struct ptlrpcd_ctl *pc)
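
For readers less familiar with the kernel list helpers: ptlrpcd_add_rqset()
above unlinks each entry while it walks set_requests, which is exactly what
list_for_each_safe() is for (the next pointer is saved before the current node
is detached).  Below is a minimal, self-contained sketch of that
detach-while-iterating pattern in plain C; the node/drain names are toy
stand-ins for illustration, not Lustre types.

#include <stdio.h>
#include <stdlib.h>

struct node {                            /* toy stand-in for rq_set_chain */
        struct node *next;
        int id;
};

/*
 * Move every node from *src to *dst, unlinking as we go.  Saving 'next'
 * before the current node is detached is the property list_for_each_safe()
 * provides for the kernel's doubly-linked lists.
 */
static void drain(struct node **src, struct node **dst)
{
        struct node *pos = *src;

        while (pos != NULL) {
                struct node *next = pos->next;  /* remember before unlinking */

                pos->next = *dst;               /* hand the node over */
                *dst = pos;
                pos = next;
        }
        *src = NULL;                            /* source list is now empty */
}

int main(void)
{
        struct node *pending = NULL, *queued = NULL, *n;
        int i;

        for (i = 0; i < 3; i++) {
                n = malloc(sizeof(*n));
                n->id = i;
                n->next = pending;
                pending = n;
        }

        drain(&pending, &queued);

        for (n = queued; n != NULL; n = n->next)
                printf("moved request %d\n", n->id);
        return 0;
}
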
@@ -111,16 +128,13 @@ static int ptlrpcd_check(struct ptlrpcd_ctl *pc)
         int rc = 0;
         ENTRY;
 
-        if (test_bit(LIOD_STOP, &pc->pc_flags))
-                RETURN(1);
-
         spin_lock(&pc->pc_set->set_new_req_lock);
         list_for_each_safe(pos, tmp, &pc->pc_set->set_new_requests) {
                 req = list_entry(pos, struct ptlrpc_request, rq_set_chain);
                 list_del_init(&req->rq_set_chain);
                 ptlrpc_set_add_req(pc->pc_set, req);
-                /* 
-                 * Need to calculate its timeout. 
+                /*
+                 * Need to calculate its timeout.
                  */
                 rc = 1;
         }
@@ -129,9 +143,9 @@ static int ptlrpcd_check(struct ptlrpcd_ctl *pc)
         if (pc->pc_set->set_remaining) {
                 rc = rc | ptlrpc_check_set(pc->pc_set);
 
-                /* 
+                /*
                  * XXX: our set never completes, so we prune the completed
-                 * reqs after each iteration. boy could this be smarter. 
+                 * reqs after each iteration. boy could this be smarter.
                  */
                 list_for_each_safe(pos, tmp, &pc->pc_set->set_requests) {
                         req = list_entry(pos, struct ptlrpc_request,
@@ -146,8 +160,8 @@ static int ptlrpcd_check(struct ptlrpcd_ctl *pc)
         }
 
         if (rc == 0) {
-                /* 
-                 * If new requests have been added, make sure to wake up. 
+                /*
+                 * If new requests have been added, make sure to wake up.
                  */
                 spin_lock(&pc->pc_set->set_new_req_lock);
                 rc = !list_empty(&pc->pc_set->set_new_requests);
@@ -158,7 +172,7 @@ static int ptlrpcd_check(struct ptlrpcd_ctl *pc)
 }
 
 #ifdef __KERNEL__
-/* 
+/*
  * ptlrpc's code paths like to execute in process context, so we have this
  * thread which spins on a set which contains the io rpcs. llite specifies
  * ptlrpcd's set when it pushes pages down into the oscs.
@@ -166,7 +180,7 @@ static int ptlrpcd_check(struct ptlrpcd_ctl *pc)
 static int ptlrpcd(void *arg)
 {
         struct ptlrpcd_ctl *pc = arg;
-        int rc;
+        int rc, exit = 0;
         ENTRY;
 
         if ((rc = cfs_daemonize_ctxt(pc->pc_name))) {
@@ -176,33 +190,39 @@ static int ptlrpcd(void *arg)
 
         complete(&pc->pc_starting);
 
-        /* 
+        /*
          * This mainloop strongly resembles ptlrpc_set_wait() except that our
          * set never completes.  ptlrpcd_check() calls ptlrpc_check_set() when
-         * there are requests in the set. New requests come in on the set's 
-         * new_req_list and ptlrpcd_check() moves them into the set. 
+         * there are requests in the set. New requests come in on the set's
+         * new_req_list and ptlrpcd_check() moves them into the set.
          */
-        while (1) {
+        do {
                 struct l_wait_info lwi;
-                cfs_duration_t timeout;
+                int timeout;
 
-                timeout = cfs_time_seconds(ptlrpc_set_next_timeout(pc->pc_set));
-                lwi = LWI_TIMEOUT(timeout, ptlrpc_expired_set, pc->pc_set);
+                timeout = ptlrpc_set_next_timeout(pc->pc_set);
+                lwi = LWI_TIMEOUT(cfs_time_seconds(timeout ? timeout : 1),
+                                  ptlrpc_expired_set, pc->pc_set);
 
                 l_wait_event(pc->pc_set->set_waitq, ptlrpcd_check(pc), &lwi);
 
                 /*
                  * Abort inflight rpcs for forced stop case.
                  */
-                if (test_bit(LIOD_STOP_FORCE, &pc->pc_flags))
-                        ptlrpc_abort_set(pc->pc_set);
+                if (test_bit(LIOD_STOP, &pc->pc_flags)) {
+                        if (test_bit(LIOD_FORCE, &pc->pc_flags))
+                                ptlrpc_abort_set(pc->pc_set);
+                        exit++;
+                }
 
-                if (test_bit(LIOD_STOP, &pc->pc_flags))
-                        break;
-        }
+                /*
+                 * Let's make one more loop to make sure that ptlrpcd_check()
+                 * copied all raced new rpcs into the set so we can kill them.
+                 */
+        } while (exit < 2);
 
-        /* 
-         * Wait for inflight requests to drain. 
+        /*
+         * Wait for inflight requests to drain.
          */
         if (!list_empty(&pc->pc_set->set_requests))
                 ptlrpc_set_wait(pc->pc_set);
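
The rework above replaces "while (1) { ...; if (LIOD_STOP) break; }" with a
do/while that runs one more full pass once the stop flag has been seen.  The
following self-contained sketch shows the same shutdown pattern in plain C;
the pending/stop_requested names are illustrative, not the Lustre ones.  With
a plain break-on-stop, an item queued in the same iteration that sets the flag
would be left on the list.

#include <stdbool.h>
#include <stdio.h>

static int pending;              /* stand-in for set_new_requests */
static bool stop_requested;      /* stand-in for LIOD_STOP */

static void check(void)          /* stand-in for ptlrpcd_check() */
{
        while (pending > 0) {
                printf("handled one queued item\n");
                pending--;
        }
}

int main(void)
{
        int exit_count = 0;
        int iteration = 0;

        pending = 2;
        do {
                check();

                if (iteration++ == 0) {
                        stop_requested = true;
                        pending++;       /* item racing in around stop time */
                }
                if (stop_requested)
                        exit_count++;    /* pass 1 notices the stop ... */
        } while (exit_count < 2);        /* ... pass 2 drains what raced in */

        printf("stopped with %d item(s) left behind\n", pending);
        return 0;
}
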
@@ -211,6 +231,7 @@ static int ptlrpcd(void *arg)
 out:
         clear_bit(LIOD_START, &pc->pc_flags);
         clear_bit(LIOD_STOP, &pc->pc_flags);
+        clear_bit(LIOD_FORCE, &pc->pc_flags);
         return 0;
 }
 
@@ -221,8 +242,8 @@ int ptlrpcd_check_async_rpcs(void *arg)
         struct ptlrpcd_ctl *pc = arg;
         int                  rc = 0;
 
-        /* 
-         * Single threaded!! 
+        /*
+         * Single threaded!!
          */
         pc->pc_recurred++;
 
@@ -230,8 +251,8 @@ int ptlrpcd_check_async_rpcs(void *arg)
                 rc = ptlrpcd_check(pc);
                 if (!rc)
                         ptlrpc_expired_set(pc->pc_set);
-                /* 
-                 * XXX: send replay requests. 
+                /*
+                 * XXX: send replay requests.
                  */
                 if (pc == &ptlrpcd_recovery_pc)
                         rc = ptlrpcd_check(pc);
@@ -255,20 +276,21 @@ int ptlrpcd_start(char *name, struct ptlrpcd_ctl *pc)
 {
         int rc = 0;
         ENTRY;
-        /* 
-         * Do not allow start second thread for one pc. 
+
+        /*
+         * Do not allow starting a second thread for the same pc.
          */
-        if (test_and_set_bit(LIOD_START, &pc->pc_flags)) {
+        if (test_bit(LIOD_START, &pc->pc_flags)) {
                 CERROR("Starting second thread (%s) for same pc %p\n",
                        name, pc);
                 RETURN(-EALREADY);
         }
 
+        set_bit(LIOD_START, &pc->pc_flags);
         init_completion(&pc->pc_starting);
         init_completion(&pc->pc_finishing);
         spin_lock_init(&pc->pc_lock);
-        snprintf (pc->pc_name, sizeof (pc->pc_name), name);
+        strncpy(pc->pc_name, name, sizeof(pc->pc_name) - 1);
 
         pc->pc_set = ptlrpc_prep_set();
         if (pc->pc_set == NULL)
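
A side note on the ptlrpcd_start() hunk above: the removed
snprintf(pc->pc_name, sizeof(pc->pc_name), name) passes the caller's string in
the format position, so any '%' in the name would be parsed as a conversion
specifier.  The standalone example below contrasts the safe "%s" form with the
strncpy() form the patch uses; the "ptlrpcd-%brw" name is hypothetical, chosen
only because it contains a '%'.

#include <stdio.h>
#include <string.h>

int main(void)
{
        char buf[16] = "";                  /* zero-filled, so strncpy stays terminated */
        const char *name = "ptlrpcd-%brw";  /* hypothetical name containing '%' */

        /*
         * The removed call was effectively snprintf(buf, sizeof(buf), name):
         * with 'name' in the format position, "%b" is parsed as a conversion
         * specifier, which is undefined behaviour when nothing matches it.
         * Both forms below copy the bytes literally instead.
         */
        snprintf(buf, sizeof(buf), "%s", name);
        printf("snprintf \"%%s\": %s\n", buf);

        memset(buf, 0, sizeof(buf));
        strncpy(buf, name, sizeof(buf) - 1);    /* form used by the patch */
        printf("strncpy      : %s\n", buf);

        return 0;
}

Keep in mind that strncpy() alone does not guarantee NUL termination; the
sizeof - 1 bound only helps if the last byte is already zero, which presumably
holds for the statically allocated ptlrpcd_ctl structures here.
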
@@ -305,7 +327,7 @@ void ptlrpcd_stop(struct ptlrpcd_ctl *pc, int force)
 
         set_bit(LIOD_STOP, &pc->pc_flags);
         if (force)
-                set_bit(LIOD_STOP_FORCE, &pc->pc_flags);
+                set_bit(LIOD_FORCE, &pc->pc_flags);
         cfs_waitq_signal(&pc->pc_set->set_waitq);
 #ifdef __KERNEL__
         wait_for_completion(&pc->pc_finishing);