Whamcloud - gitweb
Fix resend race during recovery (don't set rq_timeout = 0).
author adilger <adilger>
Mon, 12 Apr 2004 21:29:37 +0000 (21:29 +0000)
committer adilger <adilger>
Mon, 12 Apr 2004 21:29:37 +0000 (21:29 +0000)
b=2950
r=robert

lustre/include/linux/lustre_net.h
lustre/include/linux/obd.h
lustre/ptlrpc/client.c
lustre/ptlrpc/events.c
lustre/ptlrpc/niobuf.c

diff --git a/lustre/include/linux/lustre_net.h b/lustre/include/linux/lustre_net.h
index 23e72f6..43d5445 100644 (file)
@@ -267,7 +267,7 @@ struct ptlrpc_request {
         unsigned int rq_intr:1, rq_replied:1, rq_err:1,
                 rq_timedout:1, rq_resend:1, rq_restart:1, rq_replay:1,
                 rq_no_resend:1, rq_waiting:1, rq_receiving_reply:1,
-                rq_no_delay:1;
+                rq_no_delay:1, rq_net_err:1;
         int rq_phase;
         /* client-side refcount for SENT race */
         atomic_t rq_refcount;
diff --git a/lustre/include/linux/obd.h b/lustre/include/linux/obd.h
index b5d47f1..185424e 100644 (file)
@@ -463,9 +463,9 @@ struct obd_device {
         struct obd_uuid obd_uuid;
 
         int obd_minor;
-        int obd_attached:1, obd_set_up:1, obd_recovering:1,
-            obd_abort_recovery:1, obd_replayable:1, obd_no_transno:1,
-            obd_no_recov:1, obd_stopping:1;
+        unsigned int obd_attached:1, obd_set_up:1, obd_recovering:1,
+                obd_abort_recovery:1, obd_replayable:1, obd_no_transno:1,
+                obd_no_recov:1, obd_stopping:1;
         atomic_t obd_refcount;
         wait_queue_head_t obd_refcount_waitq;
         struct proc_dir_entry *obd_proc_entry;
diff --git a/lustre/ptlrpc/client.c b/lustre/ptlrpc/client.c
index 70db906..45bae96 100644 (file)
@@ -406,6 +406,13 @@ static int ptlrpc_check_reply(struct ptlrpc_request *req)
                 DEBUG_REQ(D_NET, req, "REPLIED:");
                 GOTO(out, rc = 1);
         }
+        
+        if (req->rq_net_err && !req->rq_timedout) {
+                spin_unlock_irqrestore (&req->rq_lock, flags);
+                ptlrpc_expire_one_request(req); 
+                spin_lock_irqsave (&req->rq_lock, flags);
+                GOTO(out, rc = 0);
+        }
 
         if (req->rq_err) {
                 DEBUG_REQ(D_ERROR, req, "ABORTED:");
@@ -583,7 +590,7 @@ static int ptlrpc_send_new_req(struct ptlrpc_request *req)
         rc = ptl_send_rpc(req);
         if (rc) {
                 DEBUG_REQ(D_HA, req, "send failed (%d); expect timeout", rc);
-                req->rq_timeout = 1;
+                req->rq_net_err = 1;
                 RETURN(rc);
         }
         RETURN(0);
@@ -656,6 +663,10 @@ int ptlrpc_check_set(struct ptlrpc_request_set *set)
                 }
 
                 if (req->rq_phase == RQ_PHASE_RPC) {
+                        if (req->rq_net_err && !req->rq_timedout) {
+                                ptlrpc_expire_one_request(req); 
+                                continue;
+                        }
                         if (req->rq_waiting || req->rq_resend) {
                                 int status;
 
@@ -713,7 +724,7 @@ int ptlrpc_check_set(struct ptlrpc_request_set *set)
                                         DEBUG_REQ(D_HA, req, "send failed (%d)",
                                                   rc);
                                         force_timer_recalc = 1;
-                                        req->rq_timeout = 0;
+                                        req->rq_net_err = 1;
                                 }
                                 /* need to reset the timeout */
                                 force_timer_recalc = 1;
@@ -1203,10 +1214,11 @@ void ptlrpc_resend_req(struct ptlrpc_request *req)
 
         spin_lock_irqsave (&req->rq_lock, flags);
         req->rq_resend = 1;
+        req->rq_net_err = 0;
         req->rq_timedout = 0;
         if (req->rq_bulk) {
                 __u64 old_xid = req->rq_xid;
-                
+
                 /* ensure previous bulk fails */
                 req->rq_xid = ptlrpc_next_xid();
                 CDEBUG(D_HA, "resend bulk old x"LPU64" new x"LPU64"\n",
diff --git a/lustre/ptlrpc/events.c b/lustre/ptlrpc/events.c
index 343ccba..3b366b3 100644 (file)
@@ -57,7 +57,7 @@ void request_out_callback(ptl_event_t *ev)
                  * like failing sends in client.c does currently...  */
 
                 spin_lock_irqsave(&req->rq_lock, flags);
-                req->rq_timeout = 0;
+                req->rq_net_err = 1;
                 spin_unlock_irqrestore(&req->rq_lock, flags);
                 
                 ptlrpc_wake_client_req(req);
diff --git a/lustre/ptlrpc/niobuf.c b/lustre/ptlrpc/niobuf.c
index bb23f3f..4eff5c5 100644 (file)
@@ -458,6 +458,7 @@ int ptl_send_rpc(struct ptlrpc_request *request)
         request->rq_replied = 0;
         request->rq_err = 0;
         request->rq_timedout = 0;
+        request->rq_net_err = 0;
         request->rq_resend = 0;
         request->rq_restart = 0;
         spin_unlock_irqrestore (&request->rq_lock, flags);