Whamcloud - gitweb
b=23820 Handle unsent requests with rq_net_err in ptlrpc_check_set()
[fs/lustre-release.git] / lustre / ptlrpc / client.c
index a844650..214be9c 100644 (file)
@@ -75,7 +75,7 @@ struct ptlrpc_connection *ptlrpc_uuid_to_connection(struct obd_uuid *uuid)
 
         err = ptlrpc_uuid_to_peer(uuid, &peer, &self);
         if (err != 0) {
-                CDEBUG(D_NETERROR, "cannot find peer %s!\n", uuid->uuid);
+                CNETERR("cannot find peer %s!\n", uuid->uuid);
                 return NULL;
         }
 
@@ -194,6 +194,7 @@ void ptlrpc_prep_bulk_page(struct ptlrpc_bulk_desc *desc,
 
         desc->bd_nob += len;
 
+        cfs_page_pin(page);
         ptlrpc_add_bulk_page(desc, page, pageoffset, len);
 }
 
@@ -203,6 +204,7 @@ void ptlrpc_prep_bulk_page(struct ptlrpc_bulk_desc *desc,
  */
 void ptlrpc_free_bulk(struct ptlrpc_bulk_desc *desc)
 {
+        int i;
         ENTRY;
 
         LASSERT(desc != NULL);
@@ -217,6 +219,9 @@ void ptlrpc_free_bulk(struct ptlrpc_bulk_desc *desc)
         else
                 class_import_put(desc->bd_import);
 
+        for (i = 0; i < desc->bd_iov_count ; i++)
+                cfs_page_unpin(desc->bd_iov[i].kiov_page);
+
         OBD_FREE(desc, offsetof(struct ptlrpc_bulk_desc,
                                 bd_iov[desc->bd_max_iov]));
         EXIT;
@@ -1036,8 +1041,6 @@ static int ptlrpc_import_delay_req(struct obd_import *imp,
         } else if (imp->imp_state == LUSTRE_IMP_CLOSED) {
                 DEBUG_REQ(D_ERROR, req, "IMP_CLOSED ");
                 *status = -EIO;
-        } else if (imp->imp_obd->obd_no_recov) {
-                *status = -ESHUTDOWN;
         } else if (ptlrpc_send_limit_expired(req)) {
                 /* probably doesn't need to be a D_ERROR after initial testing */
                 DEBUG_REQ(D_ERROR, req, "send limit expired ");
@@ -1049,7 +1052,7 @@ static int ptlrpc_import_delay_req(struct obd_import *imp,
                         DEBUG_REQ(D_ERROR, req, "invalidate in flight");
                         *status = -EIO;
                 }
-        } else if (imp->imp_invalid) {
+        } else if (imp->imp_invalid || imp->imp_obd->obd_no_recov) {
                 if (!imp->imp_deactive)
                           DEBUG_REQ(D_ERROR, req, "IMP_INVALID");
                 *status = -ESHUTDOWN; /* bz 12940 */
@@ -1308,6 +1311,10 @@ static int after_reply(struct ptlrpc_request *req)
                                 lustre_msg_get_last_committed(req->rq_repmsg);
                 }
                 ptlrpc_free_committed(imp);
+
+                if (req->rq_transno > imp->imp_peer_committed_transno)
+                        ptlrpc_pinger_commit_expected(imp);
+
                 cfs_spin_unlock(&imp->imp_lock);
         }
 
@@ -1487,6 +1494,15 @@ int ptlrpc_check_set(const struct lu_env *env, struct ptlrpc_request_set *set)
                         if (ptlrpc_client_recv_or_unlink(req) ||
                             ptlrpc_client_bulk_active(req))
                                 continue;
+                        /* If there is no need to resend, fail it now. */
+                        if (req->rq_no_resend) {
+                                if (req->rq_status == 0)
+                                        req->rq_status = -EIO;
+                                ptlrpc_rqphase_move(req, RQ_PHASE_INTERPRET);
+                                GOTO(interpret, req->rq_status);
+                        } else {
+                                continue;
+                        }
                 }
 
                 if (req->rq_err) {
@@ -1740,14 +1756,20 @@ int ptlrpc_expire_one_request(struct ptlrpc_request *req, int async_unlink)
         req->rq_timedout = 1;
         cfs_spin_unlock(&req->rq_lock);
 
-        DEBUG_REQ(req->rq_fake ? D_INFO : D_WARNING, req, 
-                  "Request x"LPU64" sent from %s to NID %s "CFS_DURATION_T"s "
-                  "ago has %s ("CFS_DURATION_T"s prior to deadline).\n",
+        DEBUG_REQ(req->rq_fake ? D_INFO : D_WARNING, req, "Request x"LPU64
+                  " sent from %s to NID %s has %s: [sent "CFS_DURATION_T"] "
+                  "[real_sent "CFS_DURATION_T"] [current "CFS_DURATION_T"] "
+                  "[deadline "CFS_DURATION_T"s] [delay "CFS_DURATION_T"s]",
                   req->rq_xid, imp ? imp->imp_obd->obd_name : "<?>",
-                  imp ? libcfs_nid2str(imp->imp_connection->c_peer.nid) : "<?>",
-                  cfs_time_sub(cfs_time_current_sec(), req->rq_sent),
-                  req->rq_net_err ? "failed due to network error" : "timed out",
-                  cfs_time_sub(req->rq_deadline, req->rq_sent));
+                  imp ? libcfs_nid2str(imp->imp_connection->c_peer.nid) : "<?>", 
+                  req->rq_net_err ? "failed due to network error" :
+                     ((req->rq_real_sent == 0 ||
+                       cfs_time_before(req->rq_real_sent, req->rq_sent) ||
+                       cfs_time_aftereq(req->rq_real_sent, req->rq_deadline)) ?
+                      "timed out for sent delay" : "timed out for slow reply"),
+                  req->rq_sent, req->rq_real_sent, cfs_time_current_sec(),
+                  cfs_time_sub(req->rq_deadline, req->rq_sent),
+                  cfs_time_sub(cfs_time_current_sec(), req->rq_deadline));
 
         if (imp != NULL && obd_debug_peer_on_timeout)
                 LNetCtl(IOC_LIBCFS_DEBUG_PEER, &imp->imp_connection->c_peer);
@@ -2266,7 +2288,7 @@ void ptlrpc_free_committed(struct obd_import *imp)
 
         if (imp->imp_peer_committed_transno == imp->imp_last_transno_checked &&
             imp->imp_generation == imp->imp_last_generation_checked) {
-                CDEBUG(D_RPCTRACE, "%s: skip recheck: last_committed "LPU64"\n",
+                CDEBUG(D_INFO, "%s: skip recheck: last_committed "LPU64"\n",
                        imp->imp_obd->obd_name, imp->imp_peer_committed_transno);
                 EXIT;
                 return;
@@ -2305,7 +2327,7 @@ void ptlrpc_free_committed(struct obd_import *imp)
                         break;
                 }
 
-                DEBUG_REQ(D_RPCTRACE, req, "commit (last_committed "LPU64")",
+                DEBUG_REQ(D_INFO, req, "commit (last_committed "LPU64")",
                           imp->imp_peer_committed_transno);
 free_req:
                 cfs_spin_lock(&req->rq_lock);
@@ -2572,8 +2594,6 @@ int ptlrpc_replay_req(struct ptlrpc_request *req)
         ENTRY;
 
         LASSERT(req->rq_import->imp_state == LUSTRE_IMP_REPLAY);
-        /* Not handling automatic bulk replay yet (or ever?) */
-        LASSERT(req->rq_bulk == NULL);
 
         LASSERT (sizeof (*aa) <= sizeof (req->rq_async_args));
         aa = ptlrpc_req_async_args(req);
@@ -2710,7 +2730,7 @@ void ptlrpc_init_xid(void)
 
         cfs_spin_lock_init(&ptlrpc_last_xid_lock);
         if (now < YEAR_2004) {
-                ll_get_random_bytes(&ptlrpc_last_xid, sizeof(ptlrpc_last_xid));
+                cfs_get_random_bytes(&ptlrpc_last_xid, sizeof(ptlrpc_last_xid));
                 ptlrpc_last_xid >>= 2;
                 ptlrpc_last_xid |= (1ULL << 61);
         } else {