b=13813

author nathan <nathan>

Mon, 5 Nov 2007 19:30:38 +0000 (19:30 +0000)

committer nathan <nathan>

Mon, 5 Nov 2007 19:30:38 +0000 (19:30 +0000)
author nathan <nathan>
Mon, 5 Nov 2007 19:30:38 +0000 (19:30 +0000)
committer nathan <nathan>
Mon, 5 Nov 2007 19:30:38 +0000 (19:30 +0000)
diff --git a/lustre/include/lustre_net.h b/lustre/include/lustre_net.h

index 5c8d859..6fbde89 100644 (file)
--- a/lustre/include/lustre_net.h
+++ b/lustre/include/lustre_net.h
@@ -694,6 +694,7 @@ void ptlrpc_free_rq_pool(struct ptlrpc_request_pool *pool);
  void ptlrpc_add_rqs_to_pool(struct ptlrpc_request_pool *pool, int num_rq);
  struct ptlrpc_request_pool *ptlrpc_init_rq_pool(int, int,
                                                  void (*populate_pool)(struct ptlrpc_request_pool *, int));
+void ptlrpc_at_set_req_timeout(struct ptlrpc_request *req);
  struct ptlrpc_request *ptlrpc_prep_req(struct obd_import *imp, __u32 version,
                                         int opcode, int count, int *lengths,
                                         char **bufs);
diff --git a/lustre/ldlm/ldlm_request.c b/lustre/ldlm/ldlm_request.c

index fa8a5cd..8b5c280 100644 (file)
--- a/lustre/ldlm/ldlm_request.c
+++ b/lustre/ldlm/ldlm_request.c
@@ -918,9 +918,9 @@ int ldlm_cli_cancel_req(struct obd_export *exp,
                  req->rq_no_resend = 1;
                  req->rq_no_delay = 1;
  
-                /* XXX FIXME bug 249 */
                  req->rq_request_portal = LDLM_CANCEL_REQUEST_PORTAL;
                  req->rq_reply_portal = LDLM_CANCEL_REPLY_PORTAL;
+                ptlrpc_at_set_req_timeout(req);
  
                  body = lustre_msg_buf(req->rq_reqmsg, DLM_LOCKREQ_OFF,
                                        sizeof(*body));
diff --git a/lustre/mdc/mdc_reint.c b/lustre/mdc/mdc_reint.c

index e983e85..37f3a29 100644 (file)
--- a/lustre/mdc/mdc_reint.c
+++ b/lustre/mdc/mdc_reint.c
@@ -136,7 +136,8 @@ int mdc_setattr(struct obd_export *exp, struct mdc_op_data *op_data,
                  RETURN(-ENOMEM);
  
          if (iattr->ia_valid & ATTR_FROM_OPEN) {
-                req->rq_request_portal = MDS_SETATTR_PORTAL; //XXX FIXME bug 249
+                req->rq_request_portal = MDS_SETATTR_PORTAL;
+                ptlrpc_at_set_req_timeout(req);
                  rpc_lock = obd->u.cli.cl_setattr_lock;
          } else {
                  rpc_lock = obd->u.cli.cl_rpc_lock;
diff --git a/lustre/mdc/mdc_request.c b/lustre/mdc/mdc_request.c

index fc3358e..7b6079d 100644 (file)
--- a/lustre/mdc/mdc_request.c
+++ b/lustre/mdc/mdc_request.c
@@ -678,8 +678,8 @@ int mdc_close(struct obd_export *exp, struct obdo *oa,
          /* To avoid a livelock (bug 7034), we need to send CLOSE RPCs to a
           * portal whose threads are not taking any DLM locks and are therefore
           * always progressing */
-        /* XXX FIXME bug 249 */
          req->rq_request_portal = MDS_READPAGE_PORTAL;
+        ptlrpc_at_set_req_timeout(req);
  
          /* Ensure that this close's handle is fixed up during replay. */
          LASSERT(och != NULL);
@@ -787,8 +787,8 @@ int mdc_readpage(struct obd_export *exp, struct ll_fid *fid, __u64 offset,
          if (req == NULL)
                  GOTO(out, rc = -ENOMEM);
  
-        /* XXX FIXME bug 249 */
          req->rq_request_portal = MDS_READPAGE_PORTAL;
+        ptlrpc_at_set_req_timeout(req);
  
          desc = ptlrpc_prep_bulk_imp(req, 1, BULK_PUT_SINK, MDS_BULK_PORTAL);
          if (desc == NULL)
diff --git a/lustre/osc/osc_create.c b/lustre/osc/osc_create.c

index 90e0019..71c8986 100644 (file)
--- a/lustre/osc/osc_create.c
+++ b/lustre/osc/osc_create.c
@@ -160,7 +160,8 @@ static int oscc_internal_create(struct osc_creator *oscc)
                  RETURN(-ENOMEM);
          }
  
-        request->rq_request_portal = OST_CREATE_PORTAL; //XXX FIXME bug 249
+        request->rq_request_portal = OST_CREATE_PORTAL;
+        ptlrpc_at_set_req_timeout(request);
          body = lustre_msg_buf(request->rq_reqmsg, REQ_REC_OFF, sizeof(*body));
  
          spin_lock(&oscc->oscc_lock);
diff --git a/lustre/osc/osc_request.c b/lustre/osc/osc_request.c

index 9e6fec3..42b877d 100644 (file)
--- a/lustre/osc/osc_request.c
+++ b/lustre/osc/osc_request.c
@@ -478,6 +478,7 @@ static int osc_punch(struct obd_export *exp, struct obd_info *oinfo,
                  RETURN(-ENOMEM);
  
          req->rq_request_portal = OST_IO_PORTAL;         /* bug 7198 */
+        ptlrpc_at_set_req_timeout(req);
  
          body = lustre_msg_buf(req->rq_reqmsg, REQ_REC_OFF, sizeof(*body));
          memcpy(&body->oa, oinfo->oi_oa, sizeof(*oinfo->oi_oa));
@@ -610,6 +611,7 @@ static int osc_destroy(struct obd_export *exp, struct obdo *oa,
                  RETURN(-ENOMEM);
  
          req->rq_request_portal = OST_IO_PORTAL;         /* bug 7198 */
+        ptlrpc_at_set_req_timeout(req);
  
          body = lustre_msg_buf(req->rq_reqmsg, REQ_REC_OFF, sizeof(*body));
  
@@ -942,6 +944,7 @@ static int osc_brw_prep_request(int cmd, struct client_obd *cli,struct obdo *oa,
                  RETURN (-ENOMEM);
  
          req->rq_request_portal = OST_IO_PORTAL;         /* bug 7198 */
+        ptlrpc_at_set_req_timeout(req);
  
          if (opc == OST_WRITE)
                  desc = ptlrpc_prep_bulk_imp (req, page_count,
@@ -3048,7 +3051,8 @@ static int osc_statfs_async(struct obd_device *obd, struct obd_info *oinfo,
                  RETURN(-ENOMEM);
  
          ptlrpc_req_set_repsize(req, 2, size);
-        req->rq_request_portal = OST_CREATE_PORTAL; //XXX FIXME bug 249
+        req->rq_request_portal = OST_CREATE_PORTAL;
+        ptlrpc_at_set_req_timeout(req);
  
          req->rq_interpret_reply = osc_statfs_interpret;
          CLASSERT(sizeof(*aa) <= sizeof(req->rq_async_args));
@@ -3079,7 +3083,8 @@ static int osc_statfs(struct obd_device *obd, struct obd_statfs *osfs,
                  RETURN(-ENOMEM);
  
          ptlrpc_req_set_repsize(req, 2, size);
-        req->rq_request_portal = OST_CREATE_PORTAL; //XXX FIXME bug 249
+        req->rq_request_portal = OST_CREATE_PORTAL;
+        ptlrpc_at_set_req_timeout(req);
  
          rc = ptlrpc_queue_wait(req);
          if (rc)
diff --git a/lustre/ost/ost_handler.c b/lustre/ost/ost_handler.c

index 056ed0d..69e12f9 100644 (file)
--- a/lustre/ost/ost_handler.c
+++ b/lustre/ost/ost_handler.c
@@ -812,6 +812,7 @@ static int ost_brw_read(struct ptlrpc_request *req, struct obd_trans_info *oti)
                  else
                          rc = ptlrpc_start_bulk_transfer(desc);
                  if (rc == 0) {
+                        time_t start = cfs_time_current_sec();
                          do {
                                  long timeoutl = req->rq_deadline - 
                                          cfs_time_current_sec();
@@ -830,7 +831,11 @@ static int ost_brw_read(struct ptlrpc_request *req, struct obd_trans_info *oti)
                                   (req->rq_deadline > cfs_time_current_sec()));
  
                          if (rc == -ETIMEDOUT) {
-                                DEBUG_REQ(D_ERROR, req, "timeout on bulk PUT");
+                                DEBUG_REQ(D_ERROR, req,
+                                          "timeout on bulk PUT after %ld%+lds",
+                                          req->rq_deadline - start,
+                                          cfs_time_current_sec() -
+                                          req->rq_deadline);
                                  ptlrpc_abort_bulk(desc);
                          } else if (exp->exp_failed) {
                                  DEBUG_REQ(D_ERROR, req, "Eviction on bulk PUT");
@@ -1063,6 +1068,7 @@ static int ost_brw_write(struct ptlrpc_request *req, struct obd_trans_info *oti)
          else
                  rc = ptlrpc_start_bulk_transfer (desc);
          if (rc == 0) {
+                time_t start = cfs_time_current_sec();
                  do {
                          long timeoutl = req->rq_deadline - 
                                  cfs_time_current_sec();
@@ -1079,7 +1085,11 @@ static int ost_brw_write(struct ptlrpc_request *req, struct obd_trans_info *oti)
                           (req->rq_deadline > cfs_time_current_sec()));
  
                  if (rc == -ETIMEDOUT) {
-                        DEBUG_REQ(D_ERROR, req, "timeout on bulk GET");
+                        DEBUG_REQ(D_ERROR, req,
+                                  "timeout on bulk GET after %ld%+lds",
+                                  req->rq_deadline - start,
+                                  cfs_time_current_sec() -
+                                  req->rq_deadline);
                          ptlrpc_abort_bulk(desc);
                  } else if (desc->bd_export->exp_failed) {
                          DEBUG_REQ(D_ERROR, req, "Eviction on bulk GET");
diff --git a/lustre/ptlrpc/client.c b/lustre/ptlrpc/client.c

index 2d30542..6fdc64b 100644 (file)
--- a/lustre/ptlrpc/client.c
+++ b/lustre/ptlrpc/client.c
@@ -190,7 +190,7 @@ void ptlrpc_free_bulk(struct ptlrpc_bulk_desc *desc)
  }
  
  /* Set server timelimit for this req */
-static void ptlrpc_at_set_req_timeout(struct ptlrpc_request *req) 
+void ptlrpc_at_set_req_timeout(struct ptlrpc_request *req)
  {
          __u32 serv_est;
          int idx;
@@ -532,7 +532,6 @@ ptlrpc_prep_req_pool(struct obd_import *imp, __u32 version, int opcode,
  
          request->rq_phase = RQ_PHASE_NEW;
  
-        /* XXX FIXME bug 249 */
          request->rq_request_portal = imp->imp_client->cli_request_portal;
          request->rq_reply_portal = imp->imp_client->cli_reply_portal;
          
diff --git a/lustre/ptlrpc/pack_generic.c b/lustre/ptlrpc/pack_generic.c

index b7ce82b..dc31d08 100644 (file)
--- a/lustre/ptlrpc/pack_generic.c
+++ b/lustre/ptlrpc/pack_generic.c
@@ -2397,9 +2397,9 @@ void _debug_req(struct ptlrpc_request *req, __u32 mask,
          va_start(args, fmt);
          libcfs_debug_vmsg2(data->msg_cdls, data->msg_subsys, mask, data->msg_file,
                             data->msg_fn, data->msg_line, fmt, args,
-                           " req@%p x"LPD64"/t"LPD64" o%d->%s@%s:%d lens %d/%d "
-                           "e %d to %d dl %ld ref %d fl "REQ_FLAGS_FMT"/%x/%x "
-                           "rc %d/%d\n",
+                           " req@%p x"LPD64"/t"LPD64" o%d->%s@%s:%d/%d "
+                           "lens %d/%d e %d to %d dl %ld ref %d "
+                           "fl "REQ_FLAGS_FMT"/%x/%x rc %d/%d\n",
                             req, req->rq_xid, req->rq_transno,
                             req->rq_reqmsg ? lustre_msg_get_opc(req->rq_reqmsg) : -1,
                             req->rq_import ? obd2cli_tgt(req->rq_import->imp_obd) :
@@ -2409,8 +2409,7 @@ void _debug_req(struct ptlrpc_request *req, __u32 mask,
                                  (char *)req->rq_import->imp_connection->c_remote_uuid.uuid :
                             req->rq_export ?
                                  (char *)req->rq_export->exp_connection->c_remote_uuid.uuid : "<?>",
-                           (req->rq_import && req->rq_import->imp_client) ?
-                                req->rq_import->imp_client->cli_request_portal : -1,
+                           req->rq_request_portal,  req->rq_reply_portal,
                             req->rq_reqlen, req->rq_replen,
                             req->rq_early_count, req->rq_timeout, req->rq_deadline,
                             atomic_read(&req->rq_refcount), DEBUG_REQ_FLAGS(req),
diff --git a/lustre/ptlrpc/ptlrpc_module.c b/lustre/ptlrpc/ptlrpc_module.c

index 4a46cb7..56e7228 100644 (file)
--- a/lustre/ptlrpc/ptlrpc_module.c
+++ b/lustre/ptlrpc/ptlrpc_module.c
@@ -145,6 +145,7 @@ EXPORT_SYMBOL(ptlrpc_add_rqs_to_pool);
  EXPORT_SYMBOL(ptlrpc_init_rq_pool);
  EXPORT_SYMBOL(ptlrpc_free_rq_pool);
  EXPORT_SYMBOL(ptlrpc_prep_req_pool);
+EXPORT_SYMBOL(ptlrpc_at_set_req_timeout);
  EXPORT_SYMBOL(ptlrpc_prep_req);
  EXPORT_SYMBOL(ptlrpc_free_req);
  EXPORT_SYMBOL(ptlrpc_unregister_reply);
diff --git a/lustre/ptlrpc/recov_thread.c b/lustre/ptlrpc/recov_thread.c

index ea915c6..9ec7074 100644 (file)
--- a/lustre/ptlrpc/recov_thread.c
+++ b/lustre/ptlrpc/recov_thread.c
@@ -370,9 +370,10 @@ static int log_commit_thread(void *arg)
                                  break;
                          }
  
-                        /* XXX FIXME bug 249, 5515 */
+                        /* bug 5515 */
                          request->rq_request_portal = LDLM_CANCEL_REQUEST_PORTAL;
                          request->rq_reply_portal = LDLM_CANCEL_REPLY_PORTAL;
+                        ptlrpc_at_set_req_timeout(request);
  
                          ptlrpc_req_set_repsize(request, 1, NULL);
                          mutex_down(&llcd->llcd_ctxt->loc_sem);
diff --git a/lustre/ptlrpc/service.c b/lustre/ptlrpc/service.c

index e05bdf6..287076e 100644 (file)
--- a/lustre/ptlrpc/service.c
+++ b/lustre/ptlrpc/service.c
@@ -49,7 +49,7 @@ unsigned int at_history = 600;
  CFS_MODULE_PARM(at_history, "i", int, 0644,
                  "Adaptive timeouts remember the slowest event that took place "
                  "within this period (sec)");
-static int at_early_margin = 3;
+static int at_early_margin = 5;
  CFS_MODULE_PARM(at_early_margin, "i", int, 0644,
                  "How soon before an RPC deadline to send an early reply");
  static int at_extra = 30;
@@ -662,8 +662,9 @@ static int ptlrpc_at_send_early_reply(struct ptlrpc_request *req,
                  RETURN(0);
          
          if (olddl < 0) {
-                CDEBUG(D_ADAPTTO, "x"LPU64": Already past deadline (%+lds), not"
-                       " sending early reply\n", req->rq_xid, olddl);
+                CDEBUG(D_WARNING, "x"LPU64": Already past deadline (%+lds), not"
+                       " sending early reply. Increase at_early_margin (%d)?\n",
+                       req->rq_xid, olddl, at_early_margin);
                  /* Return an error so we're not re-added to the timed list. */
                  RETURN(-ETIMEDOUT);
          }
@@ -996,11 +997,12 @@ ptlrpc_server_handle_request(struct ptlrpc_service *svc,
          /* Discard requests queued for longer than the deadline.  
             The deadline is increased if we send an early reply. */
          if (cfs_time_current_sec() > request->rq_deadline) {
-                CERROR("Dropping timed-out opc %d request from %s"
-                       ": deadline %lds ago\n",
-                       lustre_msg_get_opc(request->rq_reqmsg),
-                       libcfs_id2str(request->rq_peer),
-                       cfs_time_current_sec() - request->rq_deadline);
+                DEBUG_REQ(D_ERROR, request, "Dropping timed-out request from %s"
+                          ": deadline %ld%+lds ago\n",
+                          libcfs_id2str(request->rq_peer),
+                          request->rq_deadline -
+                          request->rq_arrival_time.tv_sec,
+                          cfs_time_current_sec() - request->rq_deadline);
                  goto put_rpc_export;
          }
  
@@ -1042,8 +1044,9 @@ put_conn:
  
          if (cfs_time_current_sec() > request->rq_deadline) {
                  DEBUG_REQ(D_WARNING, request, "Request x"LPU64" took longer "
-                          "than estimated (%+lds); client may timeout.",
-                          request->rq_xid,
+                          "than estimated (%ld%+lds); client may timeout.",
+                          request->rq_xid, request->rq_deadline -
+                          request->rq_arrival_time.tv_sec,
                            cfs_time_current_sec() - request->rq_deadline);
          }
author	nathan <nathan>
	Mon, 5 Nov 2007 19:30:38 +0000 (19:30 +0000)
committer	nathan <nathan>
	Mon, 5 Nov 2007 19:30:38 +0000 (19:30 +0000)
lustre/include/lustre_net.h		patch \| blob \| history
lustre/ldlm/ldlm_request.c		patch \| blob \| history
lustre/mdc/mdc_reint.c		patch \| blob \| history
lustre/mdc/mdc_request.c		patch \| blob \| history
lustre/osc/osc_create.c		patch \| blob \| history
lustre/osc/osc_request.c		patch \| blob \| history
lustre/ost/ost_handler.c		patch \| blob \| history
lustre/ptlrpc/client.c		patch \| blob \| history
lustre/ptlrpc/pack_generic.c		patch \| blob \| history
lustre/ptlrpc/ptlrpc_module.c		patch \| blob \| history
lustre/ptlrpc/recov_thread.c		patch \| blob \| history
lustre/ptlrpc/service.c		patch \| blob \| history