Whamcloud - gitweb
LU-1788 osc: don't print error msg for EINPROGRESS resend
author: Johann Lombardi <johann@whamcloud.com>
Mon, 27 Aug 2012 16:02:38 +0000 (18:02 +0200)
committer: Oleg Drokin <green@whamcloud.com>
Tue, 4 Sep 2012 15:19:07 +0000 (11:19 -0400)
Now that -EINPROGRESS can be legitimately returned during normal
operation (e.g. quota rebalancing in progress), we shouldn't print an
error message on the client each time the BRW is resent because of
-EINPROGRESS.

This patch also caps the resend delay for BRW to the current request
timeout.

Signed-off-by: Johann Lombardi <johann@whamcloud.com>
Change-Id: Ie7447602756b0721351c7c90cbfb40ad8e3bb720
Reviewed-on: http://review.whamcloud.com/3792
Tested-by: Hudson
Tested-by: Maloo <whamcloud.maloo@gmail.com>
Reviewed-by: Andreas Dilger <adilger@whamcloud.com>
Reviewed-by: Niu Yawei <niu@whamcloud.com>
lustre/include/lustre/lustre_idl.h
lustre/osc/osc_request.c
lustre/ptlrpc/niobuf.c
lustre/tests/replay-ost-single.sh

index 8809569..f184b71 100644 (file)
@@ -1152,7 +1152,7 @@ extern void lustre_swab_ptlrpc_body(struct ptlrpc_body *pb);
 #define OBD_CONNECT_JOBSTATS    0x20000000000ULL /* jobid in ptlrpc_body */
 #define OBD_CONNECT_UMASK       0x40000000000ULL /* create uses client umask */
 #define OBD_CONNECT_EINPROGRESS 0x80000000000ULL /* client handles -EINPROGRESS
-                                                  * write RPC error properly */
+                                                  * RPC error properly */
 #define OBD_CONNECT_GRANT_PARAM 0x100000000000ULL/* extra grant params used for
                                                   * finer space reservation */
 #define OBD_CONNECT_NANOSEC_TIME 0x200000000000ULL /* nanosecond timestamps */
index d693fb6..aa79f52 100644 (file)
@@ -1677,16 +1677,16 @@ out:
         RETURN (rc);
 }
 
-int osc_brw_redo_request(struct ptlrpc_request *request,
-                         struct osc_brw_async_args *aa)
+static int osc_brw_redo_request(struct ptlrpc_request *request,
+                               struct osc_brw_async_args *aa, int rc)
 {
         struct ptlrpc_request *new_req;
         struct osc_brw_async_args *new_aa;
         struct osc_async_page *oap;
-        int rc = 0;
         ENTRY;
 
-        DEBUG_REQ(D_ERROR, request, "redo for recoverable error");
+       DEBUG_REQ(rc == -EINPROGRESS ? D_RPCTRACE : D_ERROR, request,
+                 "redo for recoverable error %d", rc);
 
         rc = osc_brw_prep_request(lustre_msg_get_opc(request->rq_reqmsg) ==
                                         OST_WRITE ? OBD_BRW_WRITE :OBD_BRW_READ,
@@ -1713,7 +1713,12 @@ int osc_brw_redo_request(struct ptlrpc_request *request,
         aa->aa_resends++;
         new_req->rq_interpret_reply = request->rq_interpret_reply;
         new_req->rq_async_args = request->rq_async_args;
-        new_req->rq_sent = cfs_time_current_sec() + aa->aa_resends;
+       /* cap resend delay to the current request timeout, this is similar to
+        * what ptlrpc does (see after_reply()) */
+       if (aa->aa_resends > new_req->rq_timeout)
+               new_req->rq_sent = cfs_time_current_sec() + new_req->rq_timeout;
+       else
+               new_req->rq_sent = cfs_time_current_sec() + aa->aa_resends;
         new_req->rq_generation_set = 1;
         new_req->rq_import_generation = request->rq_import_generation;
 
@@ -1918,7 +1923,7 @@ static int brw_interpret(const struct lu_env *env,
                                aa->aa_oa->o_id, aa->aa_oa->o_seq, rc);
                 } else if (rc == -EINPROGRESS ||
                     client_should_resend(aa->aa_resends, aa->aa_cli)) {
-                        rc = osc_brw_redo_request(req, aa);
+                        rc = osc_brw_redo_request(req, aa, rc);
                 } else {
                         CERROR("%s: too many resent retries for object: "
                                ""LPU64":"LPU64", rc = %d.\n",
index 671cee3..cefa639 100644 (file)
@@ -564,7 +564,8 @@ int ptlrpc_send_error(struct ptlrpc_request *req, int may_be_difficult)
         }
 
         if (req->rq_status != -ENOSPC && req->rq_status != -EACCES &&
-            req->rq_status != -EPERM && req->rq_status != -ENOENT)
+           req->rq_status != -EPERM && req->rq_status != -ENOENT &&
+           req->rq_status != -EINPROGRESS)
                 req->rq_type = PTL_RPC_MSG_ERR;
 
         rc = ptlrpc_send_reply(req, may_be_difficult);
index fa62200..a58c4b3 100755 (executable)
@@ -261,10 +261,12 @@ test_8a() {
             return 1
     fi
     do_facet ost1 "lctl set_param fail_loc=0"
-    wait $ddpid || return 1
+    wait $ddpid || true
     cancel_lru_locks osc
     cmp $verify $TDIR/$tfile || return 2
     rm -f $verify $TDIR/$tfile
+       message=`dmesg | grep "redo for recoverable error -115"`
+       [ -z "$message" ] || error "redo error messages found in dmesg"
 }
 run_test 8a "Verify redo io: redo io when get -EINPROGRESS error"