Whamcloud - gitweb
don't resend llog cancels,
authorshadow <shadow>
Wed, 3 Dec 2008 04:54:58 +0000 (04:54 +0000)
committershadow <shadow>
Wed, 3 Dec 2008 04:54:58 +0000 (04:54 +0000)
fix resend requests for ldlm imports.

Branch HEAD
b=17695
i=umka
i=tappro

lustre/ptlrpc/client.c
lustre/ptlrpc/niobuf.c
lustre/ptlrpc/recov_thread.c
lustre/tests/replay-single.sh

index 0913e85..f06de2c 100644 (file)
@@ -1249,8 +1249,11 @@ int ptlrpc_check_set(const struct lu_env *env, struct ptlrpc_request_set *set)
                                         continue;
 
                                 spin_lock(&imp->imp_lock);
-
                                 if (ptlrpc_import_delay_req(imp, req, &status)){
+                                        /* put on delayed list - only if we wait
+                                         * for recovery to finish before send */
+                                        list_del_init(&req->rq_list);
+                                        list_add_tail(&req->rq_list, &imp->imp_delayed_list);
                                         spin_unlock(&imp->imp_lock);
                                         continue;
                                 }
@@ -1282,8 +1285,6 @@ int ptlrpc_check_set(const struct lu_env *env, struct ptlrpc_request_set *set)
                                         /* This is re-sending anyways, 
                                          * let's mark req as resend. */
                                         req->rq_resend = 1;
-                                        lustre_msg_add_flags(req->rq_reqmsg,
-                                                             MSG_RESENT);
                                         if (req->rq_bulk) {
                                                 __u64 old_xid;
 
@@ -1352,17 +1353,8 @@ int ptlrpc_check_set(const struct lu_env *env, struct ptlrpc_request_set *set)
                         spin_unlock(&req->rq_lock);
 
                         req->rq_status = after_reply(req);
-                        if (req->rq_resend) {
-                                /* Add this req to the delayed list so
-                                   it can be errored if the import is
-                                   evicted after recovery. */
-                                spin_lock(&imp->imp_lock);
-                                list_del_init(&req->rq_list);
-                                list_add_tail(&req->rq_list,
-                                              &imp->imp_delayed_list);
-                                spin_unlock(&imp->imp_lock);
+                        if (req->rq_resend)
                                 continue;
-                        }
 
                         /* If there is no bulk associated with this request,
                          * then we're done and should let the interpreter
@@ -2154,8 +2146,6 @@ restart:
         }
 
         if (req->rq_resend) {
-                lustre_msg_add_flags(req->rq_reqmsg, MSG_RESENT);
-
                 if (req->rq_bulk != NULL) {
                         ptlrpc_unregister_bulk(req, 0);
 
@@ -2246,18 +2236,18 @@ after_send:
         }
 
         /* Resend if we need to */
-        if (req->rq_resend) {
+        if (req->rq_resend||req->rq_timedout) {
                 /* ...unless we were specifically told otherwise. */
                 if (req->rq_no_resend)
                         GOTO(out, rc = -ETIMEDOUT);
                 spin_lock(&imp->imp_lock);
+                /* we can have rq_timedout on a dlm fake import which does not
+                 * support recovery - but we need to resend the request on this
+                 * import instead of returning an error */
+                req->rq_resend = 1;
                 goto restart;
         }
 
-        if (req->rq_timedout) {                 /* non-recoverable timeout */
-                GOTO(out, rc = -ETIMEDOUT);
-        }
-
         if (!ptlrpc_client_replied(req)) {
                 /* How can this be? -eeb */
                 DEBUG_REQ(D_ERROR, req, "!rq_replied: ");
index 786f929..11f6641 100644 (file)
@@ -540,6 +540,9 @@ int ptl_send_rpc(struct ptlrpc_request *request, int noreply)
                         RETURN(rc);
         }
 
+        if (request->rq_resend)
+                lustre_msg_add_flags(request->rq_reqmsg, MSG_RESENT);
+
         if (!noreply) {
                 LASSERT (request->rq_replen != 0);
                 if (request->rq_repbuf == NULL) {
index 69aec83..dc4b13b 100644 (file)
@@ -270,6 +270,11 @@ static int llcd_send(struct llog_canceld_ctxt *llcd)
         req->rq_reply_portal = LDLM_CANCEL_REPLY_PORTAL;
         req->rq_interpret_reply = (ptlrpc_interpterer_t)llcd_interpret;
         req->rq_async_args.pointer_arg[0] = llcd;
+
+        /* llog cancels are replayed after reconnect, so a resend would apply
+         * them twice: first from the llog replay, second from the resent rpc */
+        req->rq_no_delay = req->rq_no_resend = 1;
+
         rc = ptlrpc_set_add_new_req(&lcm->lcm_pc, req);
         if (rc) {
                 ptlrpc_request_free(req);
index e97d286..1135b45 100755 (executable)
@@ -1427,33 +1427,6 @@ test_59() {
 }
 run_test 59 "test log_commit_thread vs filter_destroy race"
 
-# bug 17323
-test_59b() {
-    do_facet $SINGLEMDS "lctl set_param debug=+rpctrace"
-    mkdir -p $DIR/$tdir
-    createmany -o $DIR/$tdir/$tfile-%d 2000
-    sync
-#define OBD_FAIL_OBD_LOG_CANCEL_REP      0x606
-    do_facet $SINGLEMDS "lctl set_param fail_loc=0x606"
-    unlinkmany $DIR/$tdir/$tfile-%d 2000
-
-    # make sure that all llcds left ost and nothing left cached
-    sync
-    sleep 10
-    do_facet $SINGLEMDS "lctl set_param fail_loc=0x0"
-
-    # sleep 2 obd_timeouts from ost to make sure that we get resents.
-    local timeout=$(do_facet ost1 lctl get_param -n timeout)
-    timeout=$((timeout * 2))
-    log "Sleep $timeout"
-    sleep $timeout
-    do_facet $SINGLEMDS $LCTL dk | grep -q "RESENT cancel req"
-    local res=$?
-    rmdir $DIR/$tdir
-    return $res
-}
-run_test 59b "resent handle in llog_origin_handle_cancel"
-
 # race between add unlink llog vs cat log init in post_recovery (only for b1_6)
 # bug 12086: should no oops and No ctxt error for this test
 test_60() {