Whamcloud - gitweb
LU-904 ptlrpc: redo io on -EINPROGRESS
authorNiu Yawei <niu@whamcloud.com>
Fri, 13 Jan 2012 08:33:22 +0000 (00:33 -0800)
committerOleg Drokin <green@whamcloud.com>
Mon, 7 May 2012 19:34:34 +0000 (15:34 -0400)
When server return -EINPROGRESS for a write RPC, the client
should keep resending the RPC until server return other
error code or the client is evicted.

This is required by the new quota design: when a write on
OST can't acquire quota from master for broken network, it
should return -EINPROGRESS to inform the client to retry
write infinitely.

This patch also fixed the defect of redo io RPC can't be
aborted during eviction, in a lightweight manner.

Signed-off-by: Niu Yawei <niu@whamcloud.com>
Change-Id: Iea393cb1ea55e9d006f52dbfc39a2b9a3670d682
Reviewed-on: http://review.whamcloud.com/1962
Tested-by: Hudson
Tested-by: Maloo <whamcloud.maloo@gmail.com>
Reviewed-by: Johann Lombardi <johann@whamcloud.com>
Reviewed-by: Fan Yong <yong.fan@whamcloud.com>
Reviewed-by: Oleg Drokin <green@whamcloud.com>
lustre/include/lustre/lustre_idl.h
lustre/include/lustre_net.h
lustre/include/obd_support.h
lustre/liblustre/super.c
lustre/llite/llite_lib.c
lustre/obdfilter/filter_io_26.c
lustre/osc/osc_internal.h
lustre/osc/osc_request.c
lustre/ptlrpc/client.c
lustre/tests/replay-ost-single.sh

index 42426f3..e0e22db 100644 (file)
@@ -1172,7 +1172,8 @@ extern void lustre_swab_ptlrpc_body(struct ptlrpc_body *pb);
                                 OBD_CONNECT_MDS | OBD_CONNECT_SKIP_ORPHAN | \
                                 OBD_CONNECT_GRANT_SHRINK | OBD_CONNECT_FULL20 | \
                                 OBD_CONNECT_64BITHASH | OBD_CONNECT_MAXBYTES | \
-                                OBD_CONNECT_MAX_EASIZE)
+                                OBD_CONNECT_MAX_EASIZE | \
+                                OBD_CONNECT_EINPROGRESS)
 #define ECHO_CONNECT_SUPPORTED (0)
 #define MGS_CONNECT_SUPPORTED  (OBD_CONNECT_VERSION | OBD_CONNECT_AT | \
                                 OBD_CONNECT_FULL20 | OBD_CONNECT_IMP_RECOV)
index 682a977..5c60f12 100644 (file)
@@ -516,7 +516,8 @@ struct ptlrpc_request {
                 rq_reply_truncate:1,
                 rq_committed:1,
                 /* whether the "rq_set" is a valid one */
-                rq_invalid_rqset:1;
+                rq_invalid_rqset:1,
+                rq_generation_set:1;
 
         enum rq_phase rq_phase; /* one of RQ_PHASE_* */
         enum rq_phase rq_next_phase; /* one of RQ_PHASE_* to be used next */
index 4c96565..33cc91e 100644 (file)
@@ -288,6 +288,7 @@ int obd_alloc_fail(const void *ptr, const char *name, const char *type,
 #define OBD_FAIL_OST_BRW_PAUSE_BULK2     0x227
 #define OBD_FAIL_OST_MAPBLK_ENOSPC       0x228
 #define OBD_FAIL_OST_ENOINO              0x229
+#define OBD_FAIL_OST_DQACQ_NET           0x230
 
 #define OBD_FAIL_LDLM                    0x300
 #define OBD_FAIL_LDLM_NAMESPACE_NEW      0x301
index 9f474bc..312ce52 100644 (file)
@@ -1967,7 +1967,7 @@ llu_fsswop_mount(const char *source,
         ocd.ocd_connect_flags = OBD_CONNECT_SRVLOCK | OBD_CONNECT_REQPORTAL |
                                 OBD_CONNECT_VERSION | OBD_CONNECT_TRUNCLOCK |
                                 OBD_CONNECT_FID | OBD_CONNECT_AT |
-                                OBD_CONNECT_FULL20;
+                                OBD_CONNECT_FULL20 | OBD_CONNECT_EINPROGRESS;
 
         ocd.ocd_version = LUSTRE_VERSION_CODE;
         err = obd_connect(NULL, &sbi->ll_dt_exp, obd, &sbi->ll_sb_uuid, &ocd, NULL);
index 8bc0c5e..efaa76f 100644 (file)
@@ -385,7 +385,8 @@ static int client_common_fill_super(struct super_block *sb, char *md, char *dt,
                                   OBD_CONNECT_AT | OBD_CONNECT_RMT_CLIENT |
                                   OBD_CONNECT_OSS_CAPA | OBD_CONNECT_VBR|
                                   OBD_CONNECT_FULL20 | OBD_CONNECT_64BITHASH |
-                                  OBD_CONNECT_MAXBYTES;
+                                  OBD_CONNECT_MAXBYTES |
+                                  OBD_CONNECT_EINPROGRESS;
 
         if (sbi->ll_flags & LL_SBI_SOM_PREVIEW)
                 data->ocd_connect_flags |= OBD_CONNECT_SOM;
index 896fa0d..cafb080 100644 (file)
@@ -693,6 +693,9 @@ int filter_commitrw_write(struct obd_export *exp, struct obdo *oa,
         if (rc == -ENOTCONN)
                 GOTO(cleanup, rc);
 
+        if (OBD_FAIL_CHECK(OBD_FAIL_OST_DQACQ_NET))
+                GOTO(cleanup, rc = -EINPROGRESS);
+
         push_ctxt(&saved, &obd->obd_lvfs_ctxt, NULL);
         cleanup_phase = 2;
 
index 28b5ecb..177ebf1 100644 (file)
@@ -191,7 +191,8 @@ extern struct lu_device_type osc_device_type;
 
 static inline int osc_recoverable_error(int rc)
 {
-        return (rc == -EIO || rc == -EROFS || rc == -ENOMEM || rc == -EAGAIN);
+        return (rc == -EIO || rc == -EROFS || rc == -ENOMEM ||
+                rc == -EAGAIN || rc == -EINPROGRESS);
 }
 
 #ifndef min_t
index 6d87a0a..995391d 100644 (file)
@@ -1696,12 +1696,13 @@ static int osc_brw_internal(int cmd, struct obd_export *exp, struct obdo *oa,
         struct ptlrpc_request *req;
         int                    rc;
         cfs_waitq_t            waitq;
-        int                    resends = 0;
+        int                    generation, resends = 0;
         struct l_wait_info     lwi;
 
         ENTRY;
 
         cfs_waitq_init(&waitq);
+        generation = exp->exp_obd->u.cli.cl_import->imp_generation;
 
 restart_bulk:
         rc = osc_brw_prep_request(cmd, &exp->exp_obd->u.cli, oa, lsm,
@@ -1709,6 +1710,11 @@ restart_bulk:
         if (rc != 0)
                 return (rc);
 
+        if (resends) {
+                req->rq_generation_set = 1;
+                req->rq_import_generation = generation;
+        }
+
         rc = ptlrpc_queue_wait(req);
 
         if (rc == -ETIMEDOUT && req->rq_resend) {
@@ -1720,19 +1726,34 @@ restart_bulk:
         rc = osc_brw_fini_request(req, rc);
 
         ptlrpc_req_finished(req);
+        /* When server return -EINPROGRESS, client should always retry
+         * regardless of the number of times the bulk was resent already.*/
         if (osc_recoverable_error(rc)) {
                 resends++;
-                if (!client_should_resend(resends, &exp->exp_obd->u.cli)) {
-                        CERROR("too many resend retries, returning error\n");
-                        RETURN(-EIO);
+                if (rc != -EINPROGRESS &&
+                    !client_should_resend(resends, &exp->exp_obd->u.cli)) {
+                        CERROR("%s: too many resend retries for object: "
+                               ""LPU64":"LPU64", rc = %d.\n",
+                               exp->exp_obd->obd_name, oa->o_id, oa->o_seq, rc);
+                        goto out;
+                }
+                if (generation !=
+                    exp->exp_obd->u.cli.cl_import->imp_generation) {
+                        CDEBUG(D_HA, "%s: resend cross eviction for object: "
+                               ""LPU64":"LPU64", rc = %d.\n",
+                               exp->exp_obd->obd_name, oa->o_id, oa->o_seq, rc);
+                        goto out;
                 }
 
-                lwi = LWI_TIMEOUT_INTR(cfs_time_seconds(resends), NULL, NULL, NULL);
+                lwi = LWI_TIMEOUT_INTR(cfs_time_seconds(resends), NULL, NULL,
+                                       NULL);
                 l_wait_event(waitq, 0, &lwi);
 
                 goto restart_bulk;
         }
-
+out:
+        if (rc == -EAGAIN || rc == -EINPROGRESS)
+                rc = -EIO;
         RETURN (rc);
 }
 
@@ -1746,11 +1767,6 @@ int osc_brw_redo_request(struct ptlrpc_request *request,
         int rc = 0;
         ENTRY;
 
-        if (!client_should_resend(aa->aa_resends, aa->aa_cli)) {
-                CERROR("too many resent retries, returning error\n");
-                RETURN(-EIO);
-        }
-
         DEBUG_REQ(D_ERROR, request, "redo for recoverable error");
 
         rc = osc_brw_prep_request(lustre_msg_get_opc(request->rq_reqmsg) ==
@@ -1782,6 +1798,8 @@ int osc_brw_redo_request(struct ptlrpc_request *request,
         new_req->rq_interpret_reply = request->rq_interpret_reply;
         new_req->rq_async_args = request->rq_async_args;
         new_req->rq_sent = cfs_time_current_sec() + aa->aa_resends;
+        new_req->rq_generation_set = 1;
+        new_req->rq_import_generation = request->rq_import_generation;
 
         new_aa = ptlrpc_req_async_args(new_req);
 
@@ -2232,10 +2250,29 @@ static int brw_interpret(const struct lu_env *env,
 
         rc = osc_brw_fini_request(req, rc);
         CDEBUG(D_INODE, "request %p aa %p rc %d\n", req, aa, rc);
+        /* When server return -EINPROGRESS, client should always retry
+         * regardless of the number of times the bulk was resent already. */
         if (osc_recoverable_error(rc)) {
-                rc = osc_brw_redo_request(req, aa);
+                if (req->rq_import_generation !=
+                    req->rq_import->imp_generation) {
+                        CDEBUG(D_HA, "%s: resend cross eviction for object: "
+                               ""LPU64":"LPU64", rc = %d.\n",
+                               req->rq_import->imp_obd->obd_name,
+                               aa->aa_oa->o_id, aa->aa_oa->o_seq, rc);
+                } else if (rc == -EINPROGRESS ||
+                    client_should_resend(aa->aa_resends, aa->aa_cli)) {
+                        rc = osc_brw_redo_request(req, aa);
+                } else {
+                        CERROR("%s: too many resent retries for object: "
+                               ""LPU64":"LPU64", rc = %d.\n",
+                               req->rq_import->imp_obd->obd_name,
+                               aa->aa_oa->o_id, aa->aa_oa->o_seq, rc);
+                }
+
                 if (rc == 0)
                         RETURN(0);
+                else if (rc == -EAGAIN || rc == -EINPROGRESS)
+                        rc = -EIO;
         }
 
         if (aa->aa_ocapa) {
index 2e1d161..6b50207 100644 (file)
@@ -1317,23 +1317,25 @@ static int after_reply(struct ptlrpc_request *req)
  * Helper function to send request \a req over the network for the first time
  * Also adjusts request phase.
  * Returns 0 on success or error code.
- */ 
+ */
 static int ptlrpc_send_new_req(struct ptlrpc_request *req)
 {
-        struct obd_import     *imp;
+        struct obd_import     *imp = req->rq_import;
         int rc;
         ENTRY;
 
         LASSERT(req->rq_phase == RQ_PHASE_NEW);
-        if (req->rq_sent && (req->rq_sent > cfs_time_current_sec()))
+        if (req->rq_sent && (req->rq_sent > cfs_time_current_sec()) &&
+            (!req->rq_generation_set ||
+             req->rq_import_generation == imp->imp_generation))
                 RETURN (0);
 
         ptlrpc_rqphase_move(req, RQ_PHASE_RPC);
 
-        imp = req->rq_import;
         cfs_spin_lock(&imp->imp_lock);
 
-        req->rq_import_generation = imp->imp_generation;
+        if (!req->rq_generation_set)
+                req->rq_import_generation = imp->imp_generation;
 
         if (ptlrpc_import_delay_req(imp, req, &rc)) {
                 cfs_spin_lock(&req->rq_lock);
index 7727f38..036750b 100755 (executable)
@@ -245,6 +245,66 @@ test_7() {
 }
 run_test 7 "Fail OST before obd_destroy"
 
+test_8a() {
+    verify=$ROOT/tmp/verify-$$
+    dd if=/dev/urandom of=$verify bs=4096 count=1280 ||
+        error "Create verify file failed"
+#define OBD_FAIL_OST_DQACQ_NET           0x230
+    do_facet ost1 "lctl set_param fail_loc=0x230"
+    dd if=$verify of=$TDIR/$tfile bs=4096 count=1280 oflag=sync &
+    ddpid=$!
+    sleep $TIMEOUT  # wait for the io to become redo io
+    if ! ps -p $ddpid  > /dev/null 2>&1; then
+            error "redo io finished incorrectly"
+            return 1
+    fi
+    do_facet ost1 "lctl set_param fail_loc=0"
+    wait $ddpid || return 1
+    cancel_lru_locks osc
+    cmp $verify $TDIR/$tfile || return 2
+    rm -f $verify $TDIR/$tfile
+}
+run_test 8a "Verify redo io: redo io when get -EINPROGRESS error"
+
+test_8b() {
+    verify=$ROOT/tmp/verify-$$
+    dd if=/dev/urandom of=$verify bs=4096 count=1280 ||
+        error "Create verify file failed"
+#define OBD_FAIL_OST_DQACQ_NET           0x230
+    do_facet ost1 "lctl set_param fail_loc=0x230"
+    dd if=$verify of=$TDIR/$tfile bs=4096 count=1280 oflag=sync &
+    ddpid=$!
+    sleep $TIMEOUT  # wait for the io to become redo io
+    fail ost1
+    do_facet ost1 "lctl set_param fail_loc=0"
+    wait $ddpid || return 1
+    cancel_lru_locks osc
+    cmp $verify $TDIR/$tfile || return 2
+    rm -f $verify $TDIR/$tfile
+}
+run_test 8b "Verify redo io: redo io should success after recovery"
+
+test_8c() {
+    verify=$ROOT/tmp/verify-$$
+    dd if=/dev/urandom of=$verify bs=4096 count=1280 ||
+        error "Create verify file failed"
+#define OBD_FAIL_OST_DQACQ_NET           0x230
+    do_facet ost1 "lctl set_param fail_loc=0x230"
+    dd if=$verify of=$TDIR/$tfile bs=4096 count=1280 oflag=sync &
+    ddpid=$!
+    sleep $TIMEOUT  # wait for the io to become redo io
+    ost_evict_client
+    # allow recovery to complete
+    sleep $((TIMEOUT + 2))
+    do_facet ost1 "lctl set_param fail_loc=0"
+    wait $ddpid
+    cancel_lru_locks osc
+    cmp $verify $TDIR/$tfile && return 2
+    rm -f $verify $TDIR/$tfile
+}
+run_test 8c "Verify redo io: redo io should fail after eviction"
+
+
 complete $(basename $0) $SECONDS
 check_and_cleanup_lustre
 exit_status