OBD_CONNECT_MDS | OBD_CONNECT_SKIP_ORPHAN | \
OBD_CONNECT_GRANT_SHRINK | OBD_CONNECT_FULL20 | \
OBD_CONNECT_64BITHASH | OBD_CONNECT_MAXBYTES | \
- OBD_CONNECT_MAX_EASIZE)
+ OBD_CONNECT_MAX_EASIZE | \
+ OBD_CONNECT_EINPROGRESS)
#define ECHO_CONNECT_SUPPORTED (0)
#define MGS_CONNECT_SUPPORTED (OBD_CONNECT_VERSION | OBD_CONNECT_AT | \
OBD_CONNECT_FULL20 | OBD_CONNECT_IMP_RECOV)
rq_reply_truncate:1,
rq_committed:1,
/* whether the "rq_set" is a valid one */
- rq_invalid_rqset:1;
+ rq_invalid_rqset:1,
+ rq_generation_set:1;
enum rq_phase rq_phase; /* one of RQ_PHASE_* */
enum rq_phase rq_next_phase; /* one of RQ_PHASE_* to be used next */
#define OBD_FAIL_OST_BRW_PAUSE_BULK2 0x227
#define OBD_FAIL_OST_MAPBLK_ENOSPC 0x228
#define OBD_FAIL_OST_ENOINO 0x229
+#define OBD_FAIL_OST_DQACQ_NET 0x230 /* check site fails with -EINPROGRESS */
#define OBD_FAIL_LDLM 0x300
#define OBD_FAIL_LDLM_NAMESPACE_NEW 0x301
ocd.ocd_connect_flags = OBD_CONNECT_SRVLOCK | OBD_CONNECT_REQPORTAL |
OBD_CONNECT_VERSION | OBD_CONNECT_TRUNCLOCK |
OBD_CONNECT_FID | OBD_CONNECT_AT |
- OBD_CONNECT_FULL20;
+ OBD_CONNECT_FULL20 | OBD_CONNECT_EINPROGRESS;
ocd.ocd_version = LUSTRE_VERSION_CODE;
err = obd_connect(NULL, &sbi->ll_dt_exp, obd, &sbi->ll_sb_uuid, &ocd, NULL);
OBD_CONNECT_AT | OBD_CONNECT_RMT_CLIENT |
OBD_CONNECT_OSS_CAPA | OBD_CONNECT_VBR|
OBD_CONNECT_FULL20 | OBD_CONNECT_64BITHASH |
- OBD_CONNECT_MAXBYTES;
+ OBD_CONNECT_MAXBYTES |
+ OBD_CONNECT_EINPROGRESS;
if (sbi->ll_flags & LL_SBI_SOM_PREVIEW)
data->ocd_connect_flags |= OBD_CONNECT_SOM;
if (rc == -ENOTCONN)
GOTO(cleanup, rc);
+ if (OBD_FAIL_CHECK(OBD_FAIL_OST_DQACQ_NET))
+ GOTO(cleanup, rc = -EINPROGRESS);
+
push_ctxt(&saved, &obd->obd_lvfs_ctxt, NULL);
cleanup_phase = 2;
+/*
+ * Tell whether \a rc is an error code for which a bulk request may be
+ * resent.  Returns non-zero for -EIO, -EROFS, -ENOMEM, -EAGAIN and
+ * -EINPROGRESS, zero for anything else.
+ */
static inline int osc_recoverable_error(int rc)
{
- return (rc == -EIO || rc == -EROFS || rc == -ENOMEM || rc == -EAGAIN);
+ return (rc == -EIO || rc == -EROFS || rc == -ENOMEM ||
+ rc == -EAGAIN || rc == -EINPROGRESS);
}
#ifndef min_t
struct ptlrpc_request *req;
int rc;
cfs_waitq_t waitq;
- int resends = 0;
+ int generation, resends = 0;
struct l_wait_info lwi;
ENTRY;
cfs_waitq_init(&waitq);
+ generation = exp->exp_obd->u.cli.cl_import->imp_generation;
restart_bulk:
rc = osc_brw_prep_request(cmd, &exp->exp_obd->u.cli, oa, lsm,
if (rc != 0)
return (rc);
+ if (resends) {
+ req->rq_generation_set = 1;
+ req->rq_import_generation = generation;
+ }
+
rc = ptlrpc_queue_wait(req);
if (rc == -ETIMEDOUT && req->rq_resend) {
rc = osc_brw_fini_request(req, rc);
ptlrpc_req_finished(req);
+ /* When the server returns -EINPROGRESS, the client should always
+ * retry, regardless of the number of times the bulk was resent
+ * already. */
if (osc_recoverable_error(rc)) {
resends++;
- if (!client_should_resend(resends, &exp->exp_obd->u.cli)) {
- CERROR("too many resend retries, returning error\n");
- RETURN(-EIO);
+ if (rc != -EINPROGRESS &&
+ !client_should_resend(resends, &exp->exp_obd->u.cli)) {
+ CERROR("%s: too many resend retries for object: "
+ ""LPU64":"LPU64", rc = %d.\n",
+ exp->exp_obd->obd_name, oa->o_id, oa->o_seq, rc);
+ goto out;
+ }
+ if (generation !=
+ exp->exp_obd->u.cli.cl_import->imp_generation) {
+ CDEBUG(D_HA, "%s: resend cross eviction for object: "
+ ""LPU64":"LPU64", rc = %d.\n",
+ exp->exp_obd->obd_name, oa->o_id, oa->o_seq, rc);
+ goto out;
}
- lwi = LWI_TIMEOUT_INTR(cfs_time_seconds(resends), NULL, NULL, NULL);
+ lwi = LWI_TIMEOUT_INTR(cfs_time_seconds(resends), NULL, NULL,
+ NULL);
l_wait_event(waitq, 0, &lwi);
goto restart_bulk;
}
-
+out:
+ if (rc == -EAGAIN || rc == -EINPROGRESS)
+ rc = -EIO;
RETURN (rc);
}
int rc = 0;
ENTRY;
- if (!client_should_resend(aa->aa_resends, aa->aa_cli)) {
- CERROR("too many resent retries, returning error\n");
- RETURN(-EIO);
- }
-
DEBUG_REQ(D_ERROR, request, "redo for recoverable error");
rc = osc_brw_prep_request(lustre_msg_get_opc(request->rq_reqmsg) ==
new_req->rq_interpret_reply = request->rq_interpret_reply;
new_req->rq_async_args = request->rq_async_args;
new_req->rq_sent = cfs_time_current_sec() + aa->aa_resends;
+ new_req->rq_generation_set = 1;
+ new_req->rq_import_generation = request->rq_import_generation;
new_aa = ptlrpc_req_async_args(new_req);
rc = osc_brw_fini_request(req, rc);
CDEBUG(D_INODE, "request %p aa %p rc %d\n", req, aa, rc);
+ /* When the server returns -EINPROGRESS, the client should always
+ * retry, regardless of the number of times the bulk was resent
+ * already. */
if (osc_recoverable_error(rc)) {
- rc = osc_brw_redo_request(req, aa);
+ if (req->rq_import_generation !=
+ req->rq_import->imp_generation) {
+ CDEBUG(D_HA, "%s: resend cross eviction for object: "
+ ""LPU64":"LPU64", rc = %d.\n",
+ req->rq_import->imp_obd->obd_name,
+ aa->aa_oa->o_id, aa->aa_oa->o_seq, rc);
+ } else if (rc == -EINPROGRESS ||
+ client_should_resend(aa->aa_resends, aa->aa_cli)) {
+ rc = osc_brw_redo_request(req, aa);
+ } else {
+ CERROR("%s: too many resent retries for object: "
+ ""LPU64":"LPU64", rc = %d.\n",
+ req->rq_import->imp_obd->obd_name,
+ aa->aa_oa->o_id, aa->aa_oa->o_seq, rc);
+ }
+
if (rc == 0)
RETURN(0);
+ else if (rc == -EAGAIN || rc == -EINPROGRESS)
+ rc = -EIO;
}
if (aa->aa_ocapa) {
* Helper function to send request \a req over the network for the first time
* Also adjusts request phase.
* Returns 0 on success or error code.
- */
+ */
static int ptlrpc_send_new_req(struct ptlrpc_request *req)
{
- struct obd_import *imp;
+ struct obd_import *imp = req->rq_import;
int rc;
ENTRY;
LASSERT(req->rq_phase == RQ_PHASE_NEW);
- if (req->rq_sent && (req->rq_sent > cfs_time_current_sec()))
+ if (req->rq_sent && (req->rq_sent > cfs_time_current_sec()) &&
+ (!req->rq_generation_set ||
+ req->rq_import_generation == imp->imp_generation))
RETURN (0);
ptlrpc_rqphase_move(req, RQ_PHASE_RPC);
- imp = req->rq_import;
cfs_spin_lock(&imp->imp_lock);
- req->rq_import_generation = imp->imp_generation;
+ if (!req->rq_generation_set)
+ req->rq_import_generation = imp->imp_generation;
if (ptlrpc_import_delay_req(imp, req, &rc)) {
cfs_spin_lock(&req->rq_lock);
}
run_test 7 "Fail OST before obd_destroy"
+test_8a() {
+        # Write random reference data through Lustre while ost1 has
+        # fail_loc 0x230 armed, check that the writer is still running
+        # after $TIMEOUT seconds, then disarm the fail_loc and verify that
+        # the write completes and the file matches the reference data.
+        local verify=$ROOT/tmp/verify-$$
+        local ddpid
+
+        dd if=/dev/urandom of=$verify bs=4096 count=1280 ||
+                error "Create verify file failed"
+#define OBD_FAIL_OST_DQACQ_NET 0x230
+        do_facet ost1 "lctl set_param fail_loc=0x230"
+        dd if=$verify of=$TDIR/$tfile bs=4096 count=1280 oflag=sync &
+        ddpid=$!
+        sleep $TIMEOUT  # wait for the io to become redo io
+        if ! ps -p $ddpid > /dev/null 2>&1; then
+                # Disarm the fail_loc before reporting the failure so it
+                # does not stay set on ost1 for whatever runs next.
+                do_facet ost1 "lctl set_param fail_loc=0"
+                error "redo io finished incorrectly"
+                return 1
+        fi
+        do_facet ost1 "lctl set_param fail_loc=0"
+        wait $ddpid || return 1
+        cancel_lru_locks osc
+        cmp $verify $TDIR/$tfile || return 2
+        rm -f $verify $TDIR/$tfile
+}
+run_test 8a "Verify redo io: redo io when get -EINPROGRESS error"
+
+test_8b() {
+        # Same write as test_8a, but ost1 is failed over (fail ost1) while
+        # the fail_loc is armed; the write must still complete and the
+        # file must match the reference data.
+        local verify=$ROOT/tmp/verify-$$
+        local ddpid
+
+        dd if=/dev/urandom of=$verify bs=4096 count=1280 ||
+                error "Create verify file failed"
+#define OBD_FAIL_OST_DQACQ_NET 0x230
+        do_facet ost1 "lctl set_param fail_loc=0x230"
+        dd if=$verify of=$TDIR/$tfile bs=4096 count=1280 oflag=sync &
+        ddpid=$!
+        sleep $TIMEOUT  # wait for the io to become redo io
+        fail ost1
+        do_facet ost1 "lctl set_param fail_loc=0"
+        wait $ddpid || return 1
+        cancel_lru_locks osc
+        cmp $verify $TDIR/$tfile || return 2
+        rm -f $verify $TDIR/$tfile
+}
+run_test 8b "Verify redo io: redo io should success after recovery"
+
+test_8c() {
+        # Same write as test_8a, but the client is evicted from the OST
+        # while the fail_loc is armed; afterwards the file must NOT match
+        # the reference data (cmp succeeding makes the test return 2).
+        local verify=$ROOT/tmp/verify-$$
+        local ddpid
+
+        dd if=/dev/urandom of=$verify bs=4096 count=1280 ||
+                error "Create verify file failed"
+#define OBD_FAIL_OST_DQACQ_NET 0x230
+        do_facet ost1 "lctl set_param fail_loc=0x230"
+        dd if=$verify of=$TDIR/$tfile bs=4096 count=1280 oflag=sync &
+        ddpid=$!
+        sleep $TIMEOUT  # wait for the io to become redo io
+        ost_evict_client
+        # allow recovery to complete
+        sleep $((TIMEOUT + 2))
+        do_facet ost1 "lctl set_param fail_loc=0"
+        # the exit status of dd is deliberately not checked here
+        wait $ddpid
+        cancel_lru_locks osc
+        cmp $verify $TDIR/$tfile && return 2
+        rm -f $verify $TDIR/$tfile
+}
+run_test 8c "Verify redo io: redo io should fail after eviction"
+
+
complete $(basename $0) $SECONDS
check_and_cleanup_lustre
exit_status