From fb4073bc3cfbf1a7ad17b03270e986098a2869ee Mon Sep 17 00:00:00 2001 From: Mikhail Pershin Date: Thu, 27 May 2010 23:55:29 +0400 Subject: [PATCH] b=20997 Enable time limit on the request to be queued i=rread,nathan This feature is enabled by setting the time limit on the request. The limit is on how long the request is queued, whether or not it's ever been sent. In other words, it's not a "resend" limit, because it can expire even if the request has never been sent the first time. The patch also removes unused imp flags and modifies the conf-sanity test to use the 'mds mount first' scheme to avoid long test times. --- lustre/include/lustre_import.h | 4 -- lustre/include/lustre_net.h | 23 ++++++++ lustre/ldlm/ldlm_lib.c | 2 - lustre/lmv/lmv_obd.c | 3 +- lustre/mdc/mdc_request.c | 23 -------- lustre/mgc/mgc_request.c | 116 ++++++++++++--------------------------- lustre/obdclass/lprocfs_status.c | 2 - lustre/osc/osc_request.c | 12 ---- lustre/ptlrpc/client.c | 23 ++++---- lustre/ptlrpc/import.c | 48 +--------------- lustre/ptlrpc/recover.c | 2 +- lustre/tests/conf-sanity.sh | 18 +++--- 12 files changed, 83 insertions(+), 193 deletions(-) diff --git a/lustre/include/lustre_import.h b/lustre/include/lustre_import.h index 46b039d..badfe44 100644 --- a/lustre/include/lustre_import.h +++ b/lustre/include/lustre_import.h @@ -177,16 +177,12 @@ struct obd_import { imp_replayable:1, /* try to recover the import */ imp_dlm_fake:1, /* don't run recovery (timeout instead) */ imp_server_timeout:1, /* use 1/2 timeout on MDS' OSCs */ - imp_initial_recov:1, /* retry the initial connection */ - imp_initial_recov_bk:1, /* turn off init_recov after trying all failover nids */ imp_delayed_recovery:1, /* VBR: imp in delayed recovery */ imp_no_lock_replay:1, /* VBR: if gap was found then no lock replays */ imp_vbr_failed:1, /* recovery by versions was failed */ imp_force_verify:1, /* force an immidiate ping */ imp_pingable:1, /* pingable */ imp_resend_replay:1, /* resend for replay */ - imp_recon_bk:1, /* turn off 
reconnect if all failovers fail */ - imp_last_recon:1, /* internally used by above */ imp_force_reconnect:1; /* import must be reconnected instead of chouse new connection */ __u32 imp_connect_op; struct obd_connect_data imp_connect_data; diff --git a/lustre/include/lustre_net.h b/lustre/include/lustre_net.h index 946490a..5e8d0d5 100644 --- a/lustre/include/lustre_net.h +++ b/lustre/include/lustre_net.h @@ -459,6 +459,8 @@ struct ptlrpc_request { /* client+server request */ lnet_handle_md_t rq_req_md_h; struct ptlrpc_cb_id rq_req_cbid; + cfs_duration_t rq_delay_limit; /* optional time limit for send attempts */ + cfs_time_t rq_queued_time; /* time request was first queued */ /* server-side... */ struct timeval rq_arrival_time; /* request arrival time */ @@ -1324,6 +1326,27 @@ static inline int ptlrpc_req_get_repsize(struct ptlrpc_request *req) } } +static inline int ptlrpc_send_limit_expired(struct ptlrpc_request *req) +{ + if (req->rq_delay_limit != 0 && + cfs_time_before(cfs_time_add(req->rq_queued_time, + cfs_time_seconds(req->rq_delay_limit)), + cfs_time_current())) { + return 1; + } + return 0; +} + +static inline int ptlrpc_no_resend(struct ptlrpc_request *req) +{ + if (!req->rq_no_resend && ptlrpc_send_limit_expired(req)) { + cfs_spin_lock(&req->rq_lock); + req->rq_no_resend = 1; + cfs_spin_unlock(&req->rq_lock); + } + return req->rq_no_resend; +} + /* ldlm/ldlm_lib.c */ int client_obd_setup(struct obd_device *obddev, struct lustre_cfg *lcfg); int client_obd_cleanup(struct obd_device *obddev); diff --git a/lustre/ldlm/ldlm_lib.c b/lustre/ldlm/ldlm_lib.c index 330f3a7..943f0b1 100644 --- a/lustre/ldlm/ldlm_lib.c +++ b/lustre/ldlm/ldlm_lib.c @@ -337,8 +337,6 @@ int client_obd_setup(struct obd_device *obddev, struct lustre_cfg *lcfg) GOTO(err_ldlm, rc = -ENOENT); imp->imp_client = &obddev->obd_ldlm_client; imp->imp_connect_op = connect_op; - imp->imp_initial_recov = 1; - imp->imp_initial_recov_bk = 0; CFS_INIT_LIST_HEAD(&imp->imp_pinger_chain); 
memcpy(cli->cl_target_uuid.uuid, lustre_cfg_buf(lcfg, 1), LUSTRE_CFG_BUFLEN(lcfg, 1)); diff --git a/lustre/lmv/lmv_obd.c b/lustre/lmv/lmv_obd.c index d5778ee..5346d49 100644 --- a/lustre/lmv/lmv_obd.c +++ b/lustre/lmv/lmv_obd.c @@ -2650,8 +2650,7 @@ int lmv_set_info_async(struct obd_export *exp, obd_count keylen, } lmv = &obd->u.lmv; - if (KEY_IS(KEY_READ_ONLY) || KEY_IS(KEY_FLUSH_CTX) || - KEY_IS(KEY_INIT_RECOV_BACKUP)) { + if (KEY_IS(KEY_READ_ONLY) || KEY_IS(KEY_FLUSH_CTX)) { int i, err = 0; for (i = 0; i < lmv->desc.ld_tgt_count; i++) { diff --git a/lustre/mdc/mdc_request.c b/lustre/mdc/mdc_request.c index 809a207..94056a7 100644 --- a/lustre/mdc/mdc_request.c +++ b/lustre/mdc/mdc_request.c @@ -1577,29 +1577,6 @@ int mdc_set_info_async(struct obd_export *exp, int rc = -EINVAL; ENTRY; - if (KEY_IS(KEY_INIT_RECOV)) { - if (vallen != sizeof(int)) - RETURN(-EINVAL); - cfs_spin_lock(&imp->imp_lock); - imp->imp_initial_recov = *(int *)val; - cfs_spin_unlock(&imp->imp_lock); - CDEBUG(D_HA, "%s: set imp_initial_recov = %d\n", - exp->exp_obd->obd_name, imp->imp_initial_recov); - RETURN(0); - } - /* Turn off initial_recov after we try all backup servers once */ - if (KEY_IS(KEY_INIT_RECOV_BACKUP)) { - if (vallen != sizeof(int)) - RETURN(-EINVAL); - cfs_spin_lock(&imp->imp_lock); - imp->imp_initial_recov_bk = *(int *)val; - if (imp->imp_initial_recov_bk) - imp->imp_initial_recov = 1; - cfs_spin_unlock(&imp->imp_lock); - CDEBUG(D_HA, "%s: set imp_initial_recov_bk = %d\n", - exp->exp_obd->obd_name, imp->imp_initial_recov_bk); - RETURN(0); - } if (KEY_IS(KEY_READ_ONLY)) { if (vallen != sizeof(int)) RETURN(-EINVAL); diff --git a/lustre/mgc/mgc_request.c b/lustre/mgc/mgc_request.c index 412e7e1..883b621 100644 --- a/lustre/mgc/mgc_request.c +++ b/lustre/mgc/mgc_request.c @@ -781,6 +781,11 @@ static int mgc_blocking_ast(struct ldlm_lock *lock, struct ldlm_lock_desc *desc, RETURN(rc); } +/* Not sure where this should go... 
*/ +#define MGC_ENQUEUE_LIMIT 50 +#define MGC_TARGET_REG_LIMIT 10 +#define MGC_SEND_PARAM_LIMIT 10 + /* Send parameter to MGS*/ static int mgc_set_mgs_param(struct obd_export *exp, struct mgs_send_param *msp) @@ -805,6 +810,8 @@ static int mgc_set_mgs_param(struct obd_export *exp, memcpy(req_msp, msp, sizeof(*req_msp)); ptlrpc_request_set_replen(req); + /* Limit how long we will wait for the enqueue to complete */ + req->rq_delay_limit = MGC_SEND_PARAM_LIMIT; rc = ptlrpc_queue_wait(req); if (!rc) { rep_msp = req_capsule_server_get(&req->rq_pill, &RMF_MGS_SEND_PARAM); @@ -826,7 +833,8 @@ static int mgc_enqueue(struct obd_export *exp, struct lov_stripe_md *lsm, struct config_llog_data *cld = (struct config_llog_data *)data; struct ldlm_enqueue_info einfo = { type, mode, mgc_blocking_ast, ldlm_completion_ast, NULL, NULL, data}; - + struct ptlrpc_request *req; + int short_limit = cld->cld_is_sptlrpc; int rc; ENTRY; @@ -839,12 +847,25 @@ static int mgc_enqueue(struct obd_export *exp, struct lov_stripe_md *lsm, /* We need a callback for every lockholder, so don't try to ldlm_lock_match (see rev 1.1.2.11.2.47) */ - - rc = ldlm_cli_enqueue(exp, NULL, &einfo, &cld->cld_resid, NULL, flags, + req = ptlrpc_request_alloc_pack(class_exp2cliimp(exp), + &RQF_LDLM_ENQUEUE, LUSTRE_DLM_VERSION, + LDLM_ENQUEUE); + if (req == NULL) + RETURN(-ENOMEM); + ptlrpc_request_set_replen(req); + /* check if this is server or client */ + if (cld->cld_cfg.cfg_sb) { + struct lustre_sb_info *lsi = s2lsi(cld->cld_cfg.cfg_sb); + if (lsi && (lsi->lsi_flags & LSI_SERVER)) + short_limit = 1; + } + /* Limit how long we will wait for the enqueue to complete */ + req->rq_delay_limit = short_limit ? 5 : MGC_ENQUEUE_LIMIT; + rc = ldlm_cli_enqueue(exp, &req, &einfo, &cld->cld_resid, NULL, flags, NULL, 0, lockh, 0); /* A failed enqueue should still call the mgc_blocking_ast, where it will be requeued if needed ("grant failed"). 
*/ - + ptlrpc_req_finished(req); RETURN(rc); } @@ -858,60 +879,6 @@ static int mgc_cancel(struct obd_export *exp, struct lov_stripe_md *md, RETURN(0); } -#if 0 -static int mgc_iocontrol(unsigned int cmd, struct obd_export *exp, int len, - void *karg, void *uarg) -{ - struct obd_device *obd = exp->exp_obd; - struct obd_ioctl_data *data = karg; - struct llog_ctxt *ctxt; - struct lvfs_run_ctxt saved; - int rc; - ENTRY; - - if (!cfs_try_module_get(THIS_MODULE)) { - CERROR("Can't get module. Is it alive?"); - return -EINVAL; - } - switch (cmd) { - /* REPLicator context */ - case OBD_IOC_PARSE: { - CERROR("MGC parsing llog %s\n", data->ioc_inlbuf1); - ctxt = llog_get_context(exp->exp_obd, LLOG_CONFIG_REPL_CTXT); - rc = class_config_parse_llog(ctxt, data->ioc_inlbuf1, NULL); - GOTO(out, rc); - } -#ifdef __KERNEL__ - case OBD_IOC_LLOG_INFO: - case OBD_IOC_LLOG_PRINT: { - ctxt = llog_get_context(obd, LLOG_CONFIG_REPL_CTXT); - rc = llog_ioctl(ctxt, cmd, data); - - GOTO(out, rc); - } -#endif - /* ORIGinator context */ - case OBD_IOC_DUMP_LOG: { - ctxt = llog_get_context(obd, LLOG_CONFIG_ORIG_CTXT); - push_ctxt(&saved, &obd->obd_lvfs_ctxt, NULL); - rc = class_config_dump_llog(ctxt, data->ioc_inlbuf1, NULL); - pop_ctxt(&saved, &obd->obd_lvfs_ctxt, NULL); - if (rc) - RETURN(rc); - - GOTO(out, rc); - } - default: - CERROR("mgc_ioctl(): unrecognised ioctl %#x\n", cmd); - GOTO(out, rc = -ENOTTY); - } -out: - cfs_module_put(THIS_MODULE); - - return rc; -} -#endif - /* Send target_reg message to MGS */ static int mgc_target_register(struct obd_export *exp, struct mgs_target_info *mti) @@ -936,6 +903,8 @@ static int mgc_target_register(struct obd_export *exp, memcpy(req_mti, mti, sizeof(*req_mti)); ptlrpc_request_set_replen(req); CDEBUG(D_MGC, "register %s\n", mti->mti_svname); + /* Limit how long we will wait for the enqueue to complete */ + req->rq_delay_limit = MGC_TARGET_REG_LIMIT; rc = ptlrpc_queue_wait(req); if (!rc) { @@ -954,40 +923,24 @@ int mgc_set_info_async(struct 
obd_export *exp, obd_count keylen, void *key, obd_count vallen, void *val, struct ptlrpc_request_set *set) { - struct obd_import *imp = class_exp2cliimp(exp); int rc = -EINVAL; ENTRY; - /* Try to "recover" the initial connection; i.e. retry */ - if (KEY_IS(KEY_INIT_RECOV)) { - if (vallen != sizeof(int)) - RETURN(-EINVAL); - cfs_spin_lock(&imp->imp_lock); - imp->imp_initial_recov = *(int *)val; - cfs_spin_unlock(&imp->imp_lock); - CDEBUG(D_HA, "%s: set imp_initial_recov = %d\n", - exp->exp_obd->obd_name, imp->imp_initial_recov); - RETURN(0); - } /* Turn off initial_recov after we try all backup servers once */ if (KEY_IS(KEY_INIT_RECOV_BACKUP)) { + struct obd_import *imp = class_exp2cliimp(exp); int value; if (vallen != sizeof(int)) RETURN(-EINVAL); value = *(int *)val; - cfs_spin_lock(&imp->imp_lock); - imp->imp_initial_recov_bk = value > 0; - /* Even after the initial connection, give up all comms if - nobody answers the first time. */ - imp->imp_recon_bk = 1; - cfs_spin_unlock(&imp->imp_lock); - CDEBUG(D_MGC, "InitRecov %s %d/%d:d%d:i%d:r%d:or%d:%s\n", - imp->imp_obd->obd_name, value, imp->imp_initial_recov, + CDEBUG(D_MGC, "InitRecov %s %d/d%d:i%d:r%d:or%d:%s\n", + imp->imp_obd->obd_name, value, imp->imp_deactive, imp->imp_invalid, imp->imp_replayable, imp->imp_obd->obd_replayable, ptlrpc_import_state_name(imp->imp_state)); /* Resurrect if we previously died */ - if (imp->imp_invalid || value > 1) + if ((imp->imp_state != LUSTRE_IMP_FULL && + imp->imp_state != LUSTRE_IMP_NEW) || value > 1) ptlrpc_reconnect_import(imp); RETURN(0); } @@ -1344,7 +1297,7 @@ int mgc_process_log(struct obd_device *mgc, * read it up here. */ if (rcl && cld->cld_is_sptlrpc) - goto out_pop; + GOTO(out_pop, rc); /* Copy the setup log locally if we can. Don't mess around if we're running an MGS though (logs are already local). */ @@ -1380,7 +1333,8 @@ int mgc_process_log(struct obd_device *mgc, /* logname and instance info should be the same, so use our copy of the instance for the update. 
The cfg_last_idx will be updated here. */ - rc = class_config_parse_llog(ctxt, cld->cld_logname, &cld->cld_cfg); + if (rcl == 0 || lctxt == ctxt) + rc = class_config_parse_llog(ctxt, cld->cld_logname, &cld->cld_cfg); out_pop: llog_ctxt_put(ctxt); if (ctxt != lctxt) diff --git a/lustre/obdclass/lprocfs_status.c b/lustre/obdclass/lprocfs_status.c index 12d91e3..5463e77 100644 --- a/lustre/obdclass/lprocfs_status.c +++ b/lustre/obdclass/lprocfs_status.c @@ -726,8 +726,6 @@ static int obd_import_flags2str(struct obd_import *imp, char *str, int max) flag2str(deactive); flag2str(replayable); flag2str(pingable); - flag2str(recon_bk); - flag2str(last_recon); return len; } #undef flags2str diff --git a/lustre/osc/osc_request.c b/lustre/osc/osc_request.c index bc6583d..8ffd95b 100644 --- a/lustre/osc/osc_request.c +++ b/lustre/osc/osc_request.c @@ -3977,18 +3977,6 @@ static int osc_set_info_async(struct obd_export *exp, obd_count keylen, RETURN(0); } - if (KEY_IS(KEY_INIT_RECOV)) { - if (vallen != sizeof(int)) - RETURN(-EINVAL); - cfs_spin_lock(&imp->imp_lock); - imp->imp_initial_recov = *(int *)val; - cfs_spin_unlock(&imp->imp_lock); - CDEBUG(D_HA, "%s: set imp_initial_recov = %d\n", - exp->exp_obd->obd_name, - imp->imp_initial_recov); - RETURN(0); - } - if (KEY_IS(KEY_CHECKSUM)) { if (vallen != sizeof(int)) RETURN(-EINVAL); diff --git a/lustre/ptlrpc/client.c b/lustre/ptlrpc/client.c index b04fccb..7802282 100644 --- a/lustre/ptlrpc/client.c +++ b/lustre/ptlrpc/client.c @@ -828,6 +828,7 @@ void ptlrpc_set_add_req(struct ptlrpc_request_set *set, cfs_list_add_tail(&req->rq_set_chain, &set->set_requests); req->rq_set = set; cfs_atomic_inc(&set->set_remaining); + req->rq_queued_time = cfs_time_current(); /* Where is the best place to set this? 
*/ } /** @@ -886,6 +887,12 @@ static int ptlrpc_import_delay_req(struct obd_import *imp, } else if (imp->imp_state == LUSTRE_IMP_CLOSED) { DEBUG_REQ(D_ERROR, req, "IMP_CLOSED "); *status = -EIO; + } else if (imp->imp_obd->obd_no_recov) { + *status = -ESHUTDOWN; + } else if (ptlrpc_send_limit_expired(req)) { + /* probably doesn't need to be a D_ERROR after initial testing */ + DEBUG_REQ(D_ERROR, req, "send limit expired "); + *status = -EIO; } else if (req->rq_send_state == LUSTRE_IMP_CONNECTING && imp->imp_state == LUSTRE_IMP_CONNECTING) { /* allow CONNECT even if import is invalid */ ; @@ -893,13 +900,7 @@ static int ptlrpc_import_delay_req(struct obd_import *imp, DEBUG_REQ(D_ERROR, req, "invalidate in flight"); *status = -EIO; } - } else if ((imp->imp_invalid && (!imp->imp_recon_bk)) || - imp->imp_obd->obd_no_recov) { - /* If the import has been invalidated (such as by an OST - * failure), and if the import(MGC) tried all of its connection - * list (Bug 13464), the request must fail with -ESHUTDOWN. - * This indicates the requests should be discarded; an -EIO - * may result in a resend of the request. 
*/ + } else if (imp->imp_invalid) { if (!imp->imp_deactive) DEBUG_REQ(D_ERROR, req, "IMP_INVALID"); *status = -ESHUTDOWN; /* bz 12940 */ @@ -1024,7 +1025,7 @@ static int after_reply(struct ptlrpc_request *req) LASSERT(!req->rq_receiving_reply && !req->rq_must_unlink); if (req->rq_reply_truncate) { - if (req->rq_no_resend) { + if (ptlrpc_no_resend(req)) { DEBUG_REQ(D_ERROR, req, "reply buffer overflow," " expected: %d, actual size: %d", req->rq_nob_received, req->rq_repbuf_len); @@ -1367,7 +1368,7 @@ int ptlrpc_check_set(const struct lu_env *env, struct ptlrpc_request_set *set) cfs_spin_unlock(&imp->imp_lock); GOTO(interpret, req->rq_status); } - if (req->rq_no_resend && !req->rq_wait_ctx) { + if (ptlrpc_no_resend(req) && !req->rq_wait_ctx) { req->rq_status = -ENOTCONN; ptlrpc_rqphase_move(req, RQ_PHASE_INTERPRET); @@ -1385,7 +1386,7 @@ int ptlrpc_check_set(const struct lu_env *env, struct ptlrpc_request_set *set) req->rq_waiting = 0; cfs_spin_unlock(&req->rq_lock); - if (req->rq_timedout||req->rq_resend) { + if (req->rq_timedout || req->rq_resend) { /* This is re-sending anyways, * let's mark req as resend. */ cfs_spin_lock(&req->rq_lock); @@ -1610,7 +1611,7 @@ int ptlrpc_expire_one_request(struct ptlrpc_request *req, int async_unlink) /* if a request can't be resent we can't wait for an answer after the timeout */ - if (req->rq_no_resend) { + if (ptlrpc_no_resend(req)) { DEBUG_REQ(D_RPCTRACE, req, "TIMEOUT-NORESEND:"); rc = 1; } diff --git a/lustre/ptlrpc/import.c b/lustre/ptlrpc/import.c index e50a11d..31b58da 100644 --- a/lustre/ptlrpc/import.c +++ b/lustre/ptlrpc/import.c @@ -273,13 +273,6 @@ void ptlrpc_invalidate_import(struct obd_import *imp) cfs_atomic_inc(&imp->imp_inval_count); - /* - * If this is an invalid MGC connection, then don't bother - * waiting for imp_inflight to drop to 0. 
- */ - if (imp->imp_invalid && imp->imp_recon_bk &&!imp->imp_obd->obd_no_recov) - goto out; - if (!imp->imp_invalid || imp->imp_obd->obd_no_recov) ptlrpc_deactivate_import(imp); @@ -375,7 +368,6 @@ void ptlrpc_invalidate_import(struct obd_import *imp) * "invalidate" state. */ LASSERT(cfs_atomic_read(&imp->imp_inflight) == 0); -out: obd_import_event(imp->imp_obd, imp, IMP_EVENT_INVALIDATE); sptlrpc_import_flush_all_ctx(imp); @@ -514,8 +506,7 @@ static int import_select_connection(struct obd_import *imp) we do finally connect. (FIXME: really we should wait for all network state associated with the last connection attempt to drain before trying to reconnect on it.) */ - if (tried_all && (imp->imp_conn_list.next == &imp_conn->oic_item) && - !imp->imp_recon_bk /* not retrying */) { + if (tried_all && (imp->imp_conn_list.next == &imp_conn->oic_item)) { if (at_get(&imp->imp_at.iat_net_latency) < CONNECTION_SWITCH_MAX) { at_measured(&imp->imp_at.iat_net_latency, @@ -638,27 +629,6 @@ int ptlrpc_connect_import(struct obd_import *imp, char *new_uuid) if (rc) GOTO(out, rc); - /* last in connection list */ - if (imp->imp_conn_current->oic_item.next == &imp->imp_conn_list) { - if (imp->imp_initial_recov_bk && initial_connect) { - CDEBUG(D_HA, "Last connection attempt (%d) for %s\n", - imp->imp_conn_cnt, obd2cli_tgt(imp->imp_obd)); - /* Don't retry if connect fails */ - rc = 0; - obd_set_info_async(obd->obd_self_export, - sizeof(KEY_INIT_RECOV), - KEY_INIT_RECOV, - sizeof(rc), &rc, NULL); - } - if (imp->imp_recon_bk) { - CDEBUG(D_HA, "Last reconnection attempt (%d) for %s\n", - imp->imp_conn_cnt, obd2cli_tgt(imp->imp_obd)); - cfs_spin_lock(&imp->imp_lock); - imp->imp_last_recon = 1; - cfs_spin_unlock(&imp->imp_lock); - } - } - rc = sptlrpc_import_sec_adapt(imp, NULL, 0); if (rc) GOTO(out, rc); @@ -1127,15 +1097,7 @@ finish: out: if (rc != 0) { IMPORT_SET_STATE(imp, LUSTRE_IMP_DISCON); - cfs_spin_lock(&imp->imp_lock); - if (aa->pcaa_initial_connect && !imp->imp_initial_recov && - 
(request->rq_import_generation == imp->imp_generation)) - ptlrpc_deactivate_and_unlock_import(imp); - else - cfs_spin_unlock(&imp->imp_lock); - - if ((imp->imp_recon_bk && imp->imp_last_recon) || - (rc == -EACCES)) { + if (rc == -EACCES) { /* * Give up trying to reconnect * EACCES means client has no permission for connection @@ -1183,10 +1145,6 @@ out: (char *)imp->imp_connection->c_remote_uuid.uuid, rc); } - cfs_spin_lock(&imp->imp_lock); - imp->imp_last_recon = 0; - cfs_spin_unlock(&imp->imp_lock); - cfs_waitq_broadcast(&imp->imp_recovery_waitq); RETURN(rc); } @@ -1464,8 +1422,6 @@ out: else IMPORT_SET_STATE_NOLOCK(imp, LUSTRE_IMP_CLOSED); memset(&imp->imp_remote_handle, 0, sizeof(imp->imp_remote_handle)); - /* Try all connections in the future - bz 12758 */ - imp->imp_last_recon = 0; cfs_spin_unlock(&imp->imp_lock); RETURN(rc); diff --git a/lustre/ptlrpc/recover.c b/lustre/ptlrpc/recover.c index 79c3d3a..7074b66 100644 --- a/lustre/ptlrpc/recover.c +++ b/lustre/ptlrpc/recover.c @@ -165,7 +165,7 @@ int ptlrpc_resend(struct obd_import *imp) LASSERTF((long)req > CFS_PAGE_SIZE && req != LP_POISON, "req %p bad\n", req); LASSERTF(req->rq_type != LI_POISON, "req %p freed\n", req); - if (!req->rq_no_resend) + if (!ptlrpc_no_resend(req)) ptlrpc_resend_req(req); } cfs_spin_unlock(&imp->imp_lock); diff --git a/lustre/tests/conf-sanity.sh b/lustre/tests/conf-sanity.sh index c5c3bfa..17f8ec01 100644 --- a/lustre/tests/conf-sanity.sh +++ b/lustre/tests/conf-sanity.sh @@ -154,8 +154,8 @@ manual_umount_client(){ } setup() { - start_ost || error "OST start failed" start_mds || error "MDT start failed" + start_ost || error "OST start failed" mount_client $MOUNT || error "client start failed" } @@ -223,10 +223,10 @@ test_0() { run_test 0 "single mount setup" test_1() { + start_mds || error "MDT start failed" start_ost echo "start ost second time..." 
start_ost && error "2nd OST start should fail" - start_mds || error "MDT start failed" mount_client $MOUNT || error "client start failed" check_mount || return 42 cleanup || return $? @@ -234,10 +234,10 @@ test_1() { run_test 1 "start up ost twice (should return errors)" test_2() { - start_ost start_mds echo "start mds second time.." start_mds && error "2nd MDT start should fail" + start_ost mount_client $MOUNT check_mount || return 43 cleanup || return $? @@ -330,8 +330,8 @@ test_5b() { run_test 5b "mds down, cleanup after failed mount (bug 2712) (should return errs)" test_5c() { - start_ost start_mds + start_ost [ -d $MOUNT ] || mkdir -p $MOUNT grep " $MOUNT " /etc/mtab && echo "test 5c: mtab before mount" && return 10 local oldfs="${FSNAME}" @@ -357,8 +357,8 @@ test_5d() { run_test 5d "mount with ost down" test_5e() { - start_ost start_mds + start_ost #define OBD_FAIL_PTLRPC_DELAY_SEND 0x506 do_facet client "lctl set_param fail_loc=0x80000506" @@ -551,8 +551,8 @@ run_test 19b "start/stop OSTs without MDS" test_20() { # first format the ost/mdt - start_ost start_mds + start_ost mount_client $MOUNT check_mount || return 43 rm -f $DIR/$tfile @@ -635,8 +635,8 @@ test_22() { run_test 22 "start a client before osts (should return errs)" test_23a() { # was test_23 - setup - # fail mds + setup + # fail mds stop $SINGLEMDS # force down client so that recovering mds waits for reconnect local running=$(grep -c $MOUNT /proc/mounts) || true @@ -689,8 +689,8 @@ umount_client $MOUNT cleanup_nocli test_23b() { # was test_23 - start_ost start_mds + start_ost # Simulate -EINTR during mount OBD_FAIL_LDLM_CLOSE_THREAD lctl set_param fail_loc=0x80000313 mount_client $MOUNT -- 1.8.3.1