From 8c092dca75f5794837537edaf81027ec4d8e3997 Mon Sep 17 00:00:00 2001 From: bobijam Date: Tue, 23 Dec 2008 06:32:05 +0000 Subject: [PATCH] Branch b_release_1_6_7 b=18049 o=johann i=adilger i=zhenyu.xu (bobijam) Description: aborting recovery hang on MDS Details : don't throttle destroy RPCs for the MDT. --- lustre/ChangeLog | 12 +++++++++--- lustre/osc/osc_request.c | 31 +++++++++++++++++-------------- 2 files changed, 26 insertions(+), 17 deletions(-) diff --git a/lustre/ChangeLog b/lustre/ChangeLog index 6483871..61831b3 100644 --- a/lustre/ChangeLog +++ b/lustre/ChangeLog @@ -40,10 +40,16 @@ * File join has been disabled in this release, refer to Bugzilla 16929. * A new Lustre ADIO driver is available for MPICH2-1.0.7. - * NFS export disabled when stack size < 8192. Since the NFSv4 export of + * NFS export disabled when stack size < 8192. Since the NFSv4 export of Lustre filesystem with 4K stack may cause a stack overflow. For more information, please refer to bugzilla 17630. +Severity : normal +Frequency : start MDS on uncleanly shut down MDS device +Bugzilla : 18049 +Description: aborting recovery hang on MDS +Details : don't throttle destroy RPCs for the MDT. + Severity : major Frequency : on remount Bugzilla : 18018 @@ -62,7 +68,7 @@ Details : client_disconnect_export vs connect request race. Severity : minor Frequency : always Bugzilla : 16693 -Description: shrink LOV EAs before replying +Description: shrink LOV EAs before replying Details : correctly adjust LOV EA buffer for reply. Severity : normal @@ -82,7 +88,7 @@ Severity : normal Frequency : rare, need ACLs on inode. Bugzilla : 16492 Description: client can't handle ost additional correctly -Details : if ost was added after client connected to mds client can have +Details : if ost was added after client connected to mds client can have hit lnet_try_match_md ... to big messages to wide striped files. 
in this case need teach client to handle config events about add lov target and update client max ea size at that event. diff --git a/lustre/osc/osc_request.c b/lustre/osc/osc_request.c index 186a0fe..88cf904 100644 --- a/lustre/osc/osc_request.c +++ b/lustre/osc/osc_request.c @@ -645,7 +645,6 @@ static int osc_destroy(struct obd_export *exp, struct obdo *oa, RETURN(-ENOMEM); req->rq_request_portal = OST_IO_PORTAL; /* bug 7198 */ - req->rq_interpret_reply = osc_destroy_interpret; ptlrpc_at_set_req_timeout(req); body = lustre_msg_buf(req->rq_reqmsg, REQ_REC_OFF, sizeof(*body)); @@ -657,15 +656,19 @@ static int osc_destroy(struct obd_export *exp, struct obdo *oa, memcpy(&body->oa, oa, sizeof(*oa)); ptlrpc_req_set_repsize(req, 2, size); - if (!osc_can_send_destroy(cli)) { - struct l_wait_info lwi = { 0 }; - - /* - * Wait until the number of on-going destroy RPCs drops - * under max_rpc_in_flight - */ - l_wait_event_exclusive(cli->cl_destroy_waitq, - osc_can_send_destroy(cli), &lwi); + /* don't throttle destroy RPCs for the MDT */ + if (!(cli->cl_import->imp_connect_flags_orig & OBD_CONNECT_MDS)) { + req->rq_interpret_reply = osc_destroy_interpret; + if (!osc_can_send_destroy(cli)) { + struct l_wait_info lwi = { 0 }; + + /* + * Wait until the number of on-going destroy RPCs drops + * under max_rpc_in_flight + */ + l_wait_event_exclusive(cli->cl_destroy_waitq, + osc_can_send_destroy(cli), &lwi); + } } /* Do not wait for response */ @@ -2196,7 +2199,7 @@ static int osc_send_oap_rpc(struct client_obd *cli, struct lov_oinfo *loi, #if defined(__KERNEL__) && defined(__linux__) if(!(PageLocked(oap->oap_page) && (CheckWriteback(oap->oap_page, cmd) || oap->oap_oig !=NULL))) { - CDEBUG(D_PAGE, "page %p lost wb %lx/%x\n", + CDEBUG(D_PAGE, "page %p lost wb %lx/%x\n", oap->oap_page, (long)oap->oap_page->flags, oap->oap_async_flags); LBUG(); } @@ -3724,7 +3727,7 @@ static int osc_llog_init(struct obd_device *obd, struct obd_device *tgt, rc = llog_setup(obd, LLOG_SIZE_REPL_CTXT, tgt, 
count, NULL, &osc_size_repl_logops); if (rc) { - struct llog_ctxt *ctxt = + struct llog_ctxt *ctxt = llog_get_context(obd, LLOG_MDS_OST_ORIG_CTXT); if (ctxt) llog_cleanup(ctxt); @@ -3796,13 +3799,13 @@ static int osc_disconnect(struct obd_export *exp) ctxt = llog_get_context(obd, LLOG_SIZE_REPL_CTXT); if (ctxt) { if (obd->u.cli.cl_conn_count == 1) { - /* Flush any remaining cancel messages out to the + /* Flush any remaining cancel messages out to the * target */ llog_sync(ctxt, exp); } llog_ctxt_put(ctxt); } else { - CDEBUG(D_HA, "No LLOG_SIZE_REPL_CTXT found in obd %p\n", + CDEBUG(D_HA, "No LLOG_SIZE_REPL_CTXT found in obd %p\n", obd); } -- 1.8.3.1