From: nathan Date: Tue, 16 Oct 2007 18:40:43 +0000 (+0000) Subject: b=13537 X-Git-Tag: v1_8_0_110~1108 X-Git-Url: https://git.whamcloud.com/gitweb?a=commitdiff_plain;h=8c3c4530d6d70488b2e9754fe7b363926faaa817;hp=f5af85401efd18c76778f579a71ad5165eaa77ee;p=fs%2Flustre-release.git b=13537 i=adilger i=johann extend reconnect timer as new clients reconnect fix recovery_status proc file consistently use xNNN for xids for easier debugging --- diff --git a/lustre/include/obd_support.h b/lustre/include/obd_support.h index f0a0578..2c2a04e 100644 --- a/lustre/include/obd_support.h +++ b/lustre/include/obd_support.h @@ -52,7 +52,7 @@ extern unsigned int obd_alloc_fail_rate; #define LDLM_TIMEOUT_DEFAULT 20 /* Time to wait for all clients to reconnect during recovery */ /* Should be very conservative; must catch the first reconnect after reboot */ -#define OBD_RECOVERY_TIMEOUT (obd_timeout * 5 / 2) +#define OBD_RECOVERY_FACTOR (5 / 2) /* times obd_timeout */ /* Change recovery-small 26b time if you change this */ #define PING_INTERVAL max(obd_timeout / 4, 1U) /* Client may skip 1 ping; wait for 2.5 */ diff --git a/lustre/ldlm/ldlm_lib.c b/lustre/ldlm/ldlm_lib.c index 8b60e46..2a14fb7 100644 --- a/lustre/ldlm/ldlm_lib.c +++ b/lustre/ldlm/ldlm_lib.c @@ -751,20 +751,25 @@ int target_handle_connect(struct ptlrpc_request *req, svc_handler_t handler) OBD_FAIL_TIMEOUT(OBD_FAIL_TGT_DELAY_RECONNECT, 2 * obd_timeout); } - /* We want to handle EALREADY but *not* -EALREADY from - * target_handle_reconnect(), return reconnection state in a flag */ - if (rc == EALREADY) { - lustre_msg_add_op_flags(req->rq_repmsg, MSG_CONNECT_RECONNECT); - rc = 0; - } else if (rc) { + if (rc < 0) GOTO(out, rc); - } /* Tell the client if we're in recovery. */ - /* If this is the first client, start the recovery timer */ if (target->obd_recovering) { lustre_msg_add_op_flags(req->rq_repmsg, MSG_CONNECT_RECOVERING); - target_start_recovery_timer(target, handler, req); + /* If this is the first time a client connects, + reset the recovery timer */ + if (rc == 0) + target_start_recovery_timer(target, handler, req); + } + + /* We want to handle EALREADY but *not* -EALREADY from + * target_handle_reconnect(), return reconnection state in a flag */ + if (rc == EALREADY) { + lustre_msg_add_op_flags(req->rq_repmsg, MSG_CONNECT_RECONNECT); + rc = 0; + } else { + LASSERT(rc == 0); } /* Tell the client if we support replayable requests */ @@ -1040,8 +1045,8 @@ void target_cleanup_recovery(struct obd_device *obd) void target_abort_recovery(void *data) { struct obd_device *obd = data; - ENTRY; + spin_lock_bh(&obd->obd_processing_task_lock); if (!obd->obd_recovering) { spin_unlock_bh(&obd->obd_processing_task_lock); @@ -1049,12 +1054,14 @@ void target_abort_recovery(void *data) return; } obd->obd_recovering = obd->obd_abort_recovery = 0; - obd->obd_recoverable_clients = 0; target_cancel_recovery_timer(obd); spin_unlock_bh(&obd->obd_processing_task_lock); - LCONSOLE_WARN("%s: recovery period over; disconnecting unfinished " - "clients.\n", obd->obd_name); + LCONSOLE_WARN("%s: recovery period over; %d clients never reconnected " + "after %lds (%d clients did)\n", + obd->obd_name, obd->obd_recoverable_clients, + cfs_time_current_sec()- obd->obd_recovery_start, + obd->obd_connected_clients); class_disconnect_stale_exports(obd); abort_recovery_queue(obd); @@ -1082,8 +1089,7 @@ void target_cancel_recovery_timer(struct obd_device *obd) cfs_timer_disarm(&obd->obd_recovery_timer); } -static void reset_recovery_timer(struct obd_device *obd, - struct ptlrpc_request *req) +static void reset_recovery_timer(struct obd_device *obd, int mintime) { spin_lock_bh(&obd->obd_processing_task_lock); if (!obd->obd_recovering) { @@ -1091,35 +1097,34 @@ static void reset_recovery_timer(struct obd_device *obd, return; } /* Track the client's largest expected replay time */ - if (lustre_msg_get_timeout(req->rq_reqmsg) > obd->obd_recovery_timeout) - obd->obd_recovery_timeout = - lustre_msg_get_timeout(req->rq_reqmsg); - LASSERT(obd->obd_recovery_timeout >= OBD_RECOVERY_TIMEOUT); + obd->obd_recovery_timeout = max(mintime, obd->obd_recovery_timeout); + obd->obd_recovery_end = cfs_time_current_sec() + + obd->obd_recovery_timeout; cfs_timer_arm(&obd->obd_recovery_timer, cfs_time_shift(obd->obd_recovery_timeout)); spin_unlock_bh(&obd->obd_processing_task_lock); CDEBUG(D_HA, "%s: recovery timer will expire in %u seconds\n", obd->obd_name, obd->obd_recovery_timeout); - obd->obd_recovery_end = cfs_time_current_sec() + - obd->obd_recovery_timeout; } -/* Only start it the first time called */ +/* Reset the timer with each new client connection */ void target_start_recovery_timer(struct obd_device *obd, svc_handler_t handler, struct ptlrpc_request *req) { spin_lock_bh(&obd->obd_processing_task_lock); - if (obd->obd_recovery_handler) { - spin_unlock_bh(&obd->obd_processing_task_lock); - return; - } + if (obd->obd_recovery_handler) + goto out; CWARN("%s: starting recovery timer\n", obd->obd_name); + obd->obd_recovery_start = cfs_time_current_sec(); + /* minimum */ + obd->obd_recovery_timeout = OBD_RECOVERY_FACTOR * obd_timeout; obd->obd_recovery_handler = handler; cfs_timer_init(&obd->obd_recovery_timer, target_recovery_expired, obd); +out: spin_unlock_bh(&obd->obd_processing_task_lock); - - reset_recovery_timer(obd, req); + reset_recovery_timer(obd, OBD_RECOVERY_FACTOR * + lustre_msg_get_timeout(req->rq_reqmsg)); } static int check_for_next_transno(struct obd_device *obd) @@ -1180,9 +1185,10 @@ static void process_recovery_queue(struct obd_device *obd) obd->obd_next_recovery_transno) { spin_unlock_bh(&obd->obd_processing_task_lock); CDEBUG(D_HA, "Waiting for transno "LPD64" (1st is " - LPD64")\n", + LPD64", x"LPU64")\n", obd->obd_next_recovery_transno, - lustre_msg_get_transno(req->rq_reqmsg)); + lustre_msg_get_transno(req->rq_reqmsg), + req->rq_xid); l_wait_event(obd->obd_next_transno_waitq, check_for_next_transno(obd), &lwi); spin_lock_bh(&obd->obd_processing_task_lock); @@ -1201,7 +1207,8 @@ static void process_recovery_queue(struct obd_device *obd) DEBUG_REQ(D_HA, req, "processing: "); (void)obd->obd_recovery_handler(req); obd->obd_replayed_requests++; - reset_recovery_timer(obd, req); + reset_recovery_timer(obd, 2 * /* safety */ + at_get(&req->rq_rqbd->rqbd_service->srv_at_estimate)); /* bug 1580: decide how to properly sync() in recovery */ //mds_fsync_super(obd->u.obt.obt_sb); class_export_put(req->rq_export); @@ -1553,9 +1560,10 @@ void target_committed_to_req(struct ptlrpc_request *req) lustre_msg_set_last_committed(req->rq_repmsg, obd->obd_last_committed); else - DEBUG_REQ(D_IOCTL, req, "not sending last_committed update"); + DEBUG_REQ(D_IOCTL, req, "not sending last_committed update (%d/" + "%d)", obd->obd_no_transno, req->rq_repmsg == NULL); - CDEBUG(D_INFO, "last_committed "LPU64", xid "LPU64"\n", + CDEBUG(D_INFO, "last_committed x"LPU64", this req x"LPU64"\n", obd->obd_last_committed, req->rq_xid); } diff --git a/lustre/mds/mds_fs.c b/lustre/mds/mds_fs.c index e55621d..5082548 100644 --- a/lustre/mds/mds_fs.c +++ b/lustre/mds/mds_fs.c @@ -446,10 +446,8 @@ static int mds_init_server_data(struct obd_device *obd, struct file *file) obd->obd_recoverable_clients, mds->mds_last_transno); obd->obd_next_recovery_transno = obd->obd_last_committed + 1; obd->obd_recovering = 1; - obd->obd_recovery_start = CURRENT_SECONDS; - obd->obd_recovery_timeout = OBD_RECOVERY_TIMEOUT; - obd->obd_recovery_end = obd->obd_recovery_start + - obd->obd_recovery_timeout; + obd->obd_recovery_start = 0; + obd->obd_recovery_end = 0; } mds->mds_mount_count = mount_count + 1; diff --git a/lustre/obdclass/genops.c b/lustre/obdclass/genops.c index 89a7b5c..9d3d0ed 100644 --- a/lustre/obdclass/genops.c +++ b/lustre/obdclass/genops.c @@ -958,9 +958,11 @@ static void class_disconnect_export_list(struct list_head *list, int flags) fake_exp->exp_flags = flags; spin_unlock(&fake_exp->exp_lock); + CDEBUG(D_HA, "%s: disconnecting export at %s (%p), " + "last request at %ld\n", + exp->exp_obd->obd_name, obd_export_nid2str(exp), + exp, exp->exp_last_request_time); rc = obd_disconnect(fake_exp); - CDEBUG(D_HA, "disconnected export at %s (%p): rc %d\n", - obd_export_nid2str(exp), exp, rc); class_export_put(exp); } EXIT; diff --git a/lustre/obdclass/lprocfs_status.c b/lustre/obdclass/lprocfs_status.c index 53c76e5..8ed0960 100644 --- a/lustre/obdclass/lprocfs_status.c +++ b/lustre/obdclass/lprocfs_status.c @@ -1486,7 +1486,6 @@ int lprocfs_obd_rd_recovery_status(char *page, char **start, off_t off, if (lprocfs_obd_snprintf(&page, size, &len, "status: ") <= 0) goto out; - if (obd->obd_max_recoverable_clients == 0) { lprocfs_obd_snprintf(&page, size, &len, "INACTIVE\n"); goto fclose; @@ -1496,64 +1495,60 @@ int lprocfs_obd_rd_recovery_status(char *page, char **start, off_t off, if (obd->obd_recovering == 0) { if (lprocfs_obd_snprintf(&page, size, &len, "COMPLETE\n") <= 0) goto out; - - if (lprocfs_obd_snprintf(&page, size, &len, "recovery_start: %lu\n", - obd->obd_recovery_start) <= 0) + if (lprocfs_obd_snprintf(&page, size, &len, + "recovery_start: %lu\n", + obd->obd_recovery_start) <= 0) goto out; - - if (lprocfs_obd_snprintf(&page, size, &len, "recovery_end: %lu\n", - obd->obd_recovery_end) <= 0) + if (lprocfs_obd_snprintf(&page, size, &len, + "recovery_duration: %lu\n", + obd->obd_recovery_end - + obd->obd_recovery_start) <= 0) goto out; - - /* Number of clients have have completed recovery */ - if (lprocfs_obd_snprintf(&page, size, &len, "recovered_clients: %d\n", - obd->obd_max_recoverable_clients - obd->obd_recoverable_clients) <= 0) + /* Number of clients that have completed recovery */ + if (lprocfs_obd_snprintf(&page, size, &len, + "completed_clients: %d/%d\n", + obd->obd_max_recoverable_clients - + obd->obd_recoverable_clients, + obd->obd_max_recoverable_clients) <= 0) goto out; - - if (lprocfs_obd_snprintf(&page, size, &len, "unrecovered_clients: %d\n", - obd->obd_recoverable_clients) <= 0) + if (lprocfs_obd_snprintf(&page, size, &len, + "replayed_requests: %d\n", + obd->obd_replayed_requests) <= 0) goto out; - - if (lprocfs_obd_snprintf(&page, size, &len, "last_transno: "LPD64"\n", - obd->obd_next_recovery_transno - 1) <= 0) + if (lprocfs_obd_snprintf(&page, size, &len, + "last_transno: "LPD64"\n", + obd->obd_next_recovery_transno - 1)<=0) goto out; - - lprocfs_obd_snprintf(&page, size, &len, "replayed_requests: %d\n", obd->obd_replayed_requests); goto fclose; } if (lprocfs_obd_snprintf(&page, size, &len, "RECOVERING\n") <= 0) goto out; - if (lprocfs_obd_snprintf(&page, size, &len, "recovery_start: %lu\n", - obd->obd_recovery_start) <= 0) + obd->obd_recovery_start) <= 0) goto out; - - if (lprocfs_obd_snprintf(&page, size, &len, "time remaining: %lu\n", - CURRENT_SECONDS >= obd->obd_recovery_end ? 0 : - obd->obd_recovery_end - CURRENT_SECONDS) <= 0) + if (lprocfs_obd_snprintf(&page, size, &len, "time_remaining: %lu\n", + cfs_time_current_sec() >= obd->obd_recovery_end ? 0 : + obd->obd_recovery_end - cfs_time_current_sec()) <= 0) goto out; - - if(lprocfs_obd_snprintf(&page, size, &len, "connected_clients: %d/%d\n", - obd->obd_connected_clients, - obd->obd_max_recoverable_clients) <= 0) + if (lprocfs_obd_snprintf(&page, size, &len,"connected_clients: %d/%d\n", + obd->obd_connected_clients, + obd->obd_max_recoverable_clients) <= 0) goto out; - - /* Number of clients have have completed recovery */ - if (lprocfs_obd_snprintf(&page, size, &len, "completed_clients: %d/%d\n", - obd->obd_max_recoverable_clients - obd->obd_recoverable_clients, + /* Number of clients that have completed recovery */ + if (lprocfs_obd_snprintf(&page, size, &len,"completed_clients: %d/%d\n", + obd->obd_max_recoverable_clients - + obd->obd_recoverable_clients, obd->obd_max_recoverable_clients) <= 0) goto out; - - if (lprocfs_obd_snprintf(&page, size, &len, "replayed_requests: %d/??\n", + if (lprocfs_obd_snprintf(&page, size, &len,"replayed_requests: %d/??\n", obd->obd_replayed_requests) <= 0) goto out; - if (lprocfs_obd_snprintf(&page, size, &len, "queued_requests: %d\n", obd->obd_requests_queued_for_recovery) <= 0) goto out; - - lprocfs_obd_snprintf(&page, size, &len, "next_transno: "LPD64"\n", obd->obd_next_recovery_transno); + lprocfs_obd_snprintf(&page, size, &len, "next_transno: "LPD64"\n", + obd->obd_next_recovery_transno); fclose: *eof = 1; diff --git a/lustre/obdfilter/filter.c b/lustre/obdfilter/filter.c index 3ef1477..2356aa0 100644 --- a/lustre/obdfilter/filter.c +++ b/lustre/obdfilter/filter.c @@ -805,10 +805,8 @@ static int filter_init_server_data(struct obd_device *obd, struct file * filp) le64_to_cpu(fsd->lsd_last_transno)); obd->obd_next_recovery_transno = obd->obd_last_committed + 1; obd->obd_recovering = 1; - obd->obd_recovery_start = CURRENT_SECONDS; - obd->obd_recovery_timeout = OBD_RECOVERY_TIMEOUT; - obd->obd_recovery_end = obd->obd_recovery_start + - obd->obd_recovery_timeout; + obd->obd_recovery_start = 0; + obd->obd_recovery_end = 0; } out: diff --git a/lustre/ptlrpc/client.c b/lustre/ptlrpc/client.c index c85f9e6..c182672 100644 --- a/lustre/ptlrpc/client.c +++ b/lustre/ptlrpc/client.c @@ -1556,6 +1556,7 @@ void ptlrpc_free_committed(struct obd_import *imp) imp->imp_generation == imp->imp_last_generation_checked) { CDEBUG(D_RPCTRACE, "%s: skip recheck: last_committed "LPU64"\n", imp->imp_obd->obd_name, imp->imp_peer_committed_transno); + EXIT; return; } @@ -1990,12 +1991,9 @@ int ptlrpc_replay_req(struct ptlrpc_request *req) ENTRY; LASSERT(req->rq_import->imp_state == LUSTRE_IMP_REPLAY); - /* Not handling automatic bulk replay yet (or ever?) */ LASSERT(req->rq_bulk == NULL); - DEBUG_REQ(D_HA, req, "REPLAY"); - LASSERT (sizeof (*aa) <= sizeof (req->rq_async_args)); aa = (struct ptlrpc_replay_async_args *)&req->rq_async_args; memset(aa, 0, sizeof *aa); @@ -2011,6 +2009,8 @@ int ptlrpc_replay_req(struct ptlrpc_request *req) /* Readjust the timeout for current conditions */ ptlrpc_at_set_req_timeout(req); + DEBUG_REQ(D_HA, req, "REPLAY"); + atomic_inc(&req->rq_import->imp_replay_inflight); ptlrpc_request_addref(req); /* ptlrpcd needs a ref */ diff --git a/lustre/ptlrpc/import.c b/lustre/ptlrpc/import.c index 2636df8..5a9e9db 100644 --- a/lustre/ptlrpc/import.c +++ b/lustre/ptlrpc/import.c @@ -338,7 +338,8 @@ static int import_select_connection(struct obd_import *imp) CONNECTION_SWITCH_INC); } LASSERT(imp_conn->oic_last_attempt); - CWARN("Tried all connections, increasing latency to %ds\n", + CWARN("%s: tried all connections, increasing latency to %ds\n", + imp->imp_obd->obd_name, at_get(&imp->imp_at.iat_net_latency)); } @@ -641,6 +642,8 @@ static int ptlrpc_connect_interpret(struct ptlrpc_request *request, } if (imp->imp_invalid) { + CDEBUG(D_HA, "%s: reconnected but import is invalid; " + "marking evicted\n", imp->imp_obd->obd_name); IMPORT_SET_STATE(imp, LUSTRE_IMP_EVICTED); } else if (MSG_CONNECT_RECOVERING & msg_flags) { CDEBUG(D_HA, "%s: reconnected to %s during replay\n", @@ -662,7 +665,8 @@ static int ptlrpc_connect_interpret(struct ptlrpc_request *request, imp->imp_last_replay_transno = 0; IMPORT_SET_STATE(imp, LUSTRE_IMP_REPLAY); } else { - DEBUG_REQ(D_HA, request, "evicting, flags=%x", msg_flags); + DEBUG_REQ(D_HA, request, "evicting (not initial connect and " + "flags reconnect/recovering not set: %x)",msg_flags); imp->imp_remote_handle = *lustre_msg_get_handle(request->rq_repmsg); IMPORT_SET_STATE(imp, LUSTRE_IMP_EVICTED); diff --git a/lustre/ptlrpc/pack_generic.c b/lustre/ptlrpc/pack_generic.c index 9d88451..d6e0f8f 100644 --- a/lustre/ptlrpc/pack_generic.c +++ b/lustre/ptlrpc/pack_generic.c @@ -2398,7 +2398,8 @@ void _debug_req(struct ptlrpc_request *req, __u32 mask, libcfs_debug_vmsg2(data->msg_cdls, data->msg_subsys, mask, data->msg_file, data->msg_fn, data->msg_line, fmt, args, " req@%p x"LPD64"/t"LPD64" o%d->%s@%s:%d lens %d/%d " - "e %d ref %d fl "REQ_FLAGS_FMT"/%x/%x rc %d/%d\n", + "e %d to %d dl %ld ref %d fl "REQ_FLAGS_FMT"/%x/%x " + "rc %d/%d\n", req, req->rq_xid, req->rq_transno, req->rq_reqmsg ? lustre_msg_get_opc(req->rq_reqmsg) : -1, req->rq_import ? obd2cli_tgt(req->rq_import->imp_obd) : @@ -2410,7 +2411,8 @@ void _debug_req(struct ptlrpc_request *req, __u32 mask, (char *)req->rq_export->exp_connection->c_remote_uuid.uuid : "", (req->rq_import && req->rq_import->imp_client) ? req->rq_import->imp_client->cli_request_portal : -1, - req->rq_reqlen, req->rq_replen, req->rq_early_count, + req->rq_reqlen, req->rq_replen, + req->rq_early_count, req->rq_timeout, req->rq_deadline, atomic_read(&req->rq_refcount), DEBUG_REQ_FLAGS(req), req->rq_reqmsg ? lustre_msg_get_flags(req->rq_reqmsg) : 0, req->rq_repmsg ? lustre_msg_get_flags(req->rq_repmsg) : 0, diff --git a/lustre/ptlrpc/service.c b/lustre/ptlrpc/service.c index 40bddf1..275ebe8 100644 --- a/lustre/ptlrpc/service.c +++ b/lustre/ptlrpc/service.c @@ -997,7 +997,7 @@ ptlrpc_server_handle_request(struct ptlrpc_service *svc, request->rq_phase = RQ_PHASE_INTERPRET; CDEBUG(D_RPCTRACE, "Handling RPC pname:cluuid+ref:pid:xid:nid:opc " - "%s:%s+%d:%d:"LPU64":%s:%d\n", cfs_curproc_comm(), + "%s:%s+%d:%d:x"LPU64":%s:%d\n", cfs_curproc_comm(), (request->rq_export ? (char *)request->rq_export->exp_client_uuid.uuid : "0"), (request->rq_export ? @@ -1013,7 +1013,7 @@ ptlrpc_server_handle_request(struct ptlrpc_service *svc, request->rq_phase = RQ_PHASE_COMPLETE; CDEBUG(D_RPCTRACE, "Handled RPC pname:cluuid+ref:pid:xid:nid:opc " - "%s:%s+%d:%d:"LPU64":%s:%d\n", cfs_curproc_comm(), + "%s:%s+%d:%d:x"LPU64":%s:%d\n", cfs_curproc_comm(), (request->rq_export ? (char *)request->rq_export->exp_client_uuid.uuid : "0"), (request->rq_export ? @@ -1031,14 +1031,15 @@ put_conn: class_export_put(request->rq_export); if (cfs_time_current_sec() > request->rq_deadline) { - DEBUG_REQ(D_WARNING, request, "Request took longer than" - " estimated (%+lds); client may timeout.", + DEBUG_REQ(D_WARNING, request, "Request x"LPU64" took longer " + "than estimated (%+lds); client may timeout.", + request->rq_xid, cfs_time_current_sec() - request->rq_deadline); } do_gettimeofday(&work_end); timediff = cfs_timeval_sub(&work_end, &work_start, NULL); - CDEBUG(D_RPCTRACE, "request "LPU64" opc %u from %s processed in " + CDEBUG(D_RPCTRACE, "request x"LPU64" opc %u from %s processed in " "%ldus (%ldus total) trans "LPU64" rc %d/%d\n", request->rq_xid, lustre_msg_get_opc(request->rq_reqmsg), libcfs_id2str(request->rq_peer), timediff,