X-Git-Url: https://git.whamcloud.com/?a=blobdiff_plain;f=lustre%2Fptlrpc%2Frecover.c;h=18bc6f4c891c4289077bf8167a7ccea0b787dcc4;hb=090c677210ee2946d99c71412e4ff762bb300f4f;hp=9341403e8dad1521fd2bcdfdc0fc2cd52733d7d3;hpb=bb5c8e257d0d1ed111da68715c0a3a49849b4fdf;p=fs%2Flustre-release.git diff --git a/lustre/ptlrpc/recover.c b/lustre/ptlrpc/recover.c index 9341403..18bc6f4 100644 --- a/lustre/ptlrpc/recover.c +++ b/lustre/ptlrpc/recover.c @@ -50,13 +50,13 @@ void ptlrpc_run_recovery_over_upcall(struct obd_device *obd) char *argv[4]; char *envp[3]; int rc; - ENTRY; + argv[0] = obd_lustre_upcall; argv[1] = "RECOVERY_OVER"; argv[2] = obd->obd_uuid.uuid; argv[3] = NULL; - + envp[0] = "HOME=/"; envp[1] = "PATH=/sbin:/bin:/usr/sbin:/usr/bin"; envp[2] = NULL; @@ -68,7 +68,7 @@ void ptlrpc_run_recovery_over_upcall(struct obd_device *obd) argv[0], argv[1], argv[2], rc); } else { - CERROR("Invoked upcall %s %s %s", + CERROR("Invoked upcall %s %s %s\n", argv[0], argv[1], argv[2]); } } @@ -76,11 +76,20 @@ void ptlrpc_run_recovery_over_upcall(struct obd_device *obd) void ptlrpc_run_failed_import_upcall(struct obd_import* imp) { #ifdef __KERNEL__ + unsigned long flags; char *argv[7]; char *envp[3]; int rc; - ENTRY; + + spin_lock_irqsave(&imp->imp_lock, flags); + if (imp->imp_state == LUSTRE_IMP_CLOSED) { + spin_unlock_irqrestore(&imp->imp_lock, flags); + EXIT; + return; + } + spin_unlock_irqrestore(&imp->imp_lock, flags); + argv[0] = obd_lustre_upcall; argv[1] = "FAILED_IMPORT"; argv[2] = imp->imp_target_uuid.uuid; @@ -104,32 +113,62 @@ void ptlrpc_run_failed_import_upcall(struct obd_import* imp) argv[0], argv[1], argv[2], argv[3], argv[4]); } #else + if (imp->imp_state == LUSTRE_IMP_CLOSED) { + EXIT; + return; + } ptlrpc_recover_import(imp, NULL); #endif } -int ptlrpc_replay(struct obd_import *imp) +/* This might block waiting for the upcall to start, so it should + * not be called from a thread that shouldn't block. (Like ptlrpcd) */ +void ptlrpc_initiate_recovery(struct obd_import *imp) +{ + ENTRY; + + LASSERT (obd_lustre_upcall != NULL); + + if (strcmp(obd_lustre_upcall, "DEFAULT") == 0) { + CDEBUG(D_ERROR, "%s: starting recovery without upcall\n", + imp->imp_target_uuid.uuid); + ptlrpc_connect_import(imp, NULL); + } + else if (strcmp(obd_lustre_upcall, "NONE") == 0) { + CDEBUG(D_ERROR, "%s: recovery diabled\n", + imp->imp_target_uuid.uuid); + } + else { + CDEBUG(D_ERROR, "%s: calling upcall to start recovery\n", + imp->imp_target_uuid.uuid); + ptlrpc_run_failed_import_upcall(imp); + } + + EXIT; +} + +int ptlrpc_replay_next(struct obd_import *imp, int *inflight) { int rc = 0; struct list_head *tmp, *pos; struct ptlrpc_request *req; unsigned long flags; + __u64 last_transno; ENTRY; + *inflight = 0; + /* It might have committed some after we last spoke, so make sure we * get rid of them now. */ spin_lock_irqsave(&imp->imp_lock, flags); ptlrpc_free_committed(imp); + last_transno = imp->imp_last_replay_transno; spin_unlock_irqrestore(&imp->imp_lock, flags); - CDEBUG(D_HA, "import %p from %s has committed "LPD64"\n", - imp, imp->imp_target_uuid.uuid, imp->imp_peer_committed_transno); - - list_for_each(tmp, &imp->imp_replay_list) { - req = list_entry(tmp, struct ptlrpc_request, rq_list); - DEBUG_REQ(D_HA, req, "RETAINED: "); - } + CDEBUG(D_HA, "import %p from %s committed "LPU64" last "LPU64"\n", + imp, imp->imp_target_uuid.uuid, imp->imp_peer_committed_transno, + last_transno); /* Do I need to hold a lock across this iteration? We shouldn't be * racing with any additions to the list, because we're in recovery @@ -147,20 +186,19 @@ int ptlrpc_replay(struct obd_import *imp) * just a little race... */ list_for_each_safe(tmp, pos, &imp->imp_replay_list) { - req = list_entry(tmp, struct ptlrpc_request, rq_list); - - DEBUG_REQ(D_HA, req, "REPLAY:"); - - rc = ptlrpc_replay_req(req); - - if (rc) { - CERROR("recovery replay error %d for req "LPD64"\n", - rc, req->rq_xid); - RETURN(rc); + req = list_entry(tmp, struct ptlrpc_request, rq_replay_list); + if (req->rq_transno > last_transno) { + rc = ptlrpc_replay_req(req); + if (rc) { + CERROR("recovery replay error %d for req " + LPD64"\n", rc, req->rq_xid); + RETURN(rc); + } + *inflight = 1; + break; } } - - RETURN(0); + RETURN(rc); } int ptlrpc_resend(struct obd_import *imp) @@ -178,7 +216,10 @@ int ptlrpc_resend(struct obd_import *imp) /* Well... what if lctl recover is called twice at the same time? */ spin_lock_irqsave(&imp->imp_lock, flags); - LASSERT(imp->imp_state == LUSTRE_IMP_RECOVER); + if (imp->imp_state != LUSTRE_IMP_RECOVER) { + spin_unlock_irqrestore(&imp->imp_lock, flags); + RETURN(-1); + } spin_unlock_irqrestore(&imp->imp_lock, flags); list_for_each_safe(tmp, pos, &imp->imp_sending_list) { @@ -199,51 +240,12 @@ void ptlrpc_wake_delayed(struct obd_import *imp) list_for_each_safe(tmp, pos, &imp->imp_delayed_list) { req = list_entry(tmp, struct ptlrpc_request, rq_list); - ptlrpc_put_connection(req->rq_connection); - req->rq_connection = - ptlrpc_connection_addref(req->rq_import->imp_connection); - - if (req->rq_set) { - DEBUG_REQ(D_HA, req, "waking (set %p):", req->rq_set); - wake_up(&req->rq_set->set_waitq); - } else { - DEBUG_REQ(D_HA, req, "waking:"); - wake_up(&req->rq_reply_waitq); - } + DEBUG_REQ(D_HA, req, "waking (set %p):", req->rq_set); + ptlrpc_wake_client_req(req); } spin_unlock_irqrestore(&imp->imp_lock, flags); } -inline void ptlrpc_invalidate_import_state(struct obd_import *imp) -{ - struct obd_device *obd = imp->imp_obd; - struct ldlm_namespace *ns = obd->obd_namespace; - - ptlrpc_abort_inflight(imp); - -#if 0 - obd_invalidate_import(obd, imp); -#endif - - ldlm_namespace_cleanup(ns, LDLM_FL_LOCAL_ONLY); -} - -void ptlrpc_handle_failed_import(struct obd_import *imp) -{ - ENTRY; - if (!imp->imp_replayable) { - CDEBUG(D_HA, - "import %s@%s for %s not replayable, deactivating\n", - imp->imp_target_uuid.uuid, - imp->imp_connection->c_remote_uuid.uuid, - imp->imp_obd->obd_name); - ptlrpc_set_import_active(imp, 0); - } - - ptlrpc_run_failed_import_upcall(imp); - EXIT; -} - void ptlrpc_request_handle_notconn(struct ptlrpc_request *failed_req) { int rc; @@ -255,107 +257,55 @@ void ptlrpc_request_handle_notconn(struct ptlrpc_request *failed_req) imp->imp_obd->obd_name, imp->imp_target_uuid.uuid, imp->imp_connection->c_remote_uuid.uuid); + + if (ptlrpc_set_import_discon(imp)) { + if (!imp->imp_replayable) { + CDEBUG(D_HA, "import %s@%s for %s not replayable, " + "auto-deactivating\n", + imp->imp_target_uuid.uuid, + imp->imp_connection->c_remote_uuid.uuid, + imp->imp_obd->obd_name); + ptlrpc_deactivate_import(imp); + } - rc = ptlrpc_recover_import_no_retry(imp, NULL); - - if (failed_req->rq_import_generation != imp->imp_generation) { - spin_lock_irqsave (&failed_req->rq_lock, flags); - failed_req->rq_err = 1; - spin_unlock_irqrestore (&failed_req->rq_lock, flags); - } - else { - ptlrpc_resend_req(failed_req); - if (rc && rc != -EALREADY) - ptlrpc_handle_failed_import(imp); - + rc = ptlrpc_connect_import(imp, NULL); } + + + /* Wait for recovery to complete and resend. If evicted, then + this request will be errored out later.*/ + spin_lock_irqsave(&failed_req->rq_lock, flags); + if (!failed_req->rq_no_resend) + failed_req->rq_resend = 1; + spin_unlock_irqrestore(&failed_req->rq_lock, flags); + EXIT; } -int ptlrpc_set_import_active(struct obd_import *imp, int active) +/* + * This should only be called by the ioctl interface, currently + * with the lctl deactivate and activate commands. + */ +int ptlrpc_set_import_active(struct obd_import *imp, int active) { struct obd_device *obd = imp->imp_obd; - unsigned long flags; + int rc = 0; LASSERT(obd); /* When deactivating, mark import invalid, and abort in-flight * requests. */ if (!active) { - spin_lock_irqsave(&imp->imp_lock, flags); - /* This is a bit of a hack, but invalidating replayable - * imports makes a temporary reconnect failure into a much more - * ugly -- and hard to remedy -- situation. */ - if (!imp->imp_replayable) { - CDEBUG(D_HA, "setting import %s INVALID\n", - imp->imp_target_uuid.uuid); - imp->imp_invalid = 1; - } - imp->imp_generation++; - spin_unlock_irqrestore(&imp->imp_lock, flags); - ptlrpc_invalidate_import_state(imp); - } + ptlrpc_invalidate_import(imp, 0); + } - /* When activating, mark import valid */ + /* When activating, mark import valid, and attempt recovery */ if (active) { CDEBUG(D_HA, "setting import %s VALID\n", imp->imp_target_uuid.uuid); - spin_lock_irqsave(&imp->imp_lock, flags); - imp->imp_invalid = 0; - spin_unlock_irqrestore(&imp->imp_lock, flags); - } - - if (obd->obd_observer) - RETURN(obd_notify(obd->obd_observer, obd, active)); - - RETURN(0); -} - -void ptlrpc_fail_import(struct obd_import *imp, int generation) -{ - unsigned long flags; - int in_recovery = 0; - ENTRY; - - LASSERT (!imp->imp_dlm_fake); - - spin_lock_irqsave(&imp->imp_lock, flags); - if (imp->imp_state != LUSTRE_IMP_FULL) { - in_recovery = 1; - } else { - CDEBUG(D_HA, "%s: new state: DISCON\n", - imp->imp_client->cli_name); - imp->imp_state = LUSTRE_IMP_DISCON; + rc = ptlrpc_recover_import(imp, NULL); } - spin_unlock_irqrestore(&imp->imp_lock, flags); - - if (in_recovery) { - EXIT; - return; - } - - ptlrpc_handle_failed_import(imp); - EXIT; -} - -static int signal_completed_replay(struct obd_import *imp) -{ - struct ptlrpc_request *req; - int rc; - ENTRY; - - req = ptlrpc_prep_req(imp, OBD_PING, 0, NULL, NULL); - if (!req) - RETURN(-ENOMEM); - - req->rq_replen = lustre_msg_size(0, NULL); - req->rq_send_state = LUSTRE_IMP_REPLAY; - req->rq_reqmsg->flags |= MSG_LAST_REPLAY; - req->rq_timeout *= 3; - - rc = ptlrpc_queue_wait(req); - ptlrpc_req_finished(req); RETURN(rc); } @@ -364,168 +314,58 @@ int ptlrpc_recover_import(struct obd_import *imp, char *new_uuid) int rc; ENTRY; + /* force import to be disconnected. */ + ptlrpc_set_import_discon(imp); + rc = ptlrpc_recover_import_no_retry(imp, new_uuid); - if (rc && rc != -EALREADY) { - unsigned long flags; - CDEBUG(D_HA, "recovery of %s on %s failed (%d); restarting\n", - imp->imp_target_uuid.uuid, - new_uuid ? new_uuid : - (char *)imp->imp_connection->c_remote_uuid.uuid, rc); - spin_lock_irqsave(&imp->imp_lock, flags); - imp->imp_state = LUSTRE_IMP_FULL; - spin_unlock_irqrestore(&imp->imp_lock, flags); - ptlrpc_fail_import(imp, imp->imp_generation); - } RETURN(rc); } +int ptlrpc_import_in_recovery(struct obd_import *imp) +{ + unsigned long flags; + int in_recovery = 1; + spin_lock_irqsave(&imp->imp_lock, flags); + if (imp->imp_state == LUSTRE_IMP_FULL || + imp->imp_state == LUSTRE_IMP_CLOSED || + imp->imp_state == LUSTRE_IMP_DISCON) + in_recovery = 0; + spin_unlock_irqrestore(&imp->imp_lock, flags); + return in_recovery; +} + static int ptlrpc_recover_import_no_retry(struct obd_import *imp, char *new_uuid) { int rc; unsigned long flags; int in_recovery = 0; - int was_invalid = 0; + struct l_wait_info lwi; ENTRY; spin_lock_irqsave(&imp->imp_lock, flags); - if (imp->imp_state == LUSTRE_IMP_FULL) { - CDEBUG(D_HA, "%s: new state: DISCON\n", - imp->imp_client->cli_name); - imp->imp_state = LUSTRE_IMP_DISCON; - } - if (imp->imp_state != LUSTRE_IMP_DISCON) { in_recovery = 1; - } else if (imp->imp_invalid) { - imp->imp_invalid = 0; - was_invalid = 1; } - spin_unlock_irqrestore(&imp->imp_lock, flags); if (in_recovery == 1) RETURN(-EALREADY); - down(&imp->imp_recovery_sem); - /* If recovery happened while we waited, we're done. */ - if (imp->imp_state == LUSTRE_IMP_FULL) - GOTO(out, rc = 0); - - LASSERT (imp->imp_state == LUSTRE_IMP_DISCON); - - if (new_uuid) { - struct ptlrpc_connection *conn; - struct obd_uuid uuid; - struct ptlrpc_peer peer; - struct obd_export *dlmexp; - - obd_str2uuid(&uuid, new_uuid); - if (ptlrpc_uuid_to_peer(&uuid, &peer)) { - CERROR("no connection found for UUID %s\n", new_uuid); - GOTO(out, rc = -EINVAL); - } - - conn = ptlrpc_get_connection(&peer, &uuid); - if (!conn) - GOTO(out, rc = -ENOMEM); - - CDEBUG(D_HA, "switching import %s/%s from %s to %s\n", - imp->imp_target_uuid.uuid, imp->imp_obd->obd_name, - imp->imp_connection->c_remote_uuid.uuid, - conn->c_remote_uuid.uuid); - - /* Switch the import's connection and the DLM export's - * connection (which are almost certainly the same, but we - * keep distinct refs just to make things clearer. I think. */ - if (imp->imp_connection) - ptlrpc_put_connection(imp->imp_connection); - /* We hand off the ref from ptlrpc_get_connection. */ - imp->imp_connection = conn; - - dlmexp = class_conn2export(&imp->imp_dlm_handle); - if (dlmexp->exp_connection) - ptlrpc_put_connection(dlmexp->exp_connection); - dlmexp->exp_connection = ptlrpc_connection_addref(conn); - class_export_put(dlmexp); - - } - - connect: - rc = ptlrpc_connect_import(imp); - - if (rc < 0) { - CERROR("failed to reconnect to %s@%s: %d\n", - imp->imp_target_uuid.uuid, - imp->imp_connection->c_remote_uuid.uuid, rc); - GOTO(out, rc); - } - - if (imp->imp_state == LUSTRE_IMP_EVICTED) { - CDEBUG(D_HA, "evicted from %s@%s; invalidating\n", - imp->imp_target_uuid.uuid, - imp->imp_connection->c_remote_uuid.uuid); - ptlrpc_set_import_active(imp, 0); - CDEBUG(D_HA, "%s: new state: RECOVER\n", - imp->imp_client->cli_name); - imp->imp_state = LUSTRE_IMP_RECOVER; - } - - if (imp->imp_state == LUSTRE_IMP_REPLAY) { - CDEBUG(D_HA, "replay requested by %s\n", - imp->imp_target_uuid.uuid); - rc = ptlrpc_replay(imp); - if (rc) - GOTO(out, rc); - - rc = ldlm_replay_locks(imp); - if (rc) - GOTO(out, rc); - - rc = signal_completed_replay(imp); - if (rc) - GOTO(out, rc); - CDEBUG(D_HA, "%s: new state: RECOVER\n", - imp->imp_client->cli_name); - imp->imp_state = LUSTRE_IMP_RECOVER; - } - - if (imp->imp_state == LUSTRE_IMP_RECOVER) { - CDEBUG(D_HA, "reconnected to %s@%s\n", - imp->imp_target_uuid.uuid, - imp->imp_connection->c_remote_uuid.uuid); - - ptlrpc_set_import_active(imp, 1); - ptlrpc_resend(imp); - spin_lock_irqsave(&imp->imp_lock, flags); - CDEBUG(D_HA, "%s: new state: FULL\n", - imp->imp_client->cli_name); - imp->imp_state = LUSTRE_IMP_FULL; - spin_unlock_irqrestore(&imp->imp_lock, flags); - ptlrpc_wake_delayed(imp); - } + rc = ptlrpc_connect_import(imp, new_uuid); + if (rc) + RETURN(rc); + CDEBUG(D_ERROR, "%s: recovery started, waiting\n", + imp->imp_target_uuid.uuid); - LASSERT(imp->imp_state == LUSTRE_IMP_FULL); + lwi = LWI_TIMEOUT(MAX(obd_timeout * HZ, 1), NULL, NULL); + rc = l_wait_event(imp->imp_recovery_waitq, + !ptlrpc_import_in_recovery(imp), &lwi); + CDEBUG(D_ERROR, "%s: recovery finished\n", + imp->imp_target_uuid.uuid); - out: - if (rc != 0) { - spin_lock_irqsave(&imp->imp_lock, flags); - imp->imp_state = LUSTRE_IMP_DISCON; - spin_unlock_irqrestore(&imp->imp_lock, flags); - - if (rc == -ENOTCONN) { - CDEBUG(D_HA, "evicted/aborted by %s@%s during recovery;" - "invalidating and reconnecting\n", - imp->imp_target_uuid.uuid, - imp->imp_connection->c_remote_uuid.uuid); - GOTO(connect, -ENOTCONN); - } else if (was_invalid) { - ptlrpc_set_import_active(imp, 0); - } - } - up(&imp->imp_recovery_sem); RETURN(rc); }