From 9e44a8a89812024d653aa5493b5e70d82d861c1c Mon Sep 17 00:00:00 2001 From: shaver Date: Sat, 19 Oct 2002 11:19:14 +0000 Subject: [PATCH] - Split import reconnection and replay (OSC only needs to reconnect). - Restore recovd-management of server-side connections (why did I take that out in the first place?). - Abort inflight, uncommitted and waiting requests for imports that are invalidated by reconnection to OST. (Generates -EIO, or should!) - Mete out harsh, harsh justice to locks held by recovery-invalidated imports. - Remove LBUG()s now that some error returns from ldlm_cli_cancel are "normal". - Cancel locks on disconnection from OST (really filter). - Ignore replies that come in after we've started recovery on a given connection. The socknal's implicit retransmission can otherwise cause us worlds and worlds of hurt. - Wake up when we get aborted, and return -EIO. - Remove connections from the recovd's care when they go unused. (Which might actually happen, once Phil lands his refcounting fixes!) 
--- lustre/include/linux/lustre_ha.h | 5 +- lustre/include/linux/lustre_import.h | 1 - lustre/lib/target.c | 5 +- lustre/llite/recover.c | 97 ++++++++++++++++++++++++++++++++- lustre/llite/super.c | 2 +- lustre/mdc/mdc_request.c | 4 +- lustre/obdclass/genops.c | 4 +- lustre/obdfilter/filter.c | 3 + lustre/ptlrpc/client.c | 25 +++++++++ lustre/ptlrpc/connection.c | 1 + lustre/ptlrpc/recovd.c | 19 ++++++- lustre/ptlrpc/recover.c | 103 ++++++++++++++--------------------- lustre/ptlrpc/rpc.c | 3 +- 13 files changed, 200 insertions(+), 72 deletions(-) diff --git a/lustre/include/linux/lustre_ha.h b/lustre/include/linux/lustre_ha.h index 09610b2..8afa1a8 100644 --- a/lustre/include/linux/lustre_ha.h +++ b/lustre/include/linux/lustre_ha.h @@ -9,6 +9,7 @@ struct recovd_data; struct recovd_obd; +struct obd_import; struct ptlrpc_connection; /* rd_phase/rd_next_phase values */ @@ -32,6 +33,7 @@ struct ptlrpc_connection; typedef int (*ptlrpc_recovery_cb_t)(struct recovd_data *, int); struct recovd_data { + /* you must hold recovd->recovd_lock when touching rd_managed_chain */ struct list_head rd_managed_chain; ptlrpc_recovery_cb_t rd_recover; struct recovd_obd *rd_recovd; @@ -50,6 +52,7 @@ int recovd_cleanup(struct recovd_obd *mgr); extern struct recovd_obd *ptlrpc_recovd; int ptlrpc_run_recovery_upcall(struct ptlrpc_connection *conn); -int ptlrpc_reconnect_and_replay(struct ptlrpc_connection *conn); +int ptlrpc_reconnect_import(struct obd_import *imp, int rq_opc); +int ptlrpc_replay(struct ptlrpc_connection *conn); #endif diff --git a/lustre/include/linux/lustre_import.h b/lustre/include/linux/lustre_import.h index aa3e3d7..3a183e4 100644 --- a/lustre/include/linux/lustre_import.h +++ b/lustre/include/linux/lustre_import.h @@ -20,7 +20,6 @@ struct obd_import { struct list_head imp_chain; struct obd_device *imp_obd; /* XXX need a UUID here, I think - * XXX what about client_obd.cl_target_uuid? 
*/ }; diff --git a/lustre/lib/target.c b/lustre/lib/target.c index 94665e0..8786ee8 100644 --- a/lustre/lib/target.c +++ b/lustre/lib/target.c @@ -90,6 +90,8 @@ int target_handle_connect(struct ptlrpc_request *req) spin_lock(&export->exp_connection->c_lock); list_add(&export->exp_conn_chain, &export->exp_connection->c_exports); spin_unlock(&export->exp_connection->c_lock); + recovd_conn_manage(export->exp_connection, ptlrpc_recovd, + target_revoke_connection); dlmimp = &export->exp_ldlm_data.led_import; dlmimp->imp_connection = req->rq_connection; @@ -98,7 +100,6 @@ int target_handle_connect(struct ptlrpc_request *req) dlmimp->imp_handle.cookie = req->rq_reqmsg->cookie; dlmimp->imp_obd = /* LDLM! */ NULL; -#warning Peter: is this the right place to upgrade the server connection level? req->rq_connection->c_level = LUSTRE_CONN_FULL; out: req->rq_status = rc; @@ -137,6 +138,8 @@ static int target_disconnect_client(struct ptlrpc_connection *conn) if (rc) CERROR("disconnecting export %p failed: %d\n", exp, rc); } + + /* XXX spank the connection (it's frozen in _RECOVD for now!) */ RETURN(0); } diff --git a/lustre/llite/recover.c b/lustre/llite/recover.c index 1d2f5ad..e88cedf 100644 --- a/lustre/llite/recover.c +++ b/lustre/llite/recover.c @@ -10,6 +10,8 @@ #include #include +#include +#include static int ll_retry_recovery(struct ptlrpc_connection *conn) { @@ -17,6 +19,99 @@ static int ll_retry_recovery(struct ptlrpc_connection *conn) RETURN(0); } +/* XXX looks a lot like super.c:invalidate_request_list, don't it? 
*/ +static void abort_inflight_for_import(struct obd_import *imp) +{ + struct list_head *tmp, *n; + + list_for_each_safe(tmp, n, &imp->imp_connection->c_sending_head) { + struct ptlrpc_request *req = + list_entry(tmp, struct ptlrpc_request, rq_list); + if (req->rq_flags & PTL_RPC_FL_REPLIED) { + /* no need to replay, just discard */ + CERROR("uncommitted req xid "LPD64" op %d to OST %s\n", + (unsigned long long)req->rq_xid, + req->rq_reqmsg->opc, + imp->imp_obd->u.cli.cl_target_uuid); + ptlrpc_req_finished(req); + } else { + CERROR("inflight req xid "LPD64" op %d to OST %s\n", + (unsigned long long)req->rq_xid, + req->rq_reqmsg->opc, + imp->imp_obd->u.cli.cl_target_uuid); + + req->rq_flags |= PTL_RPC_FL_ERR; + wake_up(&req->rq_wait_for_rep); + } + } + + list_for_each_safe(tmp, n, &imp->imp_connection->c_delayed_head) { + struct ptlrpc_request *req = + list_entry(tmp, struct ptlrpc_request, rq_list); + CERROR("aborting waiting req xid "LPD64" op %d to OST %s\n", + (unsigned long long)req->rq_xid, req->rq_reqmsg->opc, + imp->imp_obd->u.cli.cl_target_uuid); + req->rq_flags |= PTL_RPC_FL_ERR; + wake_up(&req->rq_wait_for_rep); + } +} + +static void reconnect_ost(struct obd_import *imp) +{ + struct ldlm_namespace *ns = imp->imp_obd->obd_namespace; + + CDEBUG(D_HA, "invalidating all locks for OST imp %p (to %s):\n", + imp, imp->imp_connection->c_remote_uuid); + ldlm_namespace_dump(ns); + ldlm_namespace_cleanup(ns, 1 /* no network ops */); + + abort_inflight_for_import(imp); + + (void)ptlrpc_reconnect_import(imp, OST_CONNECT); +} + +static int ll_reconnect(struct ptlrpc_connection *conn) +{ + struct list_head *tmp; + int need_replay = 0; + + ENTRY; + + /* XXX c_lock semantics! */ + conn->c_level = LUSTRE_CONN_CON; + + /* XXX this code MUST be shared with class_obd_connect! */ + list_for_each(tmp, &conn->c_imports) { + struct obd_import *imp = list_entry(tmp, struct obd_import, + imp_chain); + if (imp->imp_obd->obd_type->typ_ops->o_brw) { + /* XXX what to do if we fail? 
*/ + reconnect_ost(imp); + } else { + int rc = ptlrpc_reconnect_import(imp, MDS_CONNECT); + if (!rc) + need_replay = 1; + /* make sure we don't try to replay for dead imps? + * + * else imp->imp_connection = NULL; + * + */ + } + } + + if (!need_replay) { + /* all done! */ + conn->c_level = LUSTRE_CONN_FULL; + RETURN(0); + } + + conn->c_level = LUSTRE_CONN_RECOVD; + /* this will replay, up the c_level, recovd_conn_fixed and continue reqs. + * also, makes a mean cup of coffee. + */ + RETURN(ptlrpc_replay(conn)); +} + int ll_recover(struct recovd_data *rd, int phase) { struct ptlrpc_connection *conn = class_rd2conn(rd); @@ -28,7 +123,7 @@ int ll_recover(struct recovd_data *rd, int phase) case PTLRPC_RECOVD_PHASE_PREPARE: RETURN(ptlrpc_run_recovery_upcall(conn)); case PTLRPC_RECOVD_PHASE_RECOVER: - RETURN(ptlrpc_reconnect_and_replay(conn)); + RETURN(ll_reconnect(conn)); case PTLRPC_RECOVD_PHASE_FAILURE: RETURN(ll_retry_recovery(conn)); } diff --git a/lustre/llite/super.c b/lustre/llite/super.c index 53c8015..d8bf664 100644 --- a/lustre/llite/super.c +++ b/lustre/llite/super.c @@ -533,7 +533,7 @@ static inline void invalidate_request_list(struct list_head *req_list) list_for_each_safe(tmp, n, req_list) { struct ptlrpc_request *req = list_entry(tmp, struct ptlrpc_request, rq_list); - CERROR("invalidating req xid %d op %d to %s:%d\n", + CERROR("invalidating req xid "LPD64" op %d to %s:%d\n", (unsigned long long)req->rq_xid, req->rq_reqmsg->opc, req->rq_connection->c_remote_uuid, req->rq_import->imp_client->cli_request_portal); diff --git a/lustre/mdc/mdc_request.c b/lustre/mdc/mdc_request.c index af165ea..1fb85e2 100644 --- a/lustre/mdc/mdc_request.c +++ b/lustre/mdc/mdc_request.c @@ -175,8 +175,8 @@ static int mdc_blocking_ast(struct ldlm_lock *lock, struct ldlm_lock_desc *desc, ldlm_lock2handle(lock, &lockh); rc = ldlm_cli_cancel(&lockh); if (rc < 0) { - CERROR("ldlm_cli_cancel: %d\n", rc); - LBUG(); + CDEBUG(D_INODE, "ldlm_cli_cancel: %d\n", rc); + RETURN(rc); } break; 
case LDLM_CB_CANCELING: diff --git a/lustre/obdclass/genops.c b/lustre/obdclass/genops.c index 487c932..6750092 100644 --- a/lustre/obdclass/genops.c +++ b/lustre/obdclass/genops.c @@ -342,8 +342,8 @@ struct obd_export *class_conn2export(struct lustre_handle *conn) RETURN(NULL); if (export->exp_cookie != conn->cookie) - return NULL; - return export; + RETURN(NULL); + RETURN(export); } /* class_conn2export */ struct obd_device *class_conn2obd(struct lustre_handle *conn) diff --git a/lustre/obdfilter/filter.c b/lustre/obdfilter/filter.c index 62f3954..f87a392 100644 --- a/lustre/obdfilter/filter.c +++ b/lustre/obdfilter/filter.c @@ -353,9 +353,12 @@ static int filter_connect(struct lustre_handle *conn, struct obd_device *obd, static int filter_disconnect(struct lustre_handle *conn) { + struct obd_export *export = class_conn2export(conn); int rc; ENTRY; + ldlm_cancel_locks_for_export(export); + rc = class_disconnect(conn); if (!rc) MOD_DEC_USE_COUNT; diff --git a/lustre/ptlrpc/client.c b/lustre/ptlrpc/client.c index 2459ad3..d86c807 100644 --- a/lustre/ptlrpc/client.c +++ b/lustre/ptlrpc/client.c @@ -251,6 +251,21 @@ static int ptlrpc_check_reply(struct ptlrpc_request *req) int rc = 0; if (req->rq_repmsg != NULL) { + struct ptlrpc_connection *conn = req->rq_import->imp_connection; + spin_lock(&conn->c_lock); + if (req->rq_level > conn->c_level) { + CDEBUG(D_HA, + "rep to xid "LPD64" op %d to %s:%d: " + "recovery started, ignoring (%d > %d)\n", + (unsigned long long)req->rq_xid, + req->rq_reqmsg->opc, conn->c_remote_uuid, + req->rq_import->imp_client->cli_request_portal, + req->rq_level, conn->c_level); + req->rq_repmsg = NULL; + spin_unlock(&conn->c_lock); + GOTO(out, rc = 0); + } + spin_unlock(&conn->c_lock); req->rq_transno = NTOH__u64(req->rq_repmsg->transno); req->rq_flags |= PTL_RPC_FL_REPLIED; GOTO(out, rc = 1); @@ -261,6 +276,11 @@ static int ptlrpc_check_reply(struct ptlrpc_request *req) GOTO(out, rc = 1); } + if (req->rq_flags & PTL_RPC_FL_ERR) { + CERROR("-- 
ABORTED --\n"); + GOTO(out, rc = 1); + } + out: CDEBUG(D_NET, "req = %p, rc = %d\n", req, rc); return rc; @@ -575,6 +595,11 @@ int ptlrpc_queue_wait(struct ptlrpc_request *req) req->rq_connection->c_remote_uuid, req->rq_import->imp_client->cli_request_portal); + if (req->rq_flags & PTL_RPC_FL_ERR) { + ptlrpc_abort(req); + GOTO(out, rc = -EIO); + } + /* Don't resend if we were interrupted. */ if ((req->rq_flags & (PTL_RPC_FL_RESEND | PTL_RPC_FL_INTR)) == PTL_RPC_FL_RESEND) { diff --git a/lustre/ptlrpc/connection.c b/lustre/ptlrpc/connection.c index a5528fd..df2a2c2 100644 --- a/lustre/ptlrpc/connection.c +++ b/lustre/ptlrpc/connection.c @@ -121,6 +121,7 @@ int ptlrpc_put_connection(struct ptlrpc_connection *c) CDEBUG(D_INFO, "connection=%p refcount %d\n", c, atomic_read(&c->c_refcount) - 1); if (atomic_dec_and_test(&c->c_refcount)) { + recovd_conn_unmanage(c); spin_lock(&conn_lock); list_del(&c->c_link); list_add(&c->c_link, &conn_unused_list); diff --git a/lustre/ptlrpc/recovd.c b/lustre/ptlrpc/recovd.c index 02df21a..336633f 100644 --- a/lustre/ptlrpc/recovd.c +++ b/lustre/ptlrpc/recovd.c @@ -85,6 +85,24 @@ void recovd_conn_manage(struct ptlrpc_connection *conn, EXIT; } +void recovd_conn_unmanage(struct ptlrpc_connection *conn) +{ + struct recovd_data *rd = &conn->c_recovd_data; + struct recovd_obd *recovd = rd->rd_recovd; + ENTRY; + + if (recovd) { + spin_lock(&recovd->recovd_lock); + list_del(&rd->rd_managed_chain); + spin_unlock(&recovd->recovd_lock); + rd->rd_recovd = NULL; + } + /* should be safe enough, right? 
*/ + rd->rd_recover = NULL; + rd->rd_phase = RD_IDLE; /* reset current phase; rd_next_phase is set on the next line */ + rd->rd_next_phase = RD_TROUBLED; +} + void recovd_conn_fail(struct ptlrpc_connection *conn) { struct recovd_data *rd = &conn->c_recovd_data; @@ -137,7 +155,6 @@ void recovd_conn_fixed(struct ptlrpc_connection *conn) EXIT; } - static int recovd_check_event(struct recovd_obd *recovd) { int rc = 0; diff --git a/lustre/ptlrpc/recover.c b/lustre/ptlrpc/recover.c index 0b3a1b8..a4fb6c7 100644 --- a/lustre/ptlrpc/recover.c +++ b/lustre/ptlrpc/recover.c @@ -24,63 +24,50 @@ #include #include -static int ptlrpc_reconnect(struct ptlrpc_connection *conn) +int ptlrpc_reconnect_import(struct obd_import *imp, int rq_opc) { - struct list_head *tmp; - int rc = -EINVAL; - - /* XXX c_lock semantics! */ - conn->c_level = LUSTRE_CONN_CON; - - /* XXX this code MUST be shared with class_obd_connect! */ - list_for_each(tmp, &conn->c_imports) { - struct obd_import *imp = list_entry(tmp, struct obd_import, - imp_chain); - struct obd_device *obd = imp->imp_obd; - struct client_obd *cli = &obd->u.cli; - int rq_opc = (obd->obd_type->typ_ops->o_brw) - ? OST_CONNECT : MDS_CONNECT; - int size[] = { sizeof(cli->cl_target_uuid), - sizeof(obd->obd_uuid) }; - char *tmp[] = {cli->cl_target_uuid, obd->obd_uuid }; - struct lustre_handle old_hdl; - struct ptlrpc_request *request; - struct obd_export *ldlmexp; - - LASSERT(imp->imp_connection == conn); - request = ptlrpc_prep_req(imp, rq_opc, 2, size, tmp); - request->rq_level = LUSTRE_CONN_NEW; - request->rq_replen = lustre_msg_size(0, NULL); - /* - * This address is the export that represents our client-side - * LDLM service (for ASTs). We should only have one on this - * list, so we just grab the first one. - * - * XXX tear down export, call class_obd_connect! 
- */ - ldlmexp = list_entry(obd->obd_exports.next, struct obd_export, - exp_obd_chain); - request->rq_reqmsg->addr = (__u64)(unsigned long)ldlmexp; - request->rq_reqmsg->cookie = ldlmexp->exp_cookie; - rc = ptlrpc_queue_wait(request); - rc = ptlrpc_check_status(request, rc); - if (rc) { - CERROR("cannot connect to %s@%s: rc = %d\n", - cli->cl_target_uuid, conn->c_remote_uuid, rc); - ptlrpc_free_req(request); - GOTO(out_disc, rc = -ENOTCONN); - } + struct obd_device *obd = imp->imp_obd; + struct client_obd *cli = &obd->u.cli; + int size[] = { sizeof(cli->cl_target_uuid), sizeof(obd->obd_uuid) }; + char *tmp[] = {cli->cl_target_uuid, obd->obd_uuid }; + struct ptlrpc_connection *conn = imp->imp_connection; + struct lustre_handle old_hdl; + struct ptlrpc_request *request; + struct obd_export *ldlmexp; + int rc; + + request = ptlrpc_prep_req(imp, rq_opc, 2, size, tmp); + request->rq_level = LUSTRE_CONN_NEW; + request->rq_replen = lustre_msg_size(0, NULL); + /* - old_hdl = imp->imp_handle; - imp->imp_handle.addr = request->rq_repmsg->addr; - imp->imp_handle.cookie = request->rq_repmsg->cookie; - CERROR("reconnected to %s@%s (%Lx/%Lx, was %Lx/%Lx)!\n", - cli->cl_target_uuid, conn->c_remote_uuid, - imp->imp_handle.addr, imp->imp_handle.cookie, - old_hdl.addr, old_hdl.cookie); - ptlrpc_req_finished(request); + * This address is the export that represents our client-side LDLM + * service (for ASTs). We should only have one on this list, so we + * just grab the first one. + * + * XXX tear down export, call class_obd_connect? 
+ */ + ldlmexp = list_entry(obd->obd_exports.next, struct obd_export, + exp_obd_chain); + request->rq_reqmsg->addr = (__u64)(unsigned long)ldlmexp; + request->rq_reqmsg->cookie = ldlmexp->exp_cookie; + rc = ptlrpc_queue_wait(request); + rc = ptlrpc_check_status(request, rc); + if (rc) { + CERROR("cannot connect to %s@%s: rc = %d\n", + cli->cl_target_uuid, conn->c_remote_uuid, rc); + ptlrpc_free_req(request); + GOTO(out_disc, rc = -ENOTCONN); } - conn->c_level = LUSTRE_CONN_RECOVD; + + old_hdl = imp->imp_handle; + imp->imp_handle.addr = request->rq_repmsg->addr; + imp->imp_handle.cookie = request->rq_repmsg->cookie; + CERROR("reconnected to %s@%s (%Lx/%Lx, was %Lx/%Lx)!\n", + cli->cl_target_uuid, conn->c_remote_uuid, + imp->imp_handle.addr, imp->imp_handle.cookie, + old_hdl.addr, old_hdl.cookie); + ptlrpc_req_finished(request); out_disc: return rc; @@ -121,19 +108,13 @@ int ptlrpc_run_recovery_upcall(struct ptlrpc_connection *conn) RETURN(0); } -int ptlrpc_reconnect_and_replay(struct ptlrpc_connection *conn) +int ptlrpc_replay(struct ptlrpc_connection *conn) { int rc = 0; struct list_head *tmp, *pos; struct ptlrpc_request *req; ENTRY; - /* 1. reconnect */ - rc = ptlrpc_reconnect(conn); - if (rc) - RETURN(rc); - - /* 2. walk the request list */ spin_lock(&conn->c_lock); CDEBUG(D_HA, "connection %p to %s has last_xid "LPD64"\n", diff --git a/lustre/ptlrpc/rpc.c b/lustre/ptlrpc/rpc.c index 98be8ca..cb89c76 100644 --- a/lustre/ptlrpc/rpc.c +++ b/lustre/ptlrpc/rpc.c @@ -247,7 +247,8 @@ EXPORT_SYMBOL(lustre_msg_buf); /* recover.c */ EXPORT_SYMBOL(ptlrpc_run_recovery_upcall); -EXPORT_SYMBOL(ptlrpc_reconnect_and_replay); +EXPORT_SYMBOL(ptlrpc_reconnect_import); +EXPORT_SYMBOL(ptlrpc_replay); MODULE_AUTHOR("Cluster File Systems, Inc "); MODULE_DESCRIPTION("Lustre Request Processor v1.0"); -- 1.8.3.1