struct recovd_data;
struct recovd_obd;
+struct obd_import;
struct ptlrpc_connection;
/* rd_phase/rd_next_phase values */
typedef int (*ptlrpc_recovery_cb_t)(struct recovd_data *, int);
struct recovd_data {
+ /* you must hold recovd->recovd_lock when touching rd_managed_chain */
struct list_head rd_managed_chain;
ptlrpc_recovery_cb_t rd_recover;
struct recovd_obd *rd_recovd;
extern struct recovd_obd *ptlrpc_recovd;
int ptlrpc_run_recovery_upcall(struct ptlrpc_connection *conn);
-int ptlrpc_reconnect_and_replay(struct ptlrpc_connection *conn);
+int ptlrpc_reconnect_import(struct obd_import *imp, int rq_opc);
+int ptlrpc_replay(struct ptlrpc_connection *conn);
#endif
struct list_head imp_chain;
struct obd_device *imp_obd;
/* XXX need a UUID here, I think
- * XXX what about client_obd.cl_target_uuid?
*/
};
spin_lock(&export->exp_connection->c_lock);
list_add(&export->exp_conn_chain, &export->exp_connection->c_exports);
spin_unlock(&export->exp_connection->c_lock);
+ recovd_conn_manage(export->exp_connection, ptlrpc_recovd,
+ target_revoke_connection);
dlmimp = &export->exp_ldlm_data.led_import;
dlmimp->imp_connection = req->rq_connection;
dlmimp->imp_handle.cookie = req->rq_reqmsg->cookie;
dlmimp->imp_obd = /* LDLM! */ NULL;
-#warning Peter: is this the right place to upgrade the server connection level?
req->rq_connection->c_level = LUSTRE_CONN_FULL;
out:
req->rq_status = rc;
if (rc)
CERROR("disconnecting export %p failed: %d\n", exp, rc);
}
+
+ /* XXX spank the connection (it's frozen in _RECOVD for now!) */
RETURN(0);
}
#include <linux/lustre_lite.h>
#include <linux/lustre_ha.h>
+#include <linux/lustre_dlm.h>
+#include <linux/lustre_idl.h>
static int ll_retry_recovery(struct ptlrpc_connection *conn)
{
RETURN(0);
}
+/*
+ * Abort the requests queued on the connection behind @imp.
+ *
+ * First pass, c_sending_head: a request already flagged PTL_RPC_FL_REPLIED
+ * is logged and released with ptlrpc_req_finished(); any other request is
+ * logged, flagged PTL_RPC_FL_ERR, and its rq_wait_for_rep waiters are woken.
+ *
+ * Second pass, c_delayed_head: every request is logged, flagged
+ * PTL_RPC_FL_ERR and woken.
+ *
+ * XXX looks a lot like super.c:invalidate_request_list, don't it?
+ */
+static void abort_inflight_for_import(struct obd_import *imp)
+{
+        struct list_head *pos, *next;
+        struct ptlrpc_request *req;
+
+        /* _safe iteration: presumably ptlrpc_req_finished() may unlink the
+         * entry being visited -- TODO confirm */
+        list_for_each_safe(pos, next, &imp->imp_connection->c_sending_head) {
+                req = list_entry(pos, struct ptlrpc_request, rq_list);
+
+                if (!(req->rq_flags & PTL_RPC_FL_REPLIED)) {
+                        CERROR("inflight req xid "LPD64" op %d to OST %s\n",
+                               (unsigned long long)req->rq_xid,
+                               req->rq_reqmsg->opc,
+                               imp->imp_obd->u.cli.cl_target_uuid);
+
+                        /* mark the failure before waking the waiter */
+                        req->rq_flags |= PTL_RPC_FL_ERR;
+                        wake_up(&req->rq_wait_for_rep);
+                        continue;
+                }
+
+                /* no need to replay, just discard */
+                CERROR("uncommitted req xid "LPD64" op %d to OST %s\n",
+                       (unsigned long long)req->rq_xid,
+                       req->rq_reqmsg->opc,
+                       imp->imp_obd->u.cli.cl_target_uuid);
+                ptlrpc_req_finished(req);
+        }
+
+        list_for_each_safe(pos, next, &imp->imp_connection->c_delayed_head) {
+                req = list_entry(pos, struct ptlrpc_request, rq_list);
+
+                CERROR("aborting waiting req xid "LPD64" op %d to OST %s\n",
+                       (unsigned long long)req->rq_xid, req->rq_reqmsg->opc,
+                       imp->imp_obd->u.cli.cl_target_uuid);
+
+                /* mark the failure before waking the waiter */
+                req->rq_flags |= PTL_RPC_FL_ERR;
+                wake_up(&req->rq_wait_for_rep);
+        }
+}
+
+/*
+ * Recover a single OST import.
+ *
+ * Dumps and then cleans up the lock namespace of the import's obd, aborts
+ * the requests queued on the import's connection, and finally issues an
+ * OST_CONNECT reconnect.  The result of the reconnect is deliberately
+ * discarded.
+ */
+static void reconnect_ost(struct obd_import *imp)
+{
+        struct ldlm_namespace *lock_ns;
+
+        lock_ns = imp->imp_obd->obd_namespace;
+        CDEBUG(D_HA, "invalidating all locks for OST imp %p (to %s):\n", imp,
+               imp->imp_connection->c_remote_uuid);
+
+        ldlm_namespace_dump(lock_ns);
+        /* second argument 1: no network ops */
+        ldlm_namespace_cleanup(lock_ns, 1);
+
+        abort_inflight_for_import(imp);
+        (void)ptlrpc_reconnect_import(imp, OST_CONNECT);
+}
+
+/*
+ * Reconnect every import hanging off @conn and replay if required.
+ *
+ * The connection level is dropped to LUSTRE_CONN_CON while the imports on
+ * c_imports are walked.  An import whose obd type provides o_brw is handled
+ * by reconnect_ost(); any other import is reconnected with MDS_CONNECT, and
+ * a successful reconnect of that kind means a replay is needed.
+ *
+ * Returns 0 with the level raised to LUSTRE_CONN_FULL when no replay is
+ * needed; otherwise sets the level to LUSTRE_CONN_RECOVD and returns the
+ * result of ptlrpc_replay().
+ */
+static int ll_reconnect(struct ptlrpc_connection *conn)
+{
+        struct list_head *pos;
+        int replay_wanted = 0;
+
+        ENTRY;
+
+        /* XXX c_lock semantics! */
+        conn->c_level = LUSTRE_CONN_CON;
+
+        /* XXX this code MUST be shared with class_obd_connect! */
+        list_for_each(pos, &conn->c_imports) {
+                struct obd_import *imp;
+
+                imp = list_entry(pos, struct obd_import, imp_chain);
+
+                if (imp->imp_obd->obd_type->typ_ops->o_brw) {
+                        /* XXX what to do if we fail? */
+                        reconnect_ost(imp);
+                        continue;
+                }
+
+                /* make sure we don't try to replay for dead imps?
+                 * (on failure: imp->imp_connection = NULL;)
+                 */
+                if (ptlrpc_reconnect_import(imp, MDS_CONNECT) == 0)
+                        replay_wanted = 1;
+        }
+
+        if (replay_wanted) {
+                conn->c_level = LUSTRE_CONN_RECOVD;
+                /* this will replay, up the c_level, recovd_conn_fixed and
+                 * continue reqs.  also, makes a mean cup of coffee.
+                 */
+                RETURN(ptlrpc_replay(conn));
+        }
+
+        /* all done! */
+        conn->c_level = LUSTRE_CONN_FULL;
+        RETURN(0);
+}
+
int ll_recover(struct recovd_data *rd, int phase)
{
struct ptlrpc_connection *conn = class_rd2conn(rd);
case PTLRPC_RECOVD_PHASE_PREPARE:
RETURN(ptlrpc_run_recovery_upcall(conn));
case PTLRPC_RECOVD_PHASE_RECOVER:
- RETURN(ptlrpc_reconnect_and_replay(conn));
+ RETURN(ll_reconnect(conn));
case PTLRPC_RECOVD_PHASE_FAILURE:
RETURN(ll_retry_recovery(conn));
}
list_for_each_safe(tmp, n, req_list) {
struct ptlrpc_request *req =
list_entry(tmp, struct ptlrpc_request, rq_list);
- CERROR("invalidating req xid %d op %d to %s:%d\n",
+ CERROR("invalidating req xid "LPD64" op %d to %s:%d\n",
(unsigned long long)req->rq_xid, req->rq_reqmsg->opc,
req->rq_connection->c_remote_uuid,
req->rq_import->imp_client->cli_request_portal);
ldlm_lock2handle(lock, &lockh);
rc = ldlm_cli_cancel(&lockh);
if (rc < 0) {
- CERROR("ldlm_cli_cancel: %d\n", rc);
- LBUG();
+ CDEBUG(D_INODE, "ldlm_cli_cancel: %d\n", rc);
+ RETURN(rc);
}
break;
case LDLM_CB_CANCELING:
RETURN(NULL);
if (export->exp_cookie != conn->cookie)
- return NULL;
- return export;
+ RETURN(NULL);
+ RETURN(export);
} /* class_conn2export */
struct obd_device *class_conn2obd(struct lustre_handle *conn)
static int filter_disconnect(struct lustre_handle *conn)
{
+ struct obd_export *export = class_conn2export(conn);
int rc;
ENTRY;
+ ldlm_cancel_locks_for_export(export);
+
rc = class_disconnect(conn);
if (!rc)
MOD_DEC_USE_COUNT;
int rc = 0;
if (req->rq_repmsg != NULL) {
+ struct ptlrpc_connection *conn = req->rq_import->imp_connection;
+ spin_lock(&conn->c_lock);
+ if (req->rq_level > conn->c_level) {
+ CDEBUG(D_HA,
+ "rep to xid "LPD64" op %d to %s:%d: "
+ "recovery started, ignoring (%d > %d)\n",
+ (unsigned long long)req->rq_xid,
+ req->rq_reqmsg->opc, conn->c_remote_uuid,
+ req->rq_import->imp_client->cli_request_portal,
+ req->rq_level, conn->c_level);
+ req->rq_repmsg = NULL;
+ spin_unlock(&conn->c_lock);
+ GOTO(out, rc = 0);
+ }
+ spin_unlock(&conn->c_lock);
req->rq_transno = NTOH__u64(req->rq_repmsg->transno);
req->rq_flags |= PTL_RPC_FL_REPLIED;
GOTO(out, rc = 1);
GOTO(out, rc = 1);
}
+ if (req->rq_flags & PTL_RPC_FL_ERR) {
+ CERROR("-- ABORTED --\n");
+ GOTO(out, rc = 1);
+ }
+
out:
CDEBUG(D_NET, "req = %p, rc = %d\n", req, rc);
return rc;
req->rq_connection->c_remote_uuid,
req->rq_import->imp_client->cli_request_portal);
+ if (req->rq_flags & PTL_RPC_FL_ERR) {
+ ptlrpc_abort(req);
+ GOTO(out, rc = -EIO);
+ }
+
/* Don't resend if we were interrupted. */
if ((req->rq_flags & (PTL_RPC_FL_RESEND | PTL_RPC_FL_INTR)) ==
PTL_RPC_FL_RESEND) {
CDEBUG(D_INFO, "connection=%p refcount %d\n",
c, atomic_read(&c->c_refcount) - 1);
if (atomic_dec_and_test(&c->c_refcount)) {
+ recovd_conn_unmanage(c);
spin_lock(&conn_lock);
list_del(&c->c_link);
list_add(&c->c_link, &conn_unused_list);
EXIT;
}
+/*
+ * Stop recovery management of @conn.
+ *
+ * If the connection's recovd_data is attached to a recovd, it is unlinked
+ * from rd_managed_chain under recovd_lock and the back-pointer is cleared.
+ * The recovery callback is then cleared and the phase state is reset to
+ * RD_IDLE with RD_TROUBLED as the next phase.
+ */
+void recovd_conn_unmanage(struct ptlrpc_connection *conn)
+{
+        struct recovd_data *rd = &conn->c_recovd_data;
+        struct recovd_obd *recovd = rd->rd_recovd;
+        ENTRY;
+
+        if (recovd) {
+                /* rd_managed_chain may only be touched under recovd_lock */
+                spin_lock(&recovd->recovd_lock);
+                list_del(&rd->rd_managed_chain);
+                spin_unlock(&recovd->recovd_lock);
+                rd->rd_recovd = NULL;
+        }
+        /* should be safe enough, right? */
+        rd->rd_recover = NULL;
+        /* reset the current phase; the original stored to rd_next_phase
+         * twice, which left rd_phase untouched */
+        rd->rd_phase = RD_IDLE;
+        rd->rd_next_phase = RD_TROUBLED;
+        EXIT;
+}
+
void recovd_conn_fail(struct ptlrpc_connection *conn)
{
struct recovd_data *rd = &conn->c_recovd_data;
EXIT;
}
-
static int recovd_check_event(struct recovd_obd *recovd)
{
int rc = 0;
#include <linux/lustre_net.h>
#include <linux/obd.h>
-static int ptlrpc_reconnect(struct ptlrpc_connection *conn)
+int ptlrpc_reconnect_import(struct obd_import *imp, int rq_opc)
{
- struct list_head *tmp;
- int rc = -EINVAL;
-
- /* XXX c_lock semantics! */
- conn->c_level = LUSTRE_CONN_CON;
-
- /* XXX this code MUST be shared with class_obd_connect! */
- list_for_each(tmp, &conn->c_imports) {
- struct obd_import *imp = list_entry(tmp, struct obd_import,
- imp_chain);
- struct obd_device *obd = imp->imp_obd;
- struct client_obd *cli = &obd->u.cli;
- int rq_opc = (obd->obd_type->typ_ops->o_brw)
- ? OST_CONNECT : MDS_CONNECT;
- int size[] = { sizeof(cli->cl_target_uuid),
- sizeof(obd->obd_uuid) };
- char *tmp[] = {cli->cl_target_uuid, obd->obd_uuid };
- struct lustre_handle old_hdl;
- struct ptlrpc_request *request;
- struct obd_export *ldlmexp;
-
- LASSERT(imp->imp_connection == conn);
- request = ptlrpc_prep_req(imp, rq_opc, 2, size, tmp);
- request->rq_level = LUSTRE_CONN_NEW;
- request->rq_replen = lustre_msg_size(0, NULL);
- /*
- * This address is the export that represents our client-side
- * LDLM service (for ASTs). We should only have one on this
- * list, so we just grab the first one.
- *
- * XXX tear down export, call class_obd_connect!
- */
- ldlmexp = list_entry(obd->obd_exports.next, struct obd_export,
- exp_obd_chain);
- request->rq_reqmsg->addr = (__u64)(unsigned long)ldlmexp;
- request->rq_reqmsg->cookie = ldlmexp->exp_cookie;
- rc = ptlrpc_queue_wait(request);
- rc = ptlrpc_check_status(request, rc);
- if (rc) {
- CERROR("cannot connect to %s@%s: rc = %d\n",
- cli->cl_target_uuid, conn->c_remote_uuid, rc);
- ptlrpc_free_req(request);
- GOTO(out_disc, rc = -ENOTCONN);
- }
+ struct obd_device *obd = imp->imp_obd;
+ struct client_obd *cli = &obd->u.cli;
+ int size[] = { sizeof(cli->cl_target_uuid), sizeof(obd->obd_uuid) };
+ char *tmp[] = {cli->cl_target_uuid, obd->obd_uuid };
+ struct ptlrpc_connection *conn = imp->imp_connection;
+ struct lustre_handle old_hdl;
+ struct ptlrpc_request *request;
+ struct obd_export *ldlmexp;
+ int rc;
+
+ request = ptlrpc_prep_req(imp, rq_opc, 2, size, tmp);
+ request->rq_level = LUSTRE_CONN_NEW;
+ request->rq_replen = lustre_msg_size(0, NULL);
+ /*
- old_hdl = imp->imp_handle;
- imp->imp_handle.addr = request->rq_repmsg->addr;
- imp->imp_handle.cookie = request->rq_repmsg->cookie;
- CERROR("reconnected to %s@%s (%Lx/%Lx, was %Lx/%Lx)!\n",
- cli->cl_target_uuid, conn->c_remote_uuid,
- imp->imp_handle.addr, imp->imp_handle.cookie,
- old_hdl.addr, old_hdl.cookie);
- ptlrpc_req_finished(request);
+ * This address is the export that represents our client-side LDLM
+ * service (for ASTs). We should only have one on this list, so we
+ * just grab the first one.
+ *
+ * XXX tear down export, call class_obd_connect?
+ */
+ ldlmexp = list_entry(obd->obd_exports.next, struct obd_export,
+ exp_obd_chain);
+ request->rq_reqmsg->addr = (__u64)(unsigned long)ldlmexp;
+ request->rq_reqmsg->cookie = ldlmexp->exp_cookie;
+ rc = ptlrpc_queue_wait(request);
+ rc = ptlrpc_check_status(request, rc);
+ if (rc) {
+ CERROR("cannot connect to %s@%s: rc = %d\n",
+ cli->cl_target_uuid, conn->c_remote_uuid, rc);
+ ptlrpc_free_req(request);
+ GOTO(out_disc, rc = -ENOTCONN);
}
- conn->c_level = LUSTRE_CONN_RECOVD;
+
+ old_hdl = imp->imp_handle;
+ imp->imp_handle.addr = request->rq_repmsg->addr;
+ imp->imp_handle.cookie = request->rq_repmsg->cookie;
+ CERROR("reconnected to %s@%s (%Lx/%Lx, was %Lx/%Lx)!\n",
+ cli->cl_target_uuid, conn->c_remote_uuid,
+ imp->imp_handle.addr, imp->imp_handle.cookie,
+ old_hdl.addr, old_hdl.cookie);
+ ptlrpc_req_finished(request);
out_disc:
return rc;
RETURN(0);
}
-int ptlrpc_reconnect_and_replay(struct ptlrpc_connection *conn)
+int ptlrpc_replay(struct ptlrpc_connection *conn)
{
int rc = 0;
struct list_head *tmp, *pos;
struct ptlrpc_request *req;
ENTRY;
- /* 1. reconnect */
- rc = ptlrpc_reconnect(conn);
- if (rc)
- RETURN(rc);
-
- /* 2. walk the request list */
spin_lock(&conn->c_lock);
CDEBUG(D_HA, "connection %p to %s has last_xid "LPD64"\n",
/* recover.c */
EXPORT_SYMBOL(ptlrpc_run_recovery_upcall);
-EXPORT_SYMBOL(ptlrpc_reconnect_and_replay);
+EXPORT_SYMBOL(ptlrpc_reconnect_import);
+EXPORT_SYMBOL(ptlrpc_replay);
MODULE_AUTHOR("Cluster File Systems, Inc <info@clusterfs.com>");
MODULE_DESCRIPTION("Lustre Request Processor v1.0");