X-Git-Url: https://git.whamcloud.com/?a=blobdiff_plain;f=lustre%2Fptlrpc%2Frpc.c;h=25d644953d56d2cb361703e27094a427389fbf39;hb=762e5b7708660eb86c614b6ac1c5aaa6bf7acc4c;hp=742e4607801103c34dd7b83d9e217279cd937709;hpb=cc4bd2873eab77210c69df32be466dae78479ad1;p=fs%2Flustre-release.git diff --git a/lustre/ptlrpc/rpc.c b/lustre/ptlrpc/rpc.c index 742e460..25d6449 100644 --- a/lustre/ptlrpc/rpc.c +++ b/lustre/ptlrpc/rpc.c @@ -28,6 +28,7 @@ #include #include #include +#include #include extern int ptlrpc_init_portals(void); @@ -76,10 +77,10 @@ int connmgr_iocontrol(long cmd, struct lustre_handle *hdl, int len, void *karg, ENTRY; - if (cmd != OBD_IOC_RECOVD_NEWCONN) - RETURN(0); + if (cmd != OBD_IOC_RECOVD_NEWCONN && cmd != OBD_IOC_RECOVD_FAILCONN) + RETURN(-EINVAL); /* XXX ENOSYS? */ - /* Find the connection that's been rebuilt. */ + /* Find the connection that's been rebuilt or has failed. */ spin_lock(&recovd->recovd_lock); list_for_each(tmp, &recovd->recovd_troubled_items) { conn = list_entry(tmp, struct ptlrpc_connection, @@ -92,13 +93,49 @@ int connmgr_iocontrol(long cmd, struct lustre_handle *hdl, int len, void *karg, conn = NULL; } - if (!conn) - GOTO(out, rc = -EINVAL); + if (!conn) { + if (cmd == OBD_IOC_RECOVD_NEWCONN) + GOTO(out, rc = -EINVAL); + /* XXX macroize/inline and share with loop above */ + list_for_each(tmp, &recovd->recovd_managed_items) { + conn = list_entry(tmp, struct ptlrpc_connection, + c_recovd_data.rd_managed_chain); + + LASSERT(conn->c_recovd_data.rd_recovd == recovd); + + if (!strcmp(conn->c_remote_uuid, data->ioc_inlbuf1)) + break; + conn = NULL; + } + if (!conn) + GOTO(out, rc = -EINVAL); + } - if (conn->c_recovd_data.rd_phase != RECOVD_PREPARING) - GOTO(out, rc = -EALREADY); + if (cmd == OBD_IOC_RECOVD_FAILCONN) { + spin_unlock(&recovd->recovd_lock); + recovd_conn_fail(conn); + spin_lock(&recovd->recovd_lock); + + /* Jump straight to the "failed" phase of recovery. */ + conn->c_recovd_data.rd_phase = RD_FAILED; + goto out; + } + + /* else (NEWCONN) */ spin_lock(&conn->c_lock); + + /* whatever happens, reset the INVALID flag */ + conn->c_flags &= ~CONN_INVALID; + + /* XXX is this a good check? should we allow readdressing of + * XXX conns that aren't in recovery? + */ + if (conn->c_recovd_data.rd_phase != RD_PREPARING) { + spin_unlock(&conn->c_lock); + GOTO(out, rc = -EALREADY); + } + if (data->ioc_inllen2) { CERROR("conn %p UUID change %s -> %s\n", conn, conn->c_remote_uuid, data->ioc_inlbuf2); @@ -110,20 +147,26 @@ int connmgr_iocontrol(long cmd, struct lustre_handle *hdl, int len, void *karg, ptlrpc_readdress_connection(conn, conn->c_remote_uuid); spin_unlock(&conn->c_lock); - conn->c_recovd_data.rd_phase = RECOVD_PREPARED; + conn->c_recovd_data.rd_phase = RD_PREPARED; wake_up(&recovd->recovd_waitq); out: spin_unlock(&recovd->recovd_lock); RETURN(rc); } +static int connmgr_connect(struct lustre_handle *conn, struct obd_device *src, + obd_uuid_t cluuid, struct recovd_obd *recovd, + ptlrpc_recovery_cb_t recover) +{ + return class_connect(conn, src, cluuid); +} /* use obd ops to offer management infrastructure */ static struct obd_ops recovd_obd_ops = { o_setup: connmgr_setup, o_cleanup: connmgr_cleanup, o_iocontrol: connmgr_iocontrol, - o_connect: class_connect, + o_connect: connmgr_connect, o_disconnect: class_disconnect }; @@ -201,8 +244,9 @@ EXPORT_SYMBOL(lustre_msg_size); EXPORT_SYMBOL(lustre_unpack_msg); EXPORT_SYMBOL(lustre_msg_buf); -EXPORT_SYMBOL(ll_recover); - +/* recover.c */ +EXPORT_SYMBOL(ptlrpc_run_recovery_upcall); +EXPORT_SYMBOL(ptlrpc_reconnect_and_replay); MODULE_AUTHOR("Cluster File Systems, Inc "); MODULE_DESCRIPTION("Lustre Request Processor v1.0");