#include <linux/obd_class.h>
#include <linux/lustre_lib.h>
#include <linux/lustre_ha.h>
+#include <linux/lustre_net.h>
#include <linux/init.h>
extern int ptlrpc_init_portals(void);
ENTRY;
- if (cmd != OBD_IOC_RECOVD_NEWCONN)
- RETURN(0);
+ if (cmd != OBD_IOC_RECOVD_NEWCONN && cmd != OBD_IOC_RECOVD_FAILCONN)
+ RETURN(-EINVAL); /* XXX ENOSYS? */
- /* Find the connection that's been rebuilt. */
+ /* Find the connection that's been rebuilt or has failed. */
spin_lock(&recovd->recovd_lock);
list_for_each(tmp, &recovd->recovd_troubled_items) {
conn = list_entry(tmp, struct ptlrpc_connection,
conn = NULL;
}
- if (!conn)
- GOTO(out, rc = -EINVAL);
+ if (!conn) {
+ if (cmd == OBD_IOC_RECOVD_NEWCONN)
+ GOTO(out, rc = -EINVAL);
+ /* XXX macroize/inline and share with loop above */
+ list_for_each(tmp, &recovd->recovd_managed_items) {
+ conn = list_entry(tmp, struct ptlrpc_connection,
+ c_recovd_data.rd_managed_chain);
+
+ LASSERT(conn->c_recovd_data.rd_recovd == recovd);
+
+ if (!strcmp(conn->c_remote_uuid, data->ioc_inlbuf1))
+ break;
+ conn = NULL;
+ }
+ if (!conn)
+ GOTO(out, rc = -EINVAL);
+ }
- if (conn->c_recovd_data.rd_phase != RECOVD_PREPARING)
- GOTO(out, rc = -EALREADY);
+ if (cmd == OBD_IOC_RECOVD_FAILCONN) {
+ spin_unlock(&recovd->recovd_lock);
+ recovd_conn_fail(conn);
+ spin_lock(&recovd->recovd_lock);
+
+ /* Jump straight to the "failed" phase of recovery. */
+ conn->c_recovd_data.rd_phase = RD_FAILED;
+ goto out;
+ }
+
+ /* else (NEWCONN) */
spin_lock(&conn->c_lock);
+
+ /* whatever happens, reset the INVALID flag */
+ conn->c_flags &= ~CONN_INVALID;
+
+ /* XXX is this a good check? should we allow readdressing of
+ * XXX conns that aren't in recovery?
+ */
+ if (conn->c_recovd_data.rd_phase != RD_PREPARING) {
+ spin_unlock(&conn->c_lock);
+ GOTO(out, rc = -EALREADY);
+ }
+
if (data->ioc_inllen2) {
CERROR("conn %p UUID change %s -> %s\n",
conn, conn->c_remote_uuid, data->ioc_inlbuf2);
ptlrpc_readdress_connection(conn, conn->c_remote_uuid);
spin_unlock(&conn->c_lock);
- conn->c_recovd_data.rd_phase = RECOVD_PREPARED;
+ conn->c_recovd_data.rd_phase = RD_PREPARED;
wake_up(&recovd->recovd_waitq);
out:
spin_unlock(&recovd->recovd_lock);
RETURN(rc);
}
+static int connmgr_connect(struct lustre_handle *conn, struct obd_device *src,
+ obd_uuid_t cluuid, struct recovd_obd *recovd,
+ ptlrpc_recovery_cb_t recover) /* installed as o_connect in recovd_obd_ops */
+{
+ return class_connect(conn, src, cluuid); /* recovd and recover are not used here; only conn, src and cluuid are forwarded, and class_connect's result is returned unchanged */
+}
/* use obd ops to offer management infrastructure */
static struct obd_ops recovd_obd_ops = {
o_setup: connmgr_setup,
o_cleanup: connmgr_cleanup,
o_iocontrol: connmgr_iocontrol,
- o_connect: class_connect,
+ o_connect: connmgr_connect, /* local wrapper with the wider signature; it passes conn, src and cluuid on to class_connect */
o_disconnect: class_disconnect
};
EXPORT_SYMBOL(lustre_unpack_msg);
EXPORT_SYMBOL(lustre_msg_buf);
-EXPORT_SYMBOL(ll_recover);
-
+/* recover.c */
+EXPORT_SYMBOL(ptlrpc_run_recovery_upcall);
+EXPORT_SYMBOL(ptlrpc_reconnect_and_replay);
MODULE_AUTHOR("Cluster File Systems, Inc <info@clusterfs.com>");
MODULE_DESCRIPTION("Lustre Request Processor v1.0");