/* -*- mode: c; c-basic-offset: 8; indent-tabs-mode: nil; -*-
* vim:expandtab:shiftwidth=8:tabstop=8:
*
- * linux/mds/handler.c
+ * obd/rpc/recovd.c
*
* Lustre High Availability Daemon
*
*
*/
-#define EXPORT_SYMTAB
-
-#include <linux/version.h>
-#include <linux/module.h>
-#include <linux/fs.h>
-#include <linux/stat.h>
-#include <linux/locks.h>
-#include <linux/kmod.h>
-#include <linux/quotaops.h>
-#include <asm/unistd.h>
-#include <asm/uaccess.h>
-
#define DEBUG_SUBSYSTEM S_RPC
#include <linux/lustre_lite.h>
#include <linux/lustre_ha.h>
-#include <linux/lustre_lib.h>
-#include <linux/lustre_net.h>
-
-struct connmgr_obd *ptlrpc_connmgr;
-
-void connmgr_cli_manage(struct connmgr_obd *mgr, struct ptlrpc_client *cli)
-{
- ENTRY;
- cli->cli_ha_mgr = mgr;
- spin_lock(&mgr->mgr_lock);
- list_add(&cli->cli_ha_item, &mgr->mgr_connections_lh);
- spin_unlock(&mgr->mgr_lock);
- EXIT;
-}
+#include <linux/obd_support.h>
-
-void connmgr_cli_fail(struct ptlrpc_client *cli)
+/*
+ * Put a connection under the management of a recovery daemon.
+ *
+ * Stores 'recovd' and the 'recover' callback in the recovd_data embedded in
+ * 'conn', then links that recovd_data onto recovd->recovd_managed_items
+ * while holding recovd->recovd_lock.
+ *
+ * 'recovd' is dereferenced unconditionally, so it must not be NULL.
+ */
+void recovd_conn_manage(struct ptlrpc_connection *conn,
+ struct recovd_obd *recovd, ptlrpc_recovery_cb_t recover)
{
+ struct recovd_data *rd = &conn->c_recovd_data;
ENTRY;
- spin_lock(&cli->cli_ha_mgr->mgr_lock);
- cli->cli_ha_mgr->mgr_flags |= SVC_HA_EVENT;
- list_del(&cli->cli_ha_item);
- list_add(&cli->cli_ha_item, &cli->cli_ha_mgr->mgr_troubled_lh);
- spin_unlock(&cli->cli_ha_mgr->mgr_lock);
- wake_up(&cli->cli_ha_mgr->mgr_waitq);
- EXIT;
-}
-
-int connmgr_upcall(void)
-{
- char *argv[2];
- char *envp[3];
-
- argv[0] = "/usr/src/obd/utils/ha_assist.sh";
- argv[1] = NULL;
- envp [0] = "HOME=/";
- envp [1] = "PATH=/sbin:/bin:/usr/sbin:/usr/bin";
- envp [2] = NULL;
-
- return call_usermodehelper(argv[0], argv, envp);
-}
+ rd->rd_recovd = recovd;
+ rd->rd_recover = recover;
-static void connmgr_unpack_body(struct ptlrpc_request *req)
-{
- struct connmgr_body *b = lustre_msg_buf(req->rq_repmsg, 0);
- if (b == NULL)
- LBUG();
+ spin_lock(&recovd->recovd_lock);
+ list_add(&rd->rd_managed_chain, &recovd->recovd_managed_items);
+ spin_unlock(&recovd->recovd_lock);
- b->generation = NTOH__u32(b->generation);
+ EXIT;
}
-int connmgr_connect(struct connmgr_obd *mgr,
- struct ptlrpc_connection *conn)
+/*
+ * Report a connection as failed.
+ *
+ * Under recovd->recovd_lock the connection's recovd_data is unlinked from
+ * the list it is currently on and appended to the tail of
+ * recovd->recovd_troubled_items; recovd->recovd_waitq is then woken so the
+ * waiter in recovd_main() re-evaluates its condition.
+ *
+ * If the connection has no recovd recorded (rd_recovd is NULL) the event is
+ * logged and nothing else happens.
+ */
+void recovd_conn_fail(struct ptlrpc_connection *conn)
{
- struct ptlrpc_request *req;
- struct ptlrpc_client *cl;
- struct connmgr_body *body;
- int rc, size = sizeof(*body);
+ struct recovd_data *rd = &conn->c_recovd_data;
+ struct recovd_obd *recovd = rd->rd_recovd;
ENTRY;
- if (!mgr) {
- CERROR("no manager\n");
- LBUG();
+ if (!recovd) {
+ CERROR("no recovd for connection %p\n", conn);
+ /* pair the ENTRY above so the debug trace stays balanced */
+ EXIT;
+ return;
}
- cl = mgr->mgr_client;
-
- req = ptlrpc_prep_req(cl, conn, CONNMGR_CONNECT, 1, &size, NULL);
- if (!req)
- GOTO(out, rc = -ENOMEM);
-
- body = lustre_msg_buf(req->rq_reqmsg, 0);
- body->generation = HTON__u32(conn->c_generation);
- req->rq_replen = lustre_msg_size(1, &size);
+ spin_lock(&recovd->recovd_lock);
+ list_del(&rd->rd_managed_chain);
+ list_add_tail(&rd->rd_managed_chain, &recovd->recovd_troubled_items);
+ spin_unlock(&recovd->recovd_lock);
- rc = ptlrpc_queue_wait(req);
- rc = ptlrpc_check_status(req, rc);
-
- if (!rc) {
- connmgr_unpack_body(req);
- body = lustre_msg_buf(req->rq_repmsg, 0);
- CDEBUG(D_NET, "mode: %o\n", body->generation);
- }
+ wake_up(&recovd->recovd_waitq);
EXIT;
- out:
- return rc;
}
-
-int connmgr_handle_connect(struct ptlrpc_request *req)
+/*
+ * Move a connection's recovd_data back onto the managed list of its recovd:
+ * it is unlinked from whichever list it is on and re-added to
+ * rd->rd_recovd->recovd_managed_items.
+ *
+ * This function must be called with conn->c_lock held.
+ *
+ * NOTE(review): unlike the other list updates in this file, the lists are
+ * modified here without taking rd->rd_recovd->recovd_lock -- confirm that
+ * callers are serialized against the recovd thread.  rd->rd_recovd is also
+ * dereferenced without a NULL check.
+ */
+void recovd_conn_fixed(struct ptlrpc_connection *conn)
{
- struct connmgr_body *body;
- int rc, size = sizeof(*body);
+ struct recovd_data *rd = &conn->c_recovd_data;
ENTRY;
- rc = lustre_pack_msg(1, &size, NULL, &req->rq_replen, &req->rq_repmsg);
- if (rc) {
- CERROR("connmgr: out of memory\n");
- req->rq_status = -ENOMEM;
- RETURN(0);
- }
-
- body = lustre_msg_buf(req->rq_reqmsg, 0);
- connmgr_unpack_body(req);
+ list_del(&rd->rd_managed_chain);
+ list_add(&rd->rd_managed_chain, &rd->rd_recovd->recovd_managed_items);
- printk("incoming generation %d\n", body->generation);
- body = lustre_msg_buf(req->rq_repmsg, 0);
- body->generation = 4711;
- RETURN(0);
+ EXIT;
}
-int connmgr_handle(struct obd_device *dev,
- struct ptlrpc_service *svc,
- struct ptlrpc_request *req)
+
+/*
+ * Wake-up predicate for the recovd thread (used as the wait_event()
+ * condition in recovd_main()).
+ *
+ * Takes recovd->recovd_lock for the duration of the checks and returns 1
+ * when any of the following holds, 0 otherwise:
+ *  - the phase is RECOVD_IDLE and the troubled list is not empty;
+ *  - RECOVD_STOPPING is set in recovd_flags;
+ *  - RECOVD_FAILED is set in recovd_flags (asserting that a recovery is
+ *    actually in progress);
+ *  - recovd_phase has reached recovd_next_phase.
+ */
+static int recovd_check_event(struct recovd_obd *recovd)
{
- int rc;
+ int rc = 0;
ENTRY;
- rc = lustre_unpack_msg(req->rq_reqmsg, req->rq_reqlen);
- if (rc) {
- CERROR("lustre_mds: Invalid request\n");
- GOTO(out, rc);
- }
+ spin_lock(&recovd->recovd_lock);
- if (req->rq_reqmsg->type != PTL_RPC_REQUEST) {
- CERROR("lustre_mds: wrong packet type sent %d\n",
- req->rq_reqmsg->type);
- GOTO(out, rc = -EINVAL);
+ if (recovd->recovd_phase == RECOVD_IDLE &&
+ !list_empty(&recovd->recovd_troubled_items)) {
+ GOTO(out, rc = 1);
}
- switch (req->rq_reqmsg->opc) {
- case CONNMGR_CONNECT:
- CDEBUG(D_INODE, "getattr\n");
- OBD_FAIL_RETURN(OBD_FAIL_MDS_GETATTR_NET, 0);
- rc = connmgr_handle_connect(req);
- break;
+ if (recovd->recovd_flags & RECOVD_STOPPING)
+ GOTO(out, rc = 1);
- default:
- rc = ptlrpc_error(svc, req);
- RETURN(rc);
+ if (recovd->recovd_flags & RECOVD_FAILED) {
+ LASSERT(recovd->recovd_phase != RECOVD_IDLE &&
+ recovd->recovd_current_rd);
+ GOTO(out, rc = 1);
}
- EXIT;
-out:
- if (rc) {
- ptlrpc_error(svc, req);
- } else {
- CDEBUG(D_NET, "sending reply\n");
- ptlrpc_reply(svc, req);
- }
+ if (recovd->recovd_phase == recovd->recovd_next_phase)
+ GOTO(out, rc = 1);
- return 0;
+ out:
+ spin_unlock(&recovd->recovd_lock);
+ RETURN(rc);
}
-
-static int recovd_check_event(struct connmgr_obd *mgr)
+/*
+ * Advance the recovery state machine by one step.
+ *
+ * recovd_main() calls this with recovd->recovd_lock held.  The lock is
+ * dropped around every rd->rd_recover() callback and re-taken afterwards,
+ * so it is held again when the function returns.
+ *
+ * Failure path (RECOVD_FAILED set on entry, or a PREPARE/RECOVER callback
+ * returned non-zero): the current item is put back on
+ * recovd_managed_items, the callback is run with
+ * PTLRPC_RECOVD_PHASE_FAILURE, the phase returns to RECOVD_IDLE with
+ * RECOVD_PREPARING as next phase, RECOVD_FAILED is cleared and 1 is
+ * returned.
+ *
+ * Otherwise, depending on recovd_phase:
+ *  - RECOVD_IDLE: if no item is current and the troubled list is not
+ *    empty, take its first entry, make it current, enter RECOVD_PREPARING
+ *    and run the callback with PTLRPC_RECOVD_PHASE_PREPARE; on success the
+ *    next phase becomes RECOVD_PREPARED.
+ *  - RECOVD_PREPARED: enter RECOVD_RECOVERING and run the callback with
+ *    PTLRPC_RECOVD_PHASE_RECOVER; on success the next phase becomes
+ *    RECOVD_RECOVERED.
+ *  - RECOVD_RECOVERED: go back to RECOVD_IDLE / next RECOVD_PREPARING.
+ *  - any other phase: nothing to do.
+ * and 0 is returned.
+ *
+ * NOTE(review): recovd_current_rd is never reset in this function, yet the
+ * RECOVD_IDLE case refuses to start a new recovery while it is set --
+ * confirm it is cleared elsewhere.
+ */
+static int recovd_handle_event(struct recovd_obd *recovd)
{
- int rc = 0;
+ struct recovd_data *rd;
+ int rc;
ENTRY;
- spin_lock(&mgr->mgr_lock);
-
- if (!(mgr->mgr_flags & MGR_WORKING) &&
- !list_empty(&mgr->mgr_troubled_lh) ) {
-
- CERROR("connection in trouble - state: WORKING, upcall\n");
- mgr->mgr_flags = MGR_WORKING;
+ if (recovd->recovd_flags & RECOVD_FAILED) {
- mgr->mgr_waketime = CURRENT_TIME;
- mgr->mgr_timeout = 5 * HZ;
- schedule_timeout(mgr->mgr_timeout);
+ LASSERT(recovd->recovd_phase != RECOVD_IDLE &&
+ recovd->recovd_current_rd);
- }
-
- if (mgr->mgr_flags & MGR_WORKING &&
- CURRENT_TIME <= mgr->mgr_waketime + mgr->mgr_timeout ) {
- CERROR("WORKING: new event\n");
+ rd = recovd->recovd_current_rd;
+ /* also entered by "goto cb_failed" from the switch below, with rd
+ * already set and recovd_lock held */
+ cb_failed:
+ CERROR("recovery FAILED for rd %p (conn %p), recovering\n",
+ rd, class_rd2conn(rd));
- mgr->mgr_waketime = CURRENT_TIME;
- schedule_timeout(mgr->mgr_timeout);
- }
+ list_add(&rd->rd_managed_chain, &recovd->recovd_managed_items);
+ spin_unlock(&recovd->recovd_lock);
+ rd->rd_recover(rd, PTLRPC_RECOVD_PHASE_FAILURE);
+ spin_lock(&recovd->recovd_lock);
+ recovd->recovd_phase = RECOVD_IDLE;
+ recovd->recovd_next_phase = RECOVD_PREPARING;
+
+ recovd->recovd_flags &= ~RECOVD_FAILED;
- if (mgr->mgr_flags & MGR_STOPPING) {
- CERROR("ha mgr stopping\n");
- rc = 1;
+ RETURN(1);
}
- spin_unlock(&mgr->mgr_lock);
- RETURN(rc);
-}
-
-int recovd_handle_event(struct connmgr_obd *mgr)
-{
-
- spin_lock(&mgr->mgr_lock);
-
- if (!(mgr->mgr_flags & MGR_WORKING) &&
- !list_empty(&mgr->mgr_troubled_lh) ) {
+ switch (recovd->recovd_phase) {
+ case RECOVD_IDLE:
+ if (recovd->recovd_current_rd ||
+ list_empty(&recovd->recovd_troubled_items))
+ break;
+ rd = list_entry(recovd->recovd_troubled_items.next,
+ struct recovd_data, rd_managed_chain);
+
+ list_del(&rd->rd_managed_chain);
+ if (!rd->rd_recover)
+ LBUG();
+
+ CERROR("starting recovery for rd %p (conn %p)\n",
+ rd, class_rd2conn(rd));
+ recovd->recovd_current_rd = rd;
+ recovd->recovd_flags &= ~RECOVD_FAILED;
+ recovd->recovd_phase = RECOVD_PREPARING;
+
+ spin_unlock(&recovd->recovd_lock);
+ rc = rd->rd_recover(rd, PTLRPC_RECOVD_PHASE_PREPARE);
+ spin_lock(&recovd->recovd_lock);
+ if (rc)
+ goto cb_failed;
+
+ recovd->recovd_next_phase = RECOVD_PREPARED;
+ break;
- CERROR("connection in trouble - state: WORKING, upcall\n");
- mgr->mgr_flags = MGR_WORKING;
+ case RECOVD_PREPARED:
+ rd = recovd->recovd_current_rd;
+ recovd->recovd_phase = RECOVD_RECOVERING;
+ CERROR("recovery prepared for rd %p (conn %p), recovering\n",
+ rd, class_rd2conn(rd));
- connmgr_upcall();
- mgr->mgr_waketime = CURRENT_TIME;
- mgr->mgr_timeout = 5 * HZ;
- schedule_timeout(mgr->mgr_timeout);
+ spin_unlock(&recovd->recovd_lock);
+ rc = rd->rd_recover(rd, PTLRPC_RECOVD_PHASE_RECOVER);
+ spin_lock(&recovd->recovd_lock);
+ if (rc)
+ goto cb_failed;
+
+ recovd->recovd_next_phase = RECOVD_RECOVERED;
+ break;
- }
+ case RECOVD_RECOVERED:
+ rd = recovd->recovd_current_rd;
+ recovd->recovd_phase = RECOVD_IDLE;
+ recovd->recovd_next_phase = RECOVD_PREPARING;
- if (mgr->mgr_flags & MGR_WORKING &&
- CURRENT_TIME <= mgr->mgr_waketime + mgr->mgr_timeout ) {
- CERROR("WORKING: new event\n");
+ CERROR("recovery complete for rd %p (conn %p), recovering\n",
+ rd, class_rd2conn(rd));
+ break;
- mgr->mgr_waketime = CURRENT_TIME;
- schedule_timeout(mgr->mgr_timeout);
+ default:
+ break;
}
- spin_unlock(&mgr->mgr_lock);
- return 0;
+ RETURN(0);
}
+/*
+ * Body of the recovery daemon kernel thread; 'arg' is the recovd_obd it
+ * serves.
+ *
+ * After naming itself "lustre_recovd" the thread records itself in
+ * recovd->recovd_thread and wakes recovd_ctl_waitq.  It then loops:
+ * sleep on recovd_waitq until recovd_check_event() reports work, take
+ * recovd_lock, leave the loop if RECOVD_STOPPING is set, otherwise run
+ * recovd_handle_event() and release the lock.  On the way out it clears
+ * recovd_thread, sets recovd_flags to RECOVD_STOPPED and wakes
+ * recovd_ctl_waitq again.  Always returns 0.
+ *
+ * NOTE(review): start-up stores RECOVD_IDLE in recovd_flags, whereas
+ * recovd_setup() waits for recovd_phase == RECOVD_IDLE and RECOVD_IDLE is
+ * otherwise only compared against recovd_phase -- confirm this is intended.
+ */
static int recovd_main(void *arg)
{
- struct connmgr_thread *data = (struct connmgr_thread *)arg;
- struct connmgr_obd *mgr = data->mgr;
+ struct recovd_obd *recovd = (struct recovd_obd *)arg;
ENTRY;
recalc_sigpending(current);
spin_unlock_irq(&current->sigmask_lock);
- sprintf(current->comm, data->name);
+ sprintf(current->comm, "lustre_recovd");
+ unlock_kernel();
/* Record that the thread is running */
- mgr->mgr_thread = current;
- mgr->mgr_flags = MGR_RUNNING;
- wake_up(&mgr->mgr_ctl_waitq);
+ recovd->recovd_thread = current;
+ recovd->recovd_flags = RECOVD_IDLE;
+ wake_up(&recovd->recovd_ctl_waitq);
/* And now, loop forever on requests */
while (1) {
- wait_event_interruptible(mgr->mgr_waitq,
- recovd_check_event(mgr));
+ wait_event(recovd->recovd_waitq, recovd_check_event(recovd));
- spin_lock(&mgr->mgr_lock);
- if (mgr->mgr_flags & MGR_STOPPING) {
- spin_unlock(&mgr->mgr_lock);
- CERROR("lustre_hamgr quitting\n");
+ spin_lock(&recovd->recovd_lock);
+
+ if (recovd->recovd_flags & RECOVD_STOPPING) {
+ spin_unlock(&recovd->recovd_lock);
+ CERROR("lustre_recovd stopping\n");
EXIT;
break;
}
- recovd_handle_event(mgr);
- spin_unlock(&mgr->mgr_lock);
+ recovd_handle_event(recovd);
+ spin_unlock(&recovd->recovd_lock);
}
- mgr->mgr_thread = NULL;
- mgr->mgr_flags = MGR_STOPPED;
- wake_up(&mgr->mgr_ctl_waitq);
+ recovd->recovd_thread = NULL;
+ recovd->recovd_flags = RECOVD_STOPPED;
+ wake_up(&recovd->recovd_ctl_waitq);
CDEBUG(D_NET, "mgr exiting process %d\n", current->pid);
RETURN(0);
}
-int recovd_setup(struct connmgr_obd *mgr)
+/*
+ * Initialise a recovd_obd and start its daemon thread.
+ *
+ * Sets up the managed/troubled lists, the spinlock and the three wait
+ * queues, primes recovd_next_phase with RECOVD_PREPARING and spawns
+ * recovd_main() with 'recovd' as its argument.  After waiting on
+ * recovd_ctl_waitq for recovd_phase == RECOVD_IDLE it installs
+ * recovd_conn_fail as class_signal_connection_failure and publishes
+ * 'recovd' through the global ptlrpc_recovd.
+ *
+ * Returns 0 on success, or -EINVAL when the thread cannot be created (the
+ * error code returned by kernel_thread() is not propagated).
+ */
+int recovd_setup(struct recovd_obd *recovd)
{
- struct connmgr_thread d;
int rc;
- ENTRY;
+ extern void (*class_signal_connection_failure)
+ (struct ptlrpc_connection *);
- INIT_LIST_HEAD(&mgr->mgr_connections_lh);
- INIT_LIST_HEAD(&mgr->mgr_troubled_lh);
- spin_lock_init(&mgr->mgr_lock);
+ ENTRY;
- d.mgr = mgr;
- d.name = "lustre_recovd";
+ INIT_LIST_HEAD(&recovd->recovd_managed_items);
+ INIT_LIST_HEAD(&recovd->recovd_troubled_items);
+ spin_lock_init(&recovd->recovd_lock);
- init_waitqueue_head(&mgr->mgr_waitq);
- init_waitqueue_head(&mgr->mgr_ctl_waitq);
+ init_waitqueue_head(&recovd->recovd_waitq);
+ init_waitqueue_head(&recovd->recovd_recovery_waitq);
+ init_waitqueue_head(&recovd->recovd_ctl_waitq);
- rc = kernel_thread(recovd_main, (void *) &d,
+ recovd->recovd_next_phase = RECOVD_PREPARING;
+
+ rc = kernel_thread(recovd_main, (void *)recovd,
CLONE_VM | CLONE_FS | CLONE_FILES);
if (rc < 0) {
CERROR("cannot start thread\n");
RETURN(-EINVAL);
}
- wait_event(mgr->mgr_ctl_waitq, mgr->mgr_flags & MGR_RUNNING);
+ wait_event(recovd->recovd_ctl_waitq,
+ recovd->recovd_phase == RECOVD_IDLE);
- RETURN(0);
-}
+ /* exported and called by obdclass timeout handlers */
+ class_signal_connection_failure = recovd_conn_fail;
+ ptlrpc_recovd = recovd;
+ RETURN(0);
+}
-int recovd_cleanup(struct connmgr_obd *mgr)
+/*
+ * Ask the recovd thread to stop and wait until it has done so.
+ *
+ * Under recovd_lock, recovd_flags is overwritten with RECOVD_STOPPING and
+ * recovd_waitq is woken so that recovd_main() notices the request; the
+ * caller then sleeps on recovd_ctl_waitq until RECOVD_STOPPED appears in
+ * recovd_flags.  Always returns 0.
+ */
+int recovd_cleanup(struct recovd_obd *recovd)
{
- mgr->mgr_flags = MGR_STOPPING;
+ /* pairs with the RETURN() below, as in the other functions here */
+ ENTRY;
+ spin_lock(&recovd->recovd_lock);
+ recovd->recovd_flags = RECOVD_STOPPING;
+ wake_up(&recovd->recovd_waitq);
+ spin_unlock(&recovd->recovd_lock);
- wake_up(&mgr->mgr_waitq);
- wait_event_interruptible(mgr->mgr_ctl_waitq,
- (mgr->mgr_flags & MGR_STOPPED));
+ wait_event(recovd->recovd_ctl_waitq,
+ (recovd->recovd_flags & RECOVD_STOPPED));
RETURN(0);
}
+
+/* Global pointer to the recovd instance; recovd_setup() stores its argument
+ * here once the daemon thread has been started. */
+struct recovd_obd *ptlrpc_recovd;