#define DEBUG_SUBSYSTEM S_RPC
-#include <linux/kmod.h>
#include <linux/lustre_lite.h>
#include <linux/lustre_ha.h>
#include <linux/obd_support.h>
-void recovd_conn_manage(struct recovd_obd *recovd,
- struct ptlrpc_connection *conn)
+/* dump_connection_list, but shorter for nicer debugging logs.
+ *
+ * Walks the list at @head, treating every node as the
+ * c_recovd_data.rd_managed_chain link of a struct ptlrpc_connection, and
+ * emits one D_HA debug line per connection: its pointer, remote UUID and
+ * current/next recovery phase.
+ *
+ * The callers visible in this file all invoke it (via dump_lists()) with
+ * recovd_lock held.
+ */
+static void d_c_l(struct list_head *head)
{
+ int sanity = 0; /* number of entries walked so far */
+ struct list_head *tmp;
+
+ list_for_each(tmp, head) {
+ struct ptlrpc_connection *conn =
+ list_entry(tmp, struct ptlrpc_connection,
+ c_recovd_data.rd_managed_chain);
+ CDEBUG(D_HA, " %p = %s (%d/%d)\n", conn, conn->c_remote_uuid,
+ conn->c_recovd_data.rd_phase,
+ conn->c_recovd_data.rd_next_phase);
+ if (sanity++ > 50) /* trips on the 52nd entry; presumably a guard against a corrupted (looping) list */
+ LBUG();
+ }
+}
+/*
+ * Log both of @recovd's connection lists at D_HA level: first
+ * recovd_managed_items, then recovd_troubled_items, each via d_c_l().
+ */
+static void dump_lists(struct recovd_obd *recovd)
+{
+ CDEBUG(D_HA, "managed: \n");
+ d_c_l(&recovd->recovd_managed_items);
+ CDEBUG(D_HA, "troubled: \n");
+ d_c_l(&recovd->recovd_troubled_items);
+}
+/*
+ * Put @conn under the care of @recovd, with @recover as its recovery
+ * callback.
+ *
+ * If the connection is already linked on a list with the same recovd and
+ * callback, nothing is changed.  If it is linked with a different pair, it
+ * is first unlinked under the previous recovd's lock.  The connection is
+ * then reset to RD_IDLE (next phase RD_TROUBLED) and added to
+ * @recovd->recovd_managed_items under @recovd's lock.
+ *
+ * NOTE(review): the list_empty() test assumes rd_managed_chain was
+ * initialised as a list head before the first call -- confirm where the
+ * connection is created.
+ */
+void recovd_conn_manage(struct ptlrpc_connection *conn,
+ struct recovd_obd *recovd, ptlrpc_recovery_cb_t recover)
+{
+ struct recovd_data *rd = &conn->c_recovd_data;
ENTRY;
- conn->c_recovd = recovd;
+ /* A non-empty chain means the connection is already linked on a list. */
+ if (!list_empty(&rd->rd_managed_chain)) {
+ if (rd->rd_recovd == recovd && rd->rd_recover == recover) {
+ CDEBUG(D_HA, "conn %p/%s already setup for recovery\n",
+ conn, conn->c_remote_uuid);
+ EXIT;
+ return;
+ }
+ CDEBUG(D_HA,
+ "conn %p/%s has recovery items %p/%p, making %p/%p\n",
+ conn, conn->c_remote_uuid, rd->rd_recovd, rd->rd_recover,
+ recovd, recover);
+ spin_lock(&rd->rd_recovd->recovd_lock); /* the OLD recovd's lock */
+ list_del(&rd->rd_managed_chain);
+ spin_unlock(&rd->rd_recovd->recovd_lock);
+ }
+ /*
+ * (Re)initialise the recovery state before linking onto the new list.
+ * NOTE(review): these fields are written without recovd_lock held --
+ * confirm nothing else can observe rd at this point.
+ */
+ rd->rd_recovd = recovd;
+ rd->rd_recover = recover;
+ rd->rd_phase = RD_IDLE;
+ rd->rd_next_phase = RD_TROUBLED;
+
spin_lock(&recovd->recovd_lock);
- list_add(&conn->c_recovd_data.rd_managed_chain,
- &recovd->recovd_managed_items);
+ list_add(&rd->rd_managed_chain, &recovd->recovd_managed_items);
+ dump_lists(recovd);
spin_unlock(&recovd->recovd_lock);
+
EXIT;
}
void recovd_conn_fail(struct ptlrpc_connection *conn)
{
+ struct recovd_data *rd = &conn->c_recovd_data;
+ struct recovd_obd *recovd = rd->rd_recovd;
ENTRY;
- spin_lock(&conn->c_recovd->recovd_lock);
- conn->c_recovd->recovd_flags |= RECOVD_FAIL;
- conn->c_recovd->recovd_wakeup_flag = 1;
- list_del(&conn->c_recovd_data.rd_managed_chain);
- list_add(&conn->c_recovd_data.rd_managed_chain,
- &conn->c_recovd->recovd_troubled_items);
- spin_unlock(&conn->c_recovd->recovd_lock);
- wake_up(&conn->c_recovd->recovd_waitq);
+ /*
+ * Report @conn as failed: unlink it from its current list, append it
+ * to recovd_troubled_items, mark it RD_TROUBLED and wake whoever
+ * sleeps on recovd_waitq.  Apart from logging, nothing happens if the
+ * connection has no recovd or is not currently RD_IDLE.
+ */
+ if (!recovd) {
+ CERROR("no recovd for connection %p\n", conn);
+ EXIT;
+ return;
+ }
+ /* The phase is examined and changed only with recovd_lock held. */
+ spin_lock(&recovd->recovd_lock);
+ if (rd->rd_phase != RD_IDLE) {
+ CERROR("connection %p to %s already in recovery\n",
+ conn, conn->c_remote_uuid);
+ /* XXX need to distinguish from failure-in-recovery */
+ spin_unlock(&recovd->recovd_lock);
+ EXIT;
+ return;
+ }
+ /* Idle connection: queue it at the tail of the troubled list. */
+ CERROR("connection %p to %s failed\n", conn, conn->c_remote_uuid);
+ list_del(&rd->rd_managed_chain);
+ list_add_tail(&rd->rd_managed_chain, &recovd->recovd_troubled_items);
+ rd->rd_phase = RD_TROUBLED;
+ dump_lists(recovd);
+ spin_unlock(&recovd->recovd_lock);
+ /* Wake sleepers on recovd_waitq only after the lock has been dropped. */
+ wake_up(&recovd->recovd_waitq);
+
EXIT;
}
-/* this function must be called with conn->c_lock held */
void recovd_conn_fixed(struct ptlrpc_connection *conn)
{
+ struct recovd_data *rd = &conn->c_recovd_data; /*
+ * Mark @conn healthy again: with recovd_lock held, unlink it, reset it
+ * to RD_IDLE (next phase RD_TROUBLED) and relink it on
+ * recovd_managed_items.
+ *
+ * NOTE(review): rd->rd_recovd is dereferenced below without the NULL
+ * check that recovd_conn_fail() performs -- confirm callers only pass
+ * connections that went through recovd_conn_manage().
+ */
ENTRY;
- list_del(&conn->c_recovd_data.rd_managed_chain);
- list_add(&conn->c_recovd_data.rd_managed_chain,
- &conn->c_recovd->recovd_managed_items);
- EXIT;
-}
+ CDEBUG(D_HA, "connection %p (now to %s) fixed\n",
+ conn, conn->c_remote_uuid);
+ spin_lock(&rd->rd_recovd->recovd_lock);
+ list_del(&rd->rd_managed_chain); /* leave whichever list it is currently on */
+ rd->rd_phase = RD_IDLE;
+ rd->rd_next_phase = RD_TROUBLED;
+ list_add(&rd->rd_managed_chain, &rd->rd_recovd->recovd_managed_items);
+ dump_lists(rd->rd_recovd);
+ spin_unlock(&rd->rd_recovd->recovd_lock);
-static int recovd_upcall(void)
-{
- char *argv[2];
- char *envp[3];
-
- argv[0] = obd_recovery_upcall;
- argv[1] = NULL;
-
- envp [0] = "HOME=/";
- envp [1] = "PATH=/sbin:/bin:/usr/sbin:/usr/bin";
- envp [2] = NULL;
-
- return call_usermodehelper(argv[0], argv, envp);
+ EXIT;
}
+/*
+ * Wake-up predicate used in the recovd thread's wait_event().
+ *
+ * Returns 1 when recovd_state is RECOVD_STOPPING, or when some entry on
+ * recovd_troubled_items either has reached its next phase
+ * (rd_phase == rd_next_phase) or is RD_FAILED; returns 0 otherwise.
+ * Takes and releases recovd_lock internally.
+ */
static int recovd_check_event(struct recovd_obd *recovd)
{
int rc = 0;
+ struct list_head *tmp;
+
ENTRY;
spin_lock(&recovd->recovd_lock);
- recovd->recovd_waketime = CURRENT_TIME;
- if (recovd->recovd_timeout)
- schedule_timeout(recovd->recovd_timeout);
-
- if (recovd->recovd_wakeup_flag) {
- CERROR("service woken\n");
+ if (recovd->recovd_state == RECOVD_STOPPING)
GOTO(out, rc = 1);
- }
- if (recovd->recovd_timeout &&
- CURRENT_TIME > recovd->recovd_waketime + recovd->recovd_timeout) {
- recovd->recovd_flags |= RECOVD_TIMEOUT;
- CERROR("timeout\n");
- GOTO(out, rc = 1);
- }
+ list_for_each(tmp, &recovd->recovd_troubled_items) {
- if (recovd->recovd_flags & RECOVD_STOPPING) {
- CERROR("recovd stopping\n");
- rc = 1;
+ struct recovd_data *rd = list_entry(tmp, struct recovd_data,
+ rd_managed_chain);
+ /* Same eligibility test that recovd_handle_event() applies per entry. */
+ if (rd->rd_phase == rd->rd_next_phase ||
+ rd->rd_phase == RD_FAILED)
+ GOTO(out, rc = 1);
}
out:
- recovd->recovd_wakeup_flag = 0;
spin_unlock(&recovd->recovd_lock);
RETURN(rc);
}
static int recovd_handle_event(struct recovd_obd *recovd)
{
+ struct list_head *tmp, *n;
+ int rc = 0; /* last error seen; only logged -- this function always returns 0 */
ENTRY;
- if (!(recovd->recovd_flags & RECOVD_UPCALL_WAIT) &&
- recovd->recovd_flags & RECOVD_FAIL) {
-
- CERROR("client in trouble: flags -> UPCALL_WAITING\n");
- recovd->recovd_flags |= RECOVD_UPCALL_WAIT;
-
- recovd_upcall();
- recovd->recovd_waketime = CURRENT_TIME;
- recovd->recovd_timeout = 10 * HZ;
- schedule_timeout(recovd->recovd_timeout);
- }
-
- if (recovd->recovd_flags & RECOVD_TIMEOUT) {
- CERROR("timeout - no news from upcall?\n");
- recovd->recovd_flags &= ~RECOVD_TIMEOUT;
- }
-
- if (recovd->recovd_flags & RECOVD_UPCALL_ANSWER) {
- CERROR("UPCALL_WAITING: upcall answer\n");
-
- while (!list_empty(&recovd->recovd_troubled_items)) {
- struct recovd_data *rd =
- list_entry(recovd->recovd_troubled_items.next,
- struct recovd_data, rd_managed_chain);
+ spin_lock(&recovd->recovd_lock);
- list_del(&rd->rd_managed_chain);
- if (rd->rd_recover) {
- spin_unlock(&recovd->recovd_lock);
- rd->rd_recover(rd);
- spin_lock(&recovd->recovd_lock);
+ dump_lists(recovd);
+
+ /*
+ * Advance each troubled connection by one step.  Only entries that
+ * are RD_FAILED or have reached rd_next_phase are touched:
+ *   RD_TROUBLED  -> RD_PREPARING,  then callback PHASE_PREPARE
+ *   RD_PREPARED  -> RD_RECOVERING, then callback PHASE_RECOVER
+ *   RD_RECOVERED -> RD_IDLE
+ *   RD_FAILED, or a non-zero callback return -> callback PHASE_FAILURE
+ * recovd_lock is dropped around every callback and retaken afterwards.
+ *
+ * We use _safe here because one of the callbacks, especially
+ * FAILURE or PREPARED, could move list items around.
+ *
+ * NOTE(review): _safe only tolerates removal of the current entry;
+ * because the lock is released during callbacks, confirm the saved
+ * next entry cannot be unlinked by another thread in the meantime.
+ */
+ list_for_each_safe(tmp, n, &recovd->recovd_troubled_items) {
+ struct recovd_data *rd = list_entry(tmp, struct recovd_data,
+ rd_managed_chain);
+
+ if (rd->rd_phase != RD_FAILED &&
+ rd->rd_phase != rd->rd_next_phase)
+ continue;
+
+ switch (rd->rd_phase) {
+ case RD_FAILED:
+ cb_failed: /* must always reach here with recovd_lock held! */
+ CERROR("recovery FAILED for rd %p (conn %p): %d\n",
+ rd, class_rd2conn(rd), rc);
+ /*
+ * NOTE(review): unlike the RD_TROUBLED case, rd_recover is not
+ * NULL-checked before this call, and rd_phase is not changed here
+ * -- presumably the FAILURE callback updates it.  Confirm.
+ */
+ spin_unlock(&recovd->recovd_lock);
+ (void)rd->rd_recover(rd, PTLRPC_RECOVD_PHASE_FAILURE);
+ spin_lock(&recovd->recovd_lock);
+ break;
+
+ case RD_TROUBLED:
+ if (!rd->rd_recover) {
+ CERROR("no rd_recover for rd %p (conn %p)\n",
+ rd, class_rd2conn(rd));
+ rc = -EINVAL; /*
+ * NOTE(review): the entry keeps rd_phase == rd_next_phase, so
+ * recovd_check_event() will keep returning 1 for it -- confirm
+ * this cannot make the recovd thread spin.
+ */
+ break;
}
+ CERROR("starting recovery for rd %p (conn %p)\n",
+ rd, class_rd2conn(rd));
+ rd->rd_phase = RD_PREPARING;
+ rd->rd_next_phase = RD_PREPARED;
+
+ spin_unlock(&recovd->recovd_lock);
+ rc = rd->rd_recover(rd, PTLRPC_RECOVD_PHASE_PREPARE);
+ spin_lock(&recovd->recovd_lock);
+ if (rc)
+ goto cb_failed;
+
+ break;
+
+ case RD_PREPARED:
+
+ CERROR("recovery prepared for rd %p (conn %p)\n",
+ rd, class_rd2conn(rd));
+ rd->rd_phase = RD_RECOVERING;
+ rd->rd_next_phase = RD_RECOVERED;
+
+ spin_unlock(&recovd->recovd_lock);
+ rc = rd->rd_recover(rd, PTLRPC_RECOVD_PHASE_RECOVER);
+ spin_lock(&recovd->recovd_lock);
+ if (rc)
+ goto cb_failed;
+
+ break;
+
+ case RD_RECOVERED:
+ rd->rd_phase = RD_IDLE;
+ rd->rd_next_phase = RD_TROUBLED;
+ /* Re-arm only: no list manipulation is done in this case. */
+ CERROR("recovery complete for rd %p (conn %p)\n",
+ rd, class_rd2conn(rd));
+ break;
+
+ default:
+ break;
}
-
- recovd->recovd_timeout = 0;
- recovd->recovd_flags = RECOVD_IDLE;
}
-
+ spin_unlock(&recovd->recovd_lock);
RETURN(0);
}
spin_unlock_irq(¤t->sigmask_lock);
sprintf(current->comm, "lustre_recovd");
+ unlock_kernel();
- /* Record that the thread is running */
+ /* Signal that the thread is running. */
recovd->recovd_thread = current;
- recovd->recovd_flags = RECOVD_IDLE;
+ recovd->recovd_state = RECOVD_READY;
wake_up(&recovd->recovd_ctl_waitq);
- /* And now, loop forever on requests */
+ /* And now, loop forever on requests. */
while (1) {
wait_event(recovd->recovd_waitq, recovd_check_event(recovd));
-
- spin_lock(&recovd->recovd_lock);
- if (recovd->recovd_flags & RECOVD_STOPPING) {
- spin_unlock(&recovd->recovd_lock);
- CERROR("lustre_recovd stopping\n");
- EXIT;
+ if (recovd->recovd_state == RECOVD_STOPPING)
break;
- }
-
recovd_handle_event(recovd);
- spin_unlock(&recovd->recovd_lock);
}
recovd->recovd_thread = NULL;
- recovd->recovd_flags = RECOVD_STOPPED;
+ recovd->recovd_state = RECOVD_STOPPED;
wake_up(&recovd->recovd_ctl_waitq);
- CDEBUG(D_NET, "mgr exiting process %d\n", current->pid);
+ CDEBUG(D_HA, "mgr exiting process %d\n", current->pid);
RETURN(0);
}
int recovd_setup(struct recovd_obd *recovd)
{
int rc;
- extern void (*class_signal_connection_failure)
- (struct ptlrpc_connection *);
ENTRY;
CERROR("cannot start thread\n");
RETURN(-EINVAL);
}
- wait_event(recovd->recovd_ctl_waitq, recovd->recovd_flags & RECOVD_IDLE);
+ wait_event(recovd->recovd_ctl_waitq,
+ recovd->recovd_state == RECOVD_READY);
- /* exported and called by obdclass timeout handlers */
+ ptlrpc_recovd = recovd;
class_signal_connection_failure = recovd_conn_fail;
RETURN(0);
int recovd_cleanup(struct recovd_obd *recovd)
{
+ ENTRY; /*
+ * Request shutdown: set recovd_state to RECOVD_STOPPING under
+ * recovd_lock, wake recovd_waitq, then block on recovd_ctl_waitq
+ * until recovd_state becomes RECOVD_STOPPED.  Always returns 0.
+ */
spin_lock(&recovd->recovd_lock);
- recovd->recovd_flags = RECOVD_STOPPING;
+ recovd->recovd_state = RECOVD_STOPPING; /* makes recovd_check_event() return 1 */
wake_up(&recovd->recovd_waitq);
spin_unlock(&recovd->recovd_lock);
wait_event(recovd->recovd_ctl_waitq,
- (recovd->recovd_flags & RECOVD_STOPPED));
+ (recovd->recovd_state == RECOVD_STOPPED));
RETURN(0);
}
+/* Global pointer to the recovd instance; assigned in recovd_setup(). */
+struct recovd_obd *ptlrpc_recovd;