1 /* -*- mode: c; c-basic-offset: 8; indent-tabs-mode: nil; -*-
2 * vim:expandtab:shiftwidth=8:tabstop=8:
6 * Lustre High Availability Daemon
8 * Copyright (C) 2001, 2002 Cluster File Systems, Inc.
10 * This code is issued under the GNU General Public License.
11 * See the file COPYING in this distribution
13 * by Peter Braam <braam@clusterfs.com>
17 #define DEBUG_SUBSYSTEM S_RPC
19 #include <linux/lustre_lite.h>
20 #include <linux/lustre_ha.h>
21 #include <linux/obd_support.h>
23 /* dump_connection_list, but shorter for nicer debugging logs */
/* Walk @head and CDEBUG-print each managed connection: pointer, remote
 * UUID, and current/next recovery phase.  Entries are ptlrpc_connections
 * linked through c_recovd_data.rd_managed_chain.
 * NOTE(review): the only visible caller (dump_lists) runs under
 * recovd_lock — presumably that lock protects this walk; confirm. */
24 static void d_c_l(struct list_head *head)
27 struct list_head *tmp;
29 list_for_each(tmp, head) {
30 struct ptlrpc_connection *conn =
31 list_entry(tmp, struct ptlrpc_connection,
32 c_recovd_data.rd_managed_chain);
33 CDEBUG(D_HA, " %p = %s (%d/%d)\n", conn, conn->c_remote_uuid,
34 conn->c_recovd_data.rd_phase,
35 conn->c_recovd_data.rd_next_phase);
/* Debug helper: dump both of @recovd's connection lists (managed and
 * troubled) to the D_HA log.  Called with recovd_lock held (see the
 * call site in recovd_conn_fixed). */
41 static void dump_lists(struct recovd_obd *recovd)
43 CDEBUG(D_HA, "managed: \n");
44 d_c_l(&recovd->recovd_managed_items);
45 CDEBUG(D_HA, "troubled: \n");
46 d_c_l(&recovd->recovd_troubled_items);
/* Place @conn under @recovd's management with @recover as the callback
 * driven through the recovery phases.
 *
 * If @conn is already managed:
 *  - by the same (recovd, recover) pair, this is a no-op (debug only);
 *  - by a different pair, it is first unlinked from the old recovd
 *    under that recovd's lock before being re-registered.
 * The connection starts in RD_IDLE with RD_TROUBLED as the next phase,
 * and is added to the new recovd's managed list under recovd_lock. */
49 void recovd_conn_manage(struct ptlrpc_connection *conn,
50 struct recovd_obd *recovd, ptlrpc_recovery_cb_t recover)
52 struct recovd_data *rd = &conn->c_recovd_data;
/* Nothing to do without a daemon and a callback (elided early return). */
54 if (!recovd || !recover) {
59 if (!list_empty(&rd->rd_managed_chain)) {
60 if (rd->rd_recovd == recovd && rd->rd_recover == recover) {
61 CDEBUG(D_HA, "conn %p/%s already setup for recovery\n",
62 conn, conn->c_remote_uuid);
/* Managed by a different recovd/callback: log and steal it. */
67 "conn %p/%s has recovery items %p/%p, making %p/%p\n",
68 conn, conn->c_remote_uuid, rd->rd_recovd, rd->rd_recover,
/* Unlink from the OLD recovd under the OLD recovd's lock. */
70 spin_lock(&rd->rd_recovd->recovd_lock);
71 list_del_init(&rd->rd_managed_chain);
72 spin_unlock(&rd->rd_recovd->recovd_lock);
/* Register with the new recovd; fresh connections start idle. */
75 rd->rd_recovd = recovd;
76 rd->rd_recover = recover;
77 rd->rd_phase = RD_IDLE;
78 rd->rd_next_phase = RD_TROUBLED;
80 spin_lock(&recovd->recovd_lock);
81 list_add(&rd->rd_managed_chain, &recovd->recovd_managed_items);
83 spin_unlock(&recovd->recovd_lock);
/* Detach @conn from its recovd: unlink it from whichever list it is on
 * (managed or troubled — both are threaded on rd_managed_chain) under
 * recovd_lock, then clear its callback and reset its phase state so a
 * later recovd_conn_manage() sees a clean slate. */
88 void recovd_conn_unmanage(struct ptlrpc_connection *conn)
90 struct recovd_data *rd = &conn->c_recovd_data;
91 struct recovd_obd *recovd = rd->rd_recovd;
95 spin_lock(&recovd->recovd_lock);
96 list_del_init(&rd->rd_managed_chain);
98 spin_unlock(&recovd->recovd_lock);
100 /* should be safe enough, right? */
101 rd->rd_recover = NULL;
/* BUGFIX: the original assigned rd_next_phase twice, leaving rd_phase
 * untouched; the first store was clearly meant to reset rd_phase — the
 * same (RD_IDLE, RD_TROUBLED) pair recovd_conn_fixed establishes. */
102 rd->rd_phase = RD_IDLE;
103 rd->rd_next_phase = RD_TROUBLED;
/* Report that @conn has failed: move it to the troubled list and wake
 * the recovd thread so it starts (or restarts) recovery.
 *
 * If the connection is already mid-recovery (TROUBLED or PREPARING)
 * nothing more is needed.  A failure in a later phase resets
 * rd_next_phase so recovery restarts from TROUBLED. */
106 void recovd_conn_fail(struct ptlrpc_connection *conn)
108 struct recovd_data *rd = &conn->c_recovd_data;
109 struct recovd_obd *recovd = rd->rd_recovd;
/* Unmanaged connection: nothing to recover (elided early return). */
113 CERROR("no recovd for connection %p\n", conn);
118 spin_lock(&recovd->recovd_lock);
119 if (rd->rd_phase == RD_TROUBLED || rd->rd_phase == RD_PREPARING) {
120 CDEBUG(D_HA, "connection %p to %s already in recovery\n",
121 conn, conn->c_remote_uuid);
122 spin_unlock(&recovd->recovd_lock);
127 CERROR("connection %p to %s failed\n", conn, conn->c_remote_uuid);
128 CERROR("peer is %08x %08lx %08lx\n", conn->c_peer.peer_nid,
129 conn->c_peer.peer_ni.nal_idx, conn->c_peer.peer_ni.handle_idx);
/* Move from the managed list onto the troubled list. */
130 list_del(&rd->rd_managed_chain);
131 list_add_tail(&rd->rd_managed_chain, &recovd->recovd_troubled_items);
/* Failed while a recovery phase was in flight: restart from scratch. */
132 if (rd->rd_phase != RD_IDLE) {
134 "connection %p to %s failed in recovery: restarting\n",
135 conn, conn->c_remote_uuid);
136 /* XXX call callback with PHASE_FAILED? */
137 rd->rd_next_phase = RD_TROUBLED;
139 rd->rd_phase = RD_TROUBLED;
141 spin_unlock(&recovd->recovd_lock);
/* Kick the daemon; recovd_check_event will now see this rd. */
143 wake_up(&recovd->recovd_waitq);
/* Report that @conn has recovered: move it back from the troubled list
 * to the managed list and reset its phase state to idle, ready to be
 * re-troubled by a future failure. */
148 void recovd_conn_fixed(struct ptlrpc_connection *conn)
150 struct recovd_data *rd = &conn->c_recovd_data;
153 CDEBUG(D_HA, "connection %p (now to %s) fixed\n",
154 conn, conn->c_remote_uuid);
155 spin_lock(&rd->rd_recovd->recovd_lock);
156 list_del(&rd->rd_managed_chain);
157 rd->rd_phase = RD_IDLE;
158 rd->rd_next_phase = RD_TROUBLED;
159 list_add(&rd->rd_managed_chain, &rd->rd_recovd->recovd_managed_items);
160 dump_lists(rd->rd_recovd);
161 spin_unlock(&rd->rd_recovd->recovd_lock);
/* wait_event() predicate for recovd_main: decides whether the daemon
 * has work.  True when the daemon is being stopped, or when some
 * troubled connection either has completed its current phase
 * (rd_phase == rd_next_phase) or has failed (RD_FAILED).
 * NOTE(review): the actual return-value assignments are elided in this
 * listing — verify against the full source. */
166 static int recovd_check_event(struct recovd_obd *recovd)
169 struct list_head *tmp;
173 spin_lock(&recovd->recovd_lock);
175 if (recovd->recovd_state == RECOVD_STOPPING)
178 list_for_each(tmp, &recovd->recovd_troubled_items) {
180 struct recovd_data *rd = list_entry(tmp, struct recovd_data,
183 if (rd->rd_phase == rd->rd_next_phase ||
184 rd->rd_phase == RD_FAILED)
189 spin_unlock(&recovd->recovd_lock);
/* Drive the recovery state machine for every troubled connection that
 * is ready to advance (phase complete, or RD_FAILED).
 *
 * For each ready rd: advance rd_phase/rd_next_phase, then invoke the
 * rd_recover callback with the lock DROPPED (callbacks may sleep and
 * may themselves take recovd_lock), retaking it afterwards.  The
 * cb_failed label handles a phase failure by invoking the callback
 * with PTLRPC_RECOVD_PHASE_FAILURE. */
193 static int recovd_handle_event(struct recovd_obd *recovd)
195 struct list_head *tmp, *n;
199 spin_lock(&recovd->recovd_lock);
204 * We use _safe here because one of the callbacks, especially
205 * FAILURE or PREPARED, could move list items around.
207 list_for_each_safe(tmp, n, &recovd->recovd_troubled_items) {
208 struct recovd_data *rd = list_entry(tmp, struct recovd_data,
/* Not ready yet: phase still in flight and not failed — skip. */
211 if (rd->rd_phase != RD_FAILED &&
212 rd->rd_phase != rd->rd_next_phase)
215 switch (rd->rd_phase) {
217 cb_failed: /* must always reach here with recovd_lock held! */
218 CERROR("recovery FAILED for rd %p (conn %p): %d\n",
219 rd, class_rd2conn(rd), rc);
/* Drop the lock around the (potentially sleeping) callback. */
221 spin_unlock(&recovd->recovd_lock);
222 (void)rd->rd_recover(rd, PTLRPC_RECOVD_PHASE_FAILURE);
223 spin_lock(&recovd->recovd_lock);
/* TROUBLED -> PREPARING: start recovery via the PREPARE callback. */
227 if (!rd->rd_recover) {
228 CERROR("no rd_recover for rd %p (conn %p)\n",
229 rd, class_rd2conn(rd));
233 CERROR("starting recovery for rd %p (conn %p)\n",
234 rd, class_rd2conn(rd));
235 rd->rd_phase = RD_PREPARING;
236 rd->rd_next_phase = RD_PREPARED;
238 spin_unlock(&recovd->recovd_lock);
239 rc = rd->rd_recover(rd, PTLRPC_RECOVD_PHASE_PREPARE);
240 spin_lock(&recovd->recovd_lock);
/* PREPARED -> RECOVERING: run the RECOVER callback. */
248 CERROR("recovery prepared for rd %p (conn %p)\n",
249 rd, class_rd2conn(rd));
250 rd->rd_phase = RD_RECOVERING;
251 rd->rd_next_phase = RD_RECOVERED;
253 spin_unlock(&recovd->recovd_lock);
254 rc = rd->rd_recover(rd, PTLRPC_RECOVD_PHASE_RECOVER);
255 spin_lock(&recovd->recovd_lock);
/* RECOVERED: reset to idle, ready for the next failure. */
262 rd->rd_phase = RD_IDLE;
263 rd->rd_next_phase = RD_TROUBLED;
265 CERROR("recovery complete for rd %p (conn %p)\n",
266 rd, class_rd2conn(rd));
273 spin_unlock(&recovd->recovd_lock);
/* Body of the lustre_recovd kernel thread.
 *
 * Blocks all signals, names itself, signals readiness on
 * recovd_ctl_waitq, then loops: sleep until recovd_check_event() says
 * there is work (or a stop request), and process it via
 * recovd_handle_event().  On RECOVD_STOPPING it publishes
 * RECOVD_STOPPED and wakes whoever is waiting in recovd_cleanup(). */
277 static int recovd_main(void *arg)
279 struct recovd_obd *recovd = (struct recovd_obd *)arg;
/* FIX: "&current" had been mangled into the HTML entity "&curren;t"
 * ("¤t") in the four signal-setup lines below; restored from the
 * intact uses of current later in this function. */
286 #if LINUX_VERSION_CODE >= KERNEL_VERSION(2,5,0)
287 sigfillset(&current->blocked);
290 spin_lock_irq(&current->sigmask_lock);
291 sigfillset(&current->blocked);
292 recalc_sigpending(current);
293 spin_unlock_irq(&current->sigmask_lock);
296 sprintf(current->comm, "lustre_recovd");
299 /* Signal that the thread is running. */
300 recovd->recovd_thread = current;
301 recovd->recovd_state = RECOVD_READY;
302 wake_up(&recovd->recovd_ctl_waitq);
304 /* And now, loop forever on requests. */
306 wait_event(recovd->recovd_waitq, recovd_check_event(recovd));
307 if (recovd->recovd_state == RECOVD_STOPPING)
309 recovd_handle_event(recovd);
/* Shutdown: let recovd_cleanup() know we are gone. */
312 recovd->recovd_thread = NULL;
313 recovd->recovd_state = RECOVD_STOPPED;
314 wake_up(&recovd->recovd_ctl_waitq);
315 CDEBUG(D_HA, "mgr exiting process %d\n", current->pid);
/* Initialize @recovd (lists, lock, waitqueues), spawn the lustre_recovd
 * kernel thread, wait until it reports RECOVD_READY, then install this
 * recovd as the global ptlrpc_recovd and hook connection-failure
 * notification to recovd_conn_fail.  Returns 0 on success; error
 * handling for kernel_thread failure is elided in this listing. */
319 int recovd_setup(struct recovd_obd *recovd)
325 INIT_LIST_HEAD(&recovd->recovd_managed_items);
326 INIT_LIST_HEAD(&recovd->recovd_troubled_items);
327 spin_lock_init(&recovd->recovd_lock);
329 init_waitqueue_head(&recovd->recovd_waitq);
330 init_waitqueue_head(&recovd->recovd_recovery_waitq);
331 init_waitqueue_head(&recovd->recovd_ctl_waitq);
/* Share VM/FS/files with the parent, as kernel threads do. */
333 rc = kernel_thread(recovd_main, (void *)recovd,
334 CLONE_VM | CLONE_FS | CLONE_FILES);
336 CERROR("cannot start thread\n");
/* Don't return until recovd_main has signalled readiness. */
339 wait_event(recovd->recovd_ctl_waitq,
340 recovd->recovd_state == RECOVD_READY);
342 ptlrpc_recovd = recovd;
343 class_signal_connection_failure = recovd_conn_fail;
/* Tear down @recovd: request the daemon stop (state + wakeup under
 * recovd_lock), then block until recovd_main confirms RECOVD_STOPPED
 * via recovd_ctl_waitq. */
348 int recovd_cleanup(struct recovd_obd *recovd)
351 spin_lock(&recovd->recovd_lock);
352 recovd->recovd_state = RECOVD_STOPPING;
353 wake_up(&recovd->recovd_waitq);
354 spin_unlock(&recovd->recovd_lock);
356 wait_event(recovd->recovd_ctl_waitq,
357 (recovd->recovd_state == RECOVD_STOPPED));
/* The single global recovery daemon, installed by recovd_setup(). */
361 struct recovd_obd *ptlrpc_recovd;