1 /* -*- mode: c; c-basic-offset: 8; indent-tabs-mode: nil; -*-
2 * vim:expandtab:shiftwidth=8:tabstop=8:
6 * Lustre High Availability Daemon
8 * Copyright (C) 2001, 2002 Cluster File Systems, Inc.
10 * This code is issued under the GNU General Public License.
11 * See the file COPYING in this distribution
13 * by Peter Braam <braam@clusterfs.com>
17 #define DEBUG_SUBSYSTEM S_RPC
19 #include <linux/lustre_lite.h>
20 #include <linux/lustre_ha.h>
21 #include <linux/obd_support.h>
23 /* dump_connection_list, but shorter for nicer debugging logs */
/* Dump one recovd-managed connection list: for every connection chained on
 * @head (via its embedded c_recovd_data.rd_managed_chain), log the pointer,
 * remote UUID, and current/next recovery phase.
 * NOTE(review): this function's opening/closing braces appear to have been
 * lost in extraction; code tokens preserved as-is. */
static void d_c_l(struct list_head *head)
struct list_head *tmp;
list_for_each(tmp, head) {
        /* entries are ptlrpc_connections linked through the recovd_data
         * embedded in each connection */
        struct ptlrpc_connection *conn =
                list_entry(tmp, struct ptlrpc_connection,
                           c_recovd_data.rd_managed_chain);
        /* "(current phase / expected next phase)" as raw enum values */
        CDEBUG(D_HA, " %p = %s (%d/%d)\n", conn,
               conn->c_remote_uuid.uuid,
               conn->c_recovd_data.rd_phase,
               conn->c_recovd_data.rd_next_phase);
/* Log both connection lists of @recovd for debugging: the healthy "managed"
 * list and the "troubled" list awaiting recovery.
 * NOTE(review): function braces appear lost in extraction; tokens kept. */
static void dump_lists(struct recovd_obd *recovd)
CDEBUG(D_HA, "managed: \n");
d_c_l(&recovd->recovd_managed_items);
CDEBUG(D_HA, "troubled: \n");
d_c_l(&recovd->recovd_troubled_items);
/*
 * Register @conn with recovery daemon @recovd so that @recover is invoked
 * to drive the connection through the recovery state machine when it fails.
 * If the connection is already managed by the same daemon/callback pair this
 * is a no-op; if it is managed by a different pair, it is first unlinked
 * from the old daemon's list, then re-registered.
 * NOTE(review): guard bodies and several braces appear lost in extraction;
 * code tokens preserved as-is.
 */
void recovd_conn_manage(struct ptlrpc_connection *conn,
                        struct recovd_obd *recovd, ptlrpc_recovery_cb_t recover)
struct recovd_data *rd = &conn->c_recovd_data;
/* nothing to manage without both a daemon and a callback */
if (!recovd || !recover) {
if (!list_empty(&rd->rd_managed_chain)) {
        /* already on some daemon's list */
        if (rd->rd_recovd == recovd && rd->rd_recover == recover) {
                /* same daemon and callback: nothing to change */
                CDEBUG(D_HA, "conn %p/%s already setup for recovery\n",
                       conn, conn->c_remote_uuid.uuid);
        /* different daemon/callback: log the switch and unlink from the
         * old daemon's list under its lock */
        "conn %p/%s has recovery items %p/%p, making %p/%p\n",
        conn, conn->c_remote_uuid.uuid, rd->rd_recovd, rd->rd_recover,
        spin_lock(&rd->rd_recovd->recovd_lock);
        list_del_init(&rd->rd_managed_chain);
        spin_unlock(&rd->rd_recovd->recovd_lock);
/* (re)initialize recovery state: idle now, TROUBLED is the expected next
 * phase (matches the reset done in recovd_conn_fixed) */
rd->rd_recovd = recovd;
rd->rd_recover = recover;
rd->rd_phase = RD_IDLE;
rd->rd_next_phase = RD_TROUBLED;
/* publish on the healthy (managed) list under the daemon's lock */
spin_lock(&recovd->recovd_lock);
list_add(&rd->rd_managed_chain, &recovd->recovd_managed_items);
spin_unlock(&recovd->recovd_lock);
86 void recovd_conn_unmanage(struct ptlrpc_connection *conn)
88 struct recovd_data *rd = &conn->c_recovd_data;
89 struct recovd_obd *recovd = rd->rd_recovd;
93 spin_lock(&recovd->recovd_lock);
94 list_del_init(&rd->rd_managed_chain);
96 spin_unlock(&recovd->recovd_lock);
98 /* should be safe enough, right? */
99 rd->rd_recover = NULL;
100 rd->rd_next_phase = RD_IDLE;
101 rd->rd_next_phase = RD_TROUBLED;
/*
 * Mark @conn failed: move it from the managed list to the troubled list and
 * wake the recovery thread. If a recovery was already in flight for this
 * connection, the expected next phase is reset so it restarts from scratch.
 * NOTE(review): the NULL-recovd guard body and several braces appear lost
 * in extraction; code tokens preserved as-is.
 */
void recovd_conn_fail(struct ptlrpc_connection *conn)
struct recovd_data *rd = &conn->c_recovd_data;
struct recovd_obd *recovd = rd->rd_recovd;
/* presumably reached only when rd->rd_recovd == NULL — verify */
CERROR("no recovd for connection %p\n", conn);
spin_lock(&recovd->recovd_lock);
if (rd->rd_phase == RD_TROUBLED || rd->rd_phase == RD_PREPARING) {
        /* already queued/being prepared; don't requeue */
        CDEBUG(D_HA, "connection %p to %s already in recovery\n",
               conn, conn->c_remote_uuid.uuid);
        spin_unlock(&recovd->recovd_lock);
CERROR("connection %p to %s (%08x %08lx %08lx) failed\n", conn,
       conn->c_remote_uuid.uuid, conn->c_peer.peer_nid,
       conn->c_peer.peer_ni.nal_idx, conn->c_peer.peer_ni.handle_idx);
/* move to the troubled list; the daemon thread will pick it up */
list_del(&rd->rd_managed_chain);
list_add_tail(&rd->rd_managed_chain, &recovd->recovd_troubled_items);
if (rd->rd_phase != RD_IDLE) {
        /* failed while a recovery callback was in flight: restart */
        "connection %p to %s failed in recovery: restarting\n",
        conn, conn->c_remote_uuid.uuid);
        /* XXX call callback with PHASE_FAILED? */
        rd->rd_next_phase = RD_TROUBLED;
rd->rd_phase = RD_TROUBLED;
spin_unlock(&recovd->recovd_lock);
/* kick recovd_main() out of its wait_event() */
wake_up(&recovd->recovd_waitq);
/*
 * Recovery of @conn completed: reset its phase state to idle and move it
 * back onto its daemon's managed (healthy) list.
 * NOTE(review): function braces appear lost in extraction; tokens kept.
 */
void recovd_conn_fixed(struct ptlrpc_connection *conn)
struct recovd_data *rd = &conn->c_recovd_data;
CDEBUG(D_HA, "connection %p (now to %s) fixed\n",
       conn, conn->c_remote_uuid.uuid);
spin_lock(&rd->rd_recovd->recovd_lock);
list_del(&rd->rd_managed_chain);
/* back to the idle state: TROUBLED is the only expected transition */
rd->rd_phase = RD_IDLE;
rd->rd_next_phase = RD_TROUBLED;
list_add(&rd->rd_managed_chain, &rd->rd_recovd->recovd_managed_items);
dump_lists(rd->rd_recovd);
spin_unlock(&rd->rd_recovd->recovd_lock);
/*
 * wait_event() predicate for the daemon thread: nonzero when there is work
 * to do — the daemon is being stopped, or some troubled item has reached
 * its expected next phase (or has failed outright). Scans under
 * recovd_lock.
 * NOTE(review): the branch bodies (rc assignments / goto), the list_entry
 * member argument, and function braces appear lost in extraction; code
 * tokens preserved as-is.
 */
static int recovd_check_event(struct recovd_obd *recovd)
struct list_head *tmp;
spin_lock(&recovd->recovd_lock);
/* a stop request always counts as an event */
if (recovd->recovd_state == RECOVD_STOPPING)
list_for_each(tmp, &recovd->recovd_troubled_items) {
        struct recovd_data *rd = list_entry(tmp, struct recovd_data,
        /* ready when a callback advanced rd to its expected phase, or
         * recovery failed */
        if (rd->rd_phase == rd->rd_next_phase ||
            rd->rd_phase == RD_FAILED)
spin_unlock(&recovd->recovd_lock);
/*
 * Drive every ready troubled item one step through the recovery state
 * machine via its rd_recover() callback:
 *   TROUBLED  -> PREPARING (callback PHASE_PREPARE, expect PREPARED)
 *   PREPARED  -> RECOVERING (callback PHASE_RECOVER, expect RECOVERED)
 *   RECOVERED -> IDLE (done)
 * recovd_lock is held while walking the list and dropped around each
 * callback invocation; a failing callback lands at cb_failed, which issues
 * the PHASE_FAILURE callback.
 * NOTE(review): the switch's case labels (RD_TROUBLED/RD_PREPARED/...),
 * goto statements, and several braces appear lost in extraction; code
 * tokens preserved as-is.
 */
static int recovd_handle_event(struct recovd_obd *recovd)
struct list_head *tmp, *n;
spin_lock(&recovd->recovd_lock);
/*
 * We use _safe here because one of the callbacks, especially
 * FAILURE or PREPARED, could move list items around.
 */
list_for_each_safe(tmp, n, &recovd->recovd_troubled_items) {
        struct recovd_data *rd = list_entry(tmp, struct recovd_data,
        /* skip items still waiting for a callback to advance them */
        if (rd->rd_phase != RD_FAILED &&
            rd->rd_phase != rd->rd_next_phase)
        switch (rd->rd_phase) {
        cb_failed: /* must always reach here with recovd_lock held! */
                CERROR("recovery FAILED for rd %p (conn %p): %d\n",
                       rd, class_rd2conn(rd), rc);
                /* drop the lock around the FAILURE callback, reacquire
                 * after — callbacks may sleep */
                spin_unlock(&recovd->recovd_lock);
                (void)rd->rd_recover(rd, PTLRPC_RECOVD_PHASE_FAILURE);
                spin_lock(&recovd->recovd_lock);
                if (!rd->rd_recover) {
                        /* cannot recover without a callback */
                        CERROR("no rd_recover for rd %p (conn %p)\n",
                               rd, class_rd2conn(rd));
                CERROR("starting recovery for rd %p (conn %p)\n",
                       rd, class_rd2conn(rd));
                /* TROUBLED -> PREPARING; PREPARED expected next */
                rd->rd_phase = RD_PREPARING;
                rd->rd_next_phase = RD_PREPARED;
                spin_unlock(&recovd->recovd_lock);
                rc = rd->rd_recover(rd, PTLRPC_RECOVD_PHASE_PREPARE);
                spin_lock(&recovd->recovd_lock);
                CERROR("recovery prepared for rd %p (conn %p)\n",
                       rd, class_rd2conn(rd));
                /* PREPARED -> RECOVERING; RECOVERED expected next */
                rd->rd_phase = RD_RECOVERING;
                rd->rd_next_phase = RD_RECOVERED;
                spin_unlock(&recovd->recovd_lock);
                rc = rd->rd_recover(rd, PTLRPC_RECOVD_PHASE_RECOVER);
                spin_lock(&recovd->recovd_lock);
                /* fully recovered: back to the idle state */
                rd->rd_phase = RD_IDLE;
                rd->rd_next_phase = RD_TROUBLED;
                CERROR("recovery complete for rd %p (conn %p)\n",
                       rd, class_rd2conn(rd));
spin_unlock(&recovd->recovd_lock);
275 static int recovd_main(void *arg)
277 struct recovd_obd *recovd = (struct recovd_obd *)arg;
284 #if LINUX_VERSION_CODE >= KERNEL_VERSION(2,5,0)
285 sigfillset(¤t->blocked);
288 spin_lock_irqsave(¤t->sigmask_lock, flags);
289 sigfillset(¤t->blocked);
290 recalc_sigpending(current);
291 spin_unlock_irqrestore(¤t->sigmask_lock, flags);
294 sprintf(current->comm, "lustre_recovd");
297 /* Signal that the thread is running. */
298 recovd->recovd_thread = current;
299 recovd->recovd_state = RECOVD_READY;
300 wake_up(&recovd->recovd_ctl_waitq);
302 /* And now, loop forever on requests. */
304 wait_event(recovd->recovd_waitq, recovd_check_event(recovd));
305 if (recovd->recovd_state == RECOVD_STOPPING)
307 recovd_handle_event(recovd);
310 recovd->recovd_thread = NULL;
311 recovd->recovd_state = RECOVD_STOPPED;
312 wake_up(&recovd->recovd_ctl_waitq);
313 CDEBUG(D_HA, "mgr exiting process %d\n", current->pid);
/*
 * Initialize @recovd (lists, lock, wait queues), spawn the lustre_recovd
 * kernel thread, and block until it signals RECOVD_READY. On success,
 * installs this daemon as the global ptlrpc_recovd and hooks connection
 * failure notification to recovd_conn_fail().
 * NOTE(review): the rc declaration, error-return path after
 * kernel_thread(), and function braces appear lost in extraction; code
 * tokens preserved as-is.
 */
int recovd_setup(struct recovd_obd *recovd)
INIT_LIST_HEAD(&recovd->recovd_managed_items);
INIT_LIST_HEAD(&recovd->recovd_troubled_items);
spin_lock_init(&recovd->recovd_lock);
init_waitqueue_head(&recovd->recovd_waitq);
init_waitqueue_head(&recovd->recovd_recovery_waitq);
init_waitqueue_head(&recovd->recovd_ctl_waitq);
/* spawn the daemon sharing our VM/FS/files (2.4-era thread API) */
rc = kernel_thread(recovd_main, (void *)recovd,
                   CLONE_VM | CLONE_FS | CLONE_FILES);
CERROR("cannot start thread\n");
/* wait for recovd_main() to announce it is running */
wait_event(recovd->recovd_ctl_waitq,
           recovd->recovd_state == RECOVD_READY);
ptlrpc_recovd = recovd;
class_signal_connection_failure = recovd_conn_fail;
/*
 * Ask the daemon thread to stop (set RECOVD_STOPPING, wake it) and block
 * until recovd_main() has exited (state == RECOVD_STOPPED).
 * NOTE(review): the return statement and function braces appear lost in
 * extraction; code tokens preserved as-is.
 */
int recovd_cleanup(struct recovd_obd *recovd)
spin_lock(&recovd->recovd_lock);
recovd->recovd_state = RECOVD_STOPPING;
wake_up(&recovd->recovd_waitq);
spin_unlock(&recovd->recovd_lock);
/* recovd_main() wakes this queue when it has fully stopped */
wait_event(recovd->recovd_ctl_waitq,
           (recovd->recovd_state == RECOVD_STOPPED));
/* the single global recovery daemon instance, installed by recovd_setup() */
struct recovd_obd *ptlrpc_recovd;