1 /* -*- mode: c; c-basic-offset: 8; indent-tabs-mode: nil; -*-
2 * vim:expandtab:shiftwidth=8:tabstop=8:
6 * Lustre High Availability Daemon
8 * Copyright (C) 2001, 2002 Cluster File Systems, Inc.
10 * This code is issued under the GNU General Public License.
11 * See the file COPYING in this distribution
13 * by Peter Braam <braam@clusterfs.com>
17 #define DEBUG_SUBSYSTEM S_RPC
19 #include <liblustre.h>
20 #include <linux/obd.h>
21 #include <linux/obd_class.h>
23 #include <linux/lustre_lite.h>
26 #include <linux/lustre_ha.h>
27 #include <linux/obd_support.h>
29 /* dump_connection_list, but shorter for nicer debugging logs */
30 static void d_c_l(struct list_head *head)
32 struct list_head *tmp;
34 list_for_each(tmp, head) {
35 struct ptlrpc_connection *conn =
36 list_entry(tmp, struct ptlrpc_connection,
37 c_recovd_data.rd_managed_chain);
38 CDEBUG(D_HA, " %p = %s (%d/%d)\n", conn,
39 conn->c_remote_uuid.uuid,
40 conn->c_recovd_data.rd_phase,
41 conn->c_recovd_data.rd_next_phase);
45 static void dump_lists(struct recovd_obd *recovd)
47 CDEBUG(D_HA, "managed: \n");
48 d_c_l(&recovd->recovd_managed_items);
49 CDEBUG(D_HA, "troubled: \n");
50 d_c_l(&recovd->recovd_troubled_items);
53 void recovd_conn_manage(struct ptlrpc_connection *conn,
54 struct recovd_obd *recovd, ptlrpc_recovery_cb_t recover)
56 struct recovd_data *rd = &conn->c_recovd_data;
58 if (!recovd || !recover) {
63 if (!list_empty(&rd->rd_managed_chain)) {
64 if (rd->rd_recovd == recovd && rd->rd_recover == recover) {
65 CDEBUG(D_HA, "conn %p/%s already setup for recovery\n",
66 conn, conn->c_remote_uuid.uuid);
71 "conn %p/%s has recovery items %p/%p, making %p/%p\n",
72 conn, conn->c_remote_uuid.uuid, rd->rd_recovd, rd->rd_recover,
74 spin_lock(&rd->rd_recovd->recovd_lock);
75 list_del_init(&rd->rd_managed_chain);
76 spin_unlock(&rd->rd_recovd->recovd_lock);
79 rd->rd_recovd = recovd;
80 rd->rd_recover = recover;
81 rd->rd_phase = RD_IDLE;
82 rd->rd_next_phase = RD_TROUBLED;
84 spin_lock(&recovd->recovd_lock);
85 list_add(&rd->rd_managed_chain, &recovd->recovd_managed_items);
87 spin_unlock(&recovd->recovd_lock);
92 void recovd_conn_unmanage(struct ptlrpc_connection *conn)
94 struct recovd_data *rd = &conn->c_recovd_data;
95 struct recovd_obd *recovd = rd->rd_recovd;
99 spin_lock(&recovd->recovd_lock);
100 list_del_init(&rd->rd_managed_chain);
101 rd->rd_recovd = NULL;
102 spin_unlock(&recovd->recovd_lock);
104 /* should be safe enough, right? */
105 rd->rd_recover = NULL;
106 rd->rd_next_phase = RD_IDLE;
107 rd->rd_next_phase = RD_TROUBLED;
110 void recovd_conn_fail(struct ptlrpc_connection *conn)
112 struct recovd_data *rd = &conn->c_recovd_data;
113 struct recovd_obd *recovd = rd->rd_recovd;
117 CERROR("no recovd for connection %p\n", conn);
122 spin_lock(&recovd->recovd_lock);
123 if (rd->rd_phase == RD_TROUBLED || rd->rd_phase == RD_PREPARING) {
124 CDEBUG(D_HA, "connection %p to %s already in recovery\n",
125 conn, conn->c_remote_uuid.uuid);
126 spin_unlock(&recovd->recovd_lock);
131 CERROR("connection %p to %s nid "LPX64" on %s failed\n", conn,
132 conn->c_remote_uuid.uuid, conn->c_peer.peer_nid,
133 conn->c_peer.peer_ni->pni_name);
134 list_del(&rd->rd_managed_chain);
135 list_add_tail(&rd->rd_managed_chain, &recovd->recovd_troubled_items);
136 if (rd->rd_phase != RD_IDLE) {
138 "connection %p to %s failed in recovery: restarting\n",
139 conn, conn->c_remote_uuid.uuid);
140 /* XXX call callback with PHASE_FAILED? */
141 rd->rd_next_phase = RD_TROUBLED;
143 rd->rd_phase = RD_TROUBLED;
145 spin_unlock(&recovd->recovd_lock);
147 wake_up(&recovd->recovd_waitq);
152 void recovd_conn_fixed(struct ptlrpc_connection *conn)
154 struct recovd_data *rd = &conn->c_recovd_data;
157 CDEBUG(D_HA, "connection %p (now to %s) fixed\n",
158 conn, conn->c_remote_uuid.uuid);
159 spin_lock(&rd->rd_recovd->recovd_lock);
160 list_del(&rd->rd_managed_chain);
161 rd->rd_phase = RD_IDLE;
162 rd->rd_next_phase = RD_TROUBLED;
163 list_add(&rd->rd_managed_chain, &rd->rd_recovd->recovd_managed_items);
164 dump_lists(rd->rd_recovd);
165 spin_unlock(&rd->rd_recovd->recovd_lock);
170 static int recovd_check_event(struct recovd_obd *recovd)
173 struct list_head *tmp;
177 spin_lock(&recovd->recovd_lock);
179 if (recovd->recovd_state == RECOVD_STOPPING)
182 list_for_each(tmp, &recovd->recovd_troubled_items) {
184 struct recovd_data *rd = list_entry(tmp, struct recovd_data,
187 if (rd->rd_phase == rd->rd_next_phase ||
188 rd->rd_phase == RD_FAILED)
193 spin_unlock(&recovd->recovd_lock);
197 static int recovd_handle_event(struct recovd_obd *recovd)
199 struct list_head *tmp, *n;
203 spin_lock(&recovd->recovd_lock);
208 * We use _safe here because one of the callbacks, expecially
209 * FAILURE or PREPARED, could move list items around.
211 list_for_each_safe(tmp, n, &recovd->recovd_troubled_items) {
212 struct recovd_data *rd = list_entry(tmp, struct recovd_data,
215 if (rd->rd_phase != RD_FAILED &&
216 rd->rd_phase != rd->rd_next_phase)
219 switch (rd->rd_phase) {
221 cb_failed: /* must always reach here with recovd_lock held! */
222 CERROR("recovery FAILED for rd %p (conn %p): %d\n",
223 rd, class_rd2conn(rd), rc);
225 spin_unlock(&recovd->recovd_lock);
226 (void)rd->rd_recover(rd, PTLRPC_RECOVD_PHASE_FAILURE);
227 spin_lock(&recovd->recovd_lock);
231 if (!rd->rd_recover) {
232 CERROR("no rd_recover for rd %p (conn %p)\n",
233 rd, class_rd2conn(rd));
237 CERROR("starting recovery for rd %p (conn %p)\n",
238 rd, class_rd2conn(rd));
239 rd->rd_phase = RD_PREPARING;
240 rd->rd_next_phase = RD_PREPARED;
242 spin_unlock(&recovd->recovd_lock);
243 rc = rd->rd_recover(rd, PTLRPC_RECOVD_PHASE_PREPARE);
244 spin_lock(&recovd->recovd_lock);
252 CERROR("recovery prepared for rd %p (conn %p)\n",
253 rd, class_rd2conn(rd));
254 rd->rd_phase = RD_RECOVERING;
255 rd->rd_next_phase = RD_RECOVERED;
257 spin_unlock(&recovd->recovd_lock);
258 rc = rd->rd_recover(rd, PTLRPC_RECOVD_PHASE_RECOVER);
259 spin_lock(&recovd->recovd_lock);
266 rd->rd_phase = RD_IDLE;
267 rd->rd_next_phase = RD_TROUBLED;
269 CERROR("recovery complete for rd %p (conn %p)\n",
270 rd, class_rd2conn(rd));
277 spin_unlock(&recovd->recovd_lock);
282 static int recovd_main(void *arg)
284 struct recovd_obd *recovd = (struct recovd_obd *)arg;
291 #if LINUX_VERSION_CODE >= KERNEL_VERSION(2,5,0)
292 sigfillset(¤t->blocked);
295 spin_lock_irqsave(¤t->sigmask_lock, flags);
296 sigfillset(¤t->blocked);
297 recalc_sigpending(current);
298 spin_unlock_irqrestore(¤t->sigmask_lock, flags);
301 sprintf(current->comm, "lustre_recovd");
304 /* Signal that the thread is running. */
305 recovd->recovd_thread = current;
306 recovd->recovd_state = RECOVD_READY;
307 wake_up(&recovd->recovd_ctl_waitq);
309 /* And now, loop forever on requests. */
311 wait_event(recovd->recovd_waitq, recovd_check_event(recovd));
312 if (recovd->recovd_state == RECOVD_STOPPING)
314 recovd_handle_event(recovd);
317 recovd->recovd_thread = NULL;
318 recovd->recovd_state = RECOVD_STOPPED;
319 wake_up(&recovd->recovd_ctl_waitq);
320 CDEBUG(D_HA, "mgr exiting process %d\n", current->pid);
324 int recovd_setup(struct recovd_obd *recovd)
326 int rc = 0; /* initialize for Liblustre */
330 INIT_LIST_HEAD(&recovd->recovd_managed_items);
331 INIT_LIST_HEAD(&recovd->recovd_troubled_items);
332 spin_lock_init(&recovd->recovd_lock);
334 init_waitqueue_head(&recovd->recovd_waitq);
335 init_waitqueue_head(&recovd->recovd_recovery_waitq);
336 init_waitqueue_head(&recovd->recovd_ctl_waitq);
338 rc = kernel_thread(recovd_main, (void *)recovd,
339 CLONE_VM | CLONE_FS | CLONE_FILES);
341 CERROR("cannot start thread\n");
344 wait_event(recovd->recovd_ctl_waitq,
345 recovd->recovd_state == RECOVD_READY);
347 ptlrpc_recovd = recovd;
348 class_signal_connection_failure = recovd_conn_fail;
/* NOTE(review): second recovd_setup signature — presumably the
 * non-__KERNEL__ (liblustre) stub; its #ifdef guards and body were lost
 * in this extraction. Confirm against the original file. */
353 int recovd_setup(struct recovd_obd *recovd)
359 int recovd_cleanup(struct recovd_obd *recovd)
362 spin_lock(&recovd->recovd_lock);
363 recovd->recovd_state = RECOVD_STOPPING;
364 wake_up(&recovd->recovd_waitq);
365 spin_unlock(&recovd->recovd_lock);
367 wait_event(recovd->recovd_ctl_waitq,
368 (recovd->recovd_state == RECOVD_STOPPED));
/* Global default recovery daemon, published by recovd_setup. */
372 struct recovd_obd *ptlrpc_recovd;