/* -*- mode: c; c-basic-offset: 8; indent-tabs-mode: nil; -*-
 * vim:expandtab:shiftwidth=8:tabstop=8:
 *
 *  lustre/ptlrpc/recovd.c
 *
 *  Lustre High Availability Daemon
 *
 *  Copyright (C) 2001, 2002 Cluster File Systems, Inc.
 *
 *  This code is issued under the GNU General Public License.
 *  See the file COPYING in this distribution
 *
 *  by Peter Braam <braam@clusterfs.com>
 *
 */

#define DEBUG_SUBSYSTEM S_RPC

#include <linux/lustre_lite.h>
#include <linux/lustre_ha.h>
#include <linux/obd_support.h>

/* dump_connection_list, but shorter for nicer debugging logs */
static void d_c_l(struct list_head *head)
{
        struct list_head *tmp;

        list_for_each(tmp, head) {
                struct ptlrpc_connection *conn =
                        list_entry(tmp, struct ptlrpc_connection,
                                   c_recovd_data.rd_managed_chain);
                CDEBUG(D_HA, "   %p = %s (%d/%d)\n", conn, conn->c_remote_uuid,
                       conn->c_recovd_data.rd_phase,
                       conn->c_recovd_data.rd_next_phase);
        }
}

static void dump_lists(struct recovd_obd *recovd)
{
        CDEBUG(D_HA, "managed: \n");
        d_c_l(&recovd->recovd_managed_items);
        CDEBUG(D_HA, "troubled: \n");
        d_c_l(&recovd->recovd_troubled_items);
}

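/*
 * Place a connection under recovd management: record which recovd and
 * recovery callback own it, reset its recovery phase, and add it to the
 * managed list.  If the connection was already managed by a different
 * recovd/callback pair, it is first removed from that recovd's list.
 */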
void recovd_conn_manage(struct ptlrpc_connection *conn,
                        struct recovd_obd *recovd, ptlrpc_recovery_cb_t recover)
{
        struct recovd_data *rd = &conn->c_recovd_data;
        ENTRY;
        if (!recovd || !recover) {
                EXIT;
                return;
        }

        if (!list_empty(&rd->rd_managed_chain)) {
                if (rd->rd_recovd == recovd && rd->rd_recover == recover) {
                        CDEBUG(D_HA, "conn %p/%s already setup for recovery\n",
                               conn, conn->c_remote_uuid);
                        EXIT;
                        return;
                }
                CDEBUG(D_HA,
                       "conn %p/%s has recovery items %p/%p, making %p/%p\n",
                       conn, conn->c_remote_uuid, rd->rd_recovd, rd->rd_recover,
                       recovd, recover);
                spin_lock(&rd->rd_recovd->recovd_lock);
                list_del_init(&rd->rd_managed_chain);
                spin_unlock(&rd->rd_recovd->recovd_lock);
        }

        rd->rd_recovd = recovd;
        rd->rd_recover = recover;
        rd->rd_phase = RD_IDLE;
        rd->rd_next_phase = RD_TROUBLED;

        spin_lock(&recovd->recovd_lock);
        list_add(&rd->rd_managed_chain, &recovd->recovd_managed_items);
        dump_lists(recovd);
        spin_unlock(&recovd->recovd_lock);

        EXIT;
}

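/*
 * Remove a connection from recovd management and reset its recovery state
 * so that it can be managed again later.
 */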
void recovd_conn_unmanage(struct ptlrpc_connection *conn)
{
        struct recovd_data *rd = &conn->c_recovd_data;
        struct recovd_obd *recovd = rd->rd_recovd;
        ENTRY;

        if (recovd) {
                spin_lock(&recovd->recovd_lock);
                list_del_init(&rd->rd_managed_chain);
                rd->rd_recovd = NULL;
                spin_unlock(&recovd->recovd_lock);
        }
        /* should be safe enough, right? */
        rd->rd_recover = NULL;
        rd->rd_phase = RD_IDLE;
        rd->rd_next_phase = RD_TROUBLED;

        EXIT;
}

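/*
 * Called when a managed connection fails: move it to the troubled list,
 * mark it RD_TROUBLED, and wake the recovery thread so it can start the
 * PREPARE/RECOVER sequence.
 */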
void recovd_conn_fail(struct ptlrpc_connection *conn)
{
        struct recovd_data *rd = &conn->c_recovd_data;
        struct recovd_obd *recovd = rd->rd_recovd;
        ENTRY;

        if (!recovd) {
                CERROR("no recovd for connection %p\n", conn);
                EXIT;
                return;
        }

        spin_lock(&recovd->recovd_lock);
        if (rd->rd_phase == RD_TROUBLED || rd->rd_phase == RD_PREPARING) {
                CDEBUG(D_HA, "connection %p to %s already in recovery\n",
                       conn, conn->c_remote_uuid);
                spin_unlock(&recovd->recovd_lock);
                EXIT;
                return;
        }

        CERROR("connection %p to %s (%08x %08lx %08lx) failed\n", conn,
               conn->c_remote_uuid, conn->c_peer.peer_nid,
               conn->c_peer.peer_ni.nal_idx, conn->c_peer.peer_ni.handle_idx);
        list_del(&rd->rd_managed_chain);
        list_add_tail(&rd->rd_managed_chain, &recovd->recovd_troubled_items);
        if (rd->rd_phase != RD_IDLE) {
                CDEBUG(D_HA,
                       "connection %p to %s failed in recovery: restarting\n",
                       conn, conn->c_remote_uuid);
                /* XXX call callback with PHASE_FAILED? */
                rd->rd_next_phase = RD_TROUBLED;
        }
        rd->rd_phase = RD_TROUBLED;
        dump_lists(recovd);
        spin_unlock(&recovd->recovd_lock);

        wake_up(&recovd->recovd_waitq);

        EXIT;
}

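/*
 * Called when recovery of a connection has completed: return it to the
 * managed list and reset its phase so future failures restart recovery.
 */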
void recovd_conn_fixed(struct ptlrpc_connection *conn)
{
        struct recovd_data *rd = &conn->c_recovd_data;
        ENTRY;

        CDEBUG(D_HA, "connection %p (now to %s) fixed\n",
               conn, conn->c_remote_uuid);
        spin_lock(&rd->rd_recovd->recovd_lock);
        list_del(&rd->rd_managed_chain);
        rd->rd_phase = RD_IDLE;
        rd->rd_next_phase = RD_TROUBLED;
        list_add(&rd->rd_managed_chain, &rd->rd_recovd->recovd_managed_items);
        dump_lists(rd->rd_recovd);
        spin_unlock(&rd->rd_recovd->recovd_lock);

        EXIT;
}

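/*
 * Wait condition for the recovery thread: returns nonzero when the daemon
 * is stopping or when some troubled connection is ready for its next
 * recovery phase (or has failed).
 */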
static int recovd_check_event(struct recovd_obd *recovd)
{
        int rc = 0;
        struct list_head *tmp;

        ENTRY;

        spin_lock(&recovd->recovd_lock);

        if (recovd->recovd_state == RECOVD_STOPPING)
                GOTO(out, rc = 1);

        list_for_each(tmp, &recovd->recovd_troubled_items) {
                struct recovd_data *rd = list_entry(tmp, struct recovd_data,
                                                    rd_managed_chain);

                if (rd->rd_phase == rd->rd_next_phase ||
                    rd->rd_phase == RD_FAILED)
                        GOTO(out, rc = 1);
        }

 out:
        spin_unlock(&recovd->recovd_lock);
        RETURN(rc);
}

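/*
 * Walk the troubled list and advance each ready connection through the
 * recovery state machine, invoking its rd_recover callback outside
 * recovd_lock for each phase transition.
 */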
static int recovd_handle_event(struct recovd_obd *recovd)
{
        struct list_head *tmp, *n;
        int rc = 0;
        ENTRY;

        spin_lock(&recovd->recovd_lock);

        dump_lists(recovd);

        /*
         * We use _safe here because one of the callbacks, especially
         * FAILURE or PREPARED, could move list items around.
         */
        list_for_each_safe(tmp, n, &recovd->recovd_troubled_items) {
                struct recovd_data *rd = list_entry(tmp, struct recovd_data,
                                                    rd_managed_chain);

                if (rd->rd_phase != RD_FAILED &&
                    rd->rd_phase != rd->rd_next_phase)
                        continue;

                switch (rd->rd_phase) {
                    case RD_FAILED:
                cb_failed: /* must always reach here with recovd_lock held! */
                        CERROR("recovery FAILED for rd %p (conn %p): %d\n",
                               rd, class_rd2conn(rd), rc);

                        spin_unlock(&recovd->recovd_lock);
                        (void)rd->rd_recover(rd, PTLRPC_RECOVD_PHASE_FAILURE);
                        spin_lock(&recovd->recovd_lock);
                        break;

                    case RD_TROUBLED:
                        if (!rd->rd_recover) {
                                CERROR("no rd_recover for rd %p (conn %p)\n",
                                       rd, class_rd2conn(rd));
                                rc = -EINVAL;
                                break;
                        }
                        CERROR("starting recovery for rd %p (conn %p)\n",
                               rd, class_rd2conn(rd));
                        rd->rd_phase = RD_PREPARING;
                        rd->rd_next_phase = RD_PREPARED;

                        spin_unlock(&recovd->recovd_lock);
                        rc = rd->rd_recover(rd, PTLRPC_RECOVD_PHASE_PREPARE);
                        spin_lock(&recovd->recovd_lock);
                        if (rc)
                                goto cb_failed;

                        break;

                    case RD_PREPARED:
                        CERROR("recovery prepared for rd %p (conn %p)\n",
                               rd, class_rd2conn(rd));
                        rd->rd_phase = RD_RECOVERING;
                        rd->rd_next_phase = RD_RECOVERED;

                        spin_unlock(&recovd->recovd_lock);
                        rc = rd->rd_recover(rd, PTLRPC_RECOVD_PHASE_RECOVER);
                        spin_lock(&recovd->recovd_lock);
                        if (rc)
                                goto cb_failed;

                        break;

                    case RD_RECOVERED:
                        rd->rd_phase = RD_IDLE;
                        rd->rd_next_phase = RD_TROUBLED;

                        CERROR("recovery complete for rd %p (conn %p)\n",
                               rd, class_rd2conn(rd));
                        break;

                    default:
                        break;
                }
        }
        spin_unlock(&recovd->recovd_lock);
        RETURN(0);
}

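/*
 * Main loop of the recovery thread: block all signals, announce readiness
 * on recovd_ctl_waitq, then sleep on recovd_waitq until there is recovery
 * work to do or the daemon is asked to stop.
 */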
static int recovd_main(void *arg)
{
        struct recovd_obd *recovd = (struct recovd_obd *)arg;
        unsigned long flags;
        ENTRY;

        lock_kernel();
        daemonize();

#if LINUX_VERSION_CODE >= KERNEL_VERSION(2,5,0)
        sigfillset(&current->blocked);
        recalc_sigpending();
#else
        spin_lock_irqsave(&current->sigmask_lock, flags);
        sigfillset(&current->blocked);
        recalc_sigpending(current);
        spin_unlock_irqrestore(&current->sigmask_lock, flags);
#endif

        sprintf(current->comm, "lustre_recovd");
        unlock_kernel();

        /* Signal that the thread is running. */
        recovd->recovd_thread = current;
        recovd->recovd_state = RECOVD_READY;
        wake_up(&recovd->recovd_ctl_waitq);

        /* And now, loop forever on requests. */
        while (1) {
                wait_event(recovd->recovd_waitq, recovd_check_event(recovd));
                if (recovd->recovd_state == RECOVD_STOPPING)
                        break;
                recovd_handle_event(recovd);
        }

        recovd->recovd_thread = NULL;
        recovd->recovd_state = RECOVD_STOPPED;
        wake_up(&recovd->recovd_ctl_waitq);
        CDEBUG(D_HA, "mgr exiting process %d\n", current->pid);
        RETURN(0);
}

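/*
 * Initialize a recovd_obd, start the recovery thread, and wait until it
 * reports RECOVD_READY.  Also installs this recovd as the global
 * ptlrpc_recovd and hooks connection-failure notification up to
 * recovd_conn_fail().
 */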
int recovd_setup(struct recovd_obd *recovd)
{
        int rc;

        ENTRY;

        INIT_LIST_HEAD(&recovd->recovd_managed_items);
        INIT_LIST_HEAD(&recovd->recovd_troubled_items);
        spin_lock_init(&recovd->recovd_lock);

        init_waitqueue_head(&recovd->recovd_waitq);
        init_waitqueue_head(&recovd->recovd_recovery_waitq);
        init_waitqueue_head(&recovd->recovd_ctl_waitq);

        rc = kernel_thread(recovd_main, (void *)recovd,
                           CLONE_VM | CLONE_FS | CLONE_FILES);
        if (rc < 0) {
                CERROR("cannot start thread\n");
                RETURN(-EINVAL);
        }
        wait_event(recovd->recovd_ctl_waitq,
                   recovd->recovd_state == RECOVD_READY);

        ptlrpc_recovd = recovd;
        class_signal_connection_failure = recovd_conn_fail;

        RETURN(0);
}

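/*
 * Ask the recovery thread to stop and wait until it has exited.
 */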
int recovd_cleanup(struct recovd_obd *recovd)
{
        ENTRY;
        spin_lock(&recovd->recovd_lock);
        recovd->recovd_state = RECOVD_STOPPING;
        wake_up(&recovd->recovd_waitq);
        spin_unlock(&recovd->recovd_lock);

        wait_event(recovd->recovd_ctl_waitq,
                   (recovd->recovd_state == RECOVD_STOPPED));
        RETURN(0);
}

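/* The single global recovery daemon, installed by recovd_setup(). */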
struct recovd_obd *ptlrpc_recovd;