Whamcloud - gitweb
Bump the sanity-max to 1000, since we have way, way more than 50
[fs/lustre-release.git] / lustre / ptlrpc / recovd.c
1 /* -*- mode: c; c-basic-offset: 8; indent-tabs-mode: nil; -*-
2  * vim:expandtab:shiftwidth=8:tabstop=8:
3  *
4  *  obd/rpc/recovd.c
5  *
6  *  Lustre High Availability Daemon
7  *
8  *  Copyright (C) 2001, 2002 Cluster File Systems, Inc.
9  *
10  *  This code is issued under the GNU General Public License.
11  *  See the file COPYING in this distribution
12  *
13  *  by Peter Braam <braam@clusterfs.com>
14  *
15  */
16
17 #define DEBUG_SUBSYSTEM S_RPC
18
19 #include <linux/lustre_lite.h>
20 #include <linux/lustre_ha.h>
21 #include <linux/obd_support.h>
22
23 /* dump_connection_list, but shorter for nicer debugging logs */
24 static void d_c_l(struct list_head *head)
25 {
26         int sanity = 0;
27         struct list_head *tmp;
28
29         list_for_each(tmp, head) {
30                 struct ptlrpc_connection *conn =
31                         list_entry(tmp, struct ptlrpc_connection,
32                                    c_recovd_data.rd_managed_chain);
33                 CDEBUG(D_HA, "   %p = %s (%d/%d)\n", conn, conn->c_remote_uuid,
34                        conn->c_recovd_data.rd_phase,
35                        conn->c_recovd_data.rd_next_phase);
36                 if (sanity++ > 1000)
37                         LBUG();
38         }
39 }
40
41 static void dump_lists(struct recovd_obd *recovd)
42 {
43         CDEBUG(D_HA, "managed: \n");
44         d_c_l(&recovd->recovd_managed_items);
45         CDEBUG(D_HA, "troubled: \n");
46         d_c_l(&recovd->recovd_troubled_items);
47 }
48
49 void recovd_conn_manage(struct ptlrpc_connection *conn,
50                         struct recovd_obd *recovd, ptlrpc_recovery_cb_t recover)
51 {
52         struct recovd_data *rd = &conn->c_recovd_data;
53         ENTRY;
54         if (!recovd || !recover) {
55                 EXIT;
56                 return;
57         }
58
59         if (!list_empty(&rd->rd_managed_chain)) {
60                 if (rd->rd_recovd == recovd && rd->rd_recover == recover) {
61                         CDEBUG(D_HA, "conn %p/%s already setup for recovery\n",
62                                conn, conn->c_remote_uuid);
63                         EXIT;
64                         return;
65                 }
66                 CDEBUG(D_HA,
67                        "conn %p/%s has recovery items %p/%p, making %p/%p\n",
68                        conn, conn->c_remote_uuid, rd->rd_recovd, rd->rd_recover,
69                        recovd, recover);
70                 spin_lock(&rd->rd_recovd->recovd_lock);
71                 list_del(&rd->rd_managed_chain);
72                 spin_unlock(&rd->rd_recovd->recovd_lock);
73         }
74
75         rd->rd_recovd = recovd;
76         rd->rd_recover = recover;
77         rd->rd_phase = RD_IDLE;
78         rd->rd_next_phase = RD_TROUBLED;
79
80         spin_lock(&recovd->recovd_lock);
81         list_add(&rd->rd_managed_chain, &recovd->recovd_managed_items);
82         dump_lists(recovd);
83         spin_unlock(&recovd->recovd_lock);
84
85         EXIT;
86 }
87
88 void recovd_conn_fail(struct ptlrpc_connection *conn)
89 {
90         struct recovd_data *rd = &conn->c_recovd_data;
91         struct recovd_obd *recovd = rd->rd_recovd;
92         ENTRY;
93
94         if (!recovd) {
95                 CERROR("no recovd for connection %p\n", conn);
96                 EXIT;
97                 return;
98         }
99
100         spin_lock(&recovd->recovd_lock);
101         if (rd->rd_phase != RD_IDLE) {
102                 CERROR("connection %p to %s already in recovery\n",
103                        conn, conn->c_remote_uuid);
104                 /* XXX need to distinguish from failure-in-recovery */
105                 spin_unlock(&recovd->recovd_lock);
106                 EXIT;
107                 return;
108         }
109                 
110         CERROR("connection %p to %s failed\n", conn, conn->c_remote_uuid);
111         list_del(&rd->rd_managed_chain);
112         list_add_tail(&rd->rd_managed_chain, &recovd->recovd_troubled_items);
113         rd->rd_phase = RD_TROUBLED;
114         dump_lists(recovd);
115         spin_unlock(&recovd->recovd_lock);
116
117         wake_up(&recovd->recovd_waitq);
118
119         EXIT;
120 }
121
122 void recovd_conn_fixed(struct ptlrpc_connection *conn)
123 {
124         struct recovd_data *rd = &conn->c_recovd_data;
125         ENTRY;
126
127         CDEBUG(D_HA, "connection %p (now to %s) fixed\n",
128                conn, conn->c_remote_uuid);
129         spin_lock(&rd->rd_recovd->recovd_lock);
130         list_del(&rd->rd_managed_chain);
131         rd->rd_phase = RD_IDLE;
132         rd->rd_next_phase = RD_TROUBLED;
133         list_add(&rd->rd_managed_chain, &rd->rd_recovd->recovd_managed_items);
134         dump_lists(rd->rd_recovd);
135         spin_unlock(&rd->rd_recovd->recovd_lock);
136
137         EXIT;
138 }
139
140
141 static int recovd_check_event(struct recovd_obd *recovd)
142 {
143         int rc = 0;
144         struct list_head *tmp;
145
146         ENTRY;
147
148         spin_lock(&recovd->recovd_lock);
149
150         if (recovd->recovd_state == RECOVD_STOPPING)
151                 GOTO(out, rc = 1);
152
153         list_for_each(tmp, &recovd->recovd_troubled_items) {
154
155                 struct recovd_data *rd = list_entry(tmp, struct recovd_data,
156                                                     rd_managed_chain);
157
158                 if (rd->rd_phase == rd->rd_next_phase ||
159                     rd->rd_phase == RD_FAILED)
160                         GOTO(out, rc = 1);
161         }
162
163  out:
164         spin_unlock(&recovd->recovd_lock);
165         RETURN(rc);
166 }
167
168 static int recovd_handle_event(struct recovd_obd *recovd)
169 {
170         struct list_head *tmp, *n;
171         int rc = 0;
172         ENTRY;
173
174         spin_lock(&recovd->recovd_lock);
175
176         dump_lists(recovd);
177
178         /*
179          * We use _safe here because one of the callbacks, expecially
180          * FAILURE or PREPARED, could move list items around.
181          */
182         list_for_each_safe(tmp, n, &recovd->recovd_troubled_items) {
183                 struct recovd_data *rd = list_entry(tmp, struct recovd_data,
184                                                     rd_managed_chain);
185
186                 if (rd->rd_phase != RD_FAILED &&
187                     rd->rd_phase != rd->rd_next_phase)
188                         continue;
189
190                 switch (rd->rd_phase) {
191                     case RD_FAILED:
192                 cb_failed: /* must always reach here with recovd_lock held! */
193                         CERROR("recovery FAILED for rd %p (conn %p): %d\n",
194                                rd, class_rd2conn(rd), rc);
195                         
196                         spin_unlock(&recovd->recovd_lock);
197                         (void)rd->rd_recover(rd, PTLRPC_RECOVD_PHASE_FAILURE);
198                         spin_lock(&recovd->recovd_lock);
199                         break;
200                         
201                     case RD_TROUBLED:
202                         if (!rd->rd_recover) {
203                                 CERROR("no rd_recover for rd %p (conn %p)\n",
204                                        rd, class_rd2conn(rd));
205                                 rc = -EINVAL;
206                                 break;
207                         }
208                         CERROR("starting recovery for rd %p (conn %p)\n",
209                                rd, class_rd2conn(rd));
210                         rd->rd_phase = RD_PREPARING;
211                         rd->rd_next_phase = RD_PREPARED;
212                         
213                         spin_unlock(&recovd->recovd_lock);
214                         rc = rd->rd_recover(rd, PTLRPC_RECOVD_PHASE_PREPARE);
215                         spin_lock(&recovd->recovd_lock);
216                         if (rc)
217                                 goto cb_failed;
218                         
219                         break;
220                         
221                     case RD_PREPARED:
222                         
223                         CERROR("recovery prepared for rd %p (conn %p)\n",
224                                rd, class_rd2conn(rd));
225                         rd->rd_phase = RD_RECOVERING;
226                         rd->rd_next_phase = RD_RECOVERED;
227                         
228                         spin_unlock(&recovd->recovd_lock);
229                         rc = rd->rd_recover(rd, PTLRPC_RECOVD_PHASE_RECOVER);
230                         spin_lock(&recovd->recovd_lock);
231                         if (rc)
232                                 goto cb_failed;
233                         
234                         break;
235                         
236                     case RD_RECOVERED:
237                         rd->rd_phase = RD_IDLE;
238                         rd->rd_next_phase = RD_TROUBLED;
239                         
240                         CERROR("recovery complete for rd %p (conn %p)\n",
241                                rd, class_rd2conn(rd));
242                         break;
243                         
244                     default:
245                         break;
246                 }
247         }
248         spin_unlock(&recovd->recovd_lock);
249         RETURN(0);
250 }
251
252 static int recovd_main(void *arg)
253 {
254         struct recovd_obd *recovd = (struct recovd_obd *)arg;
255
256         ENTRY;
257
258         lock_kernel();
259         daemonize();
260         spin_lock_irq(&current->sigmask_lock);
261         sigfillset(&current->blocked);
262         recalc_sigpending(current);
263         spin_unlock_irq(&current->sigmask_lock);
264
265         sprintf(current->comm, "lustre_recovd");
266         unlock_kernel();
267
268         /* Signal that the thread is running. */
269         recovd->recovd_thread = current;
270         recovd->recovd_state = RECOVD_READY;
271         wake_up(&recovd->recovd_ctl_waitq);
272
273         /* And now, loop forever on requests. */
274         while (1) {
275                 wait_event(recovd->recovd_waitq, recovd_check_event(recovd));
276                 if (recovd->recovd_state == RECOVD_STOPPING)
277                         break;
278                 recovd_handle_event(recovd);
279         }
280
281         recovd->recovd_thread = NULL;
282         recovd->recovd_state = RECOVD_STOPPED;
283         wake_up(&recovd->recovd_ctl_waitq);
284         CDEBUG(D_HA, "mgr exiting process %d\n", current->pid);
285         RETURN(0);
286 }
287
288 int recovd_setup(struct recovd_obd *recovd)
289 {
290         int rc;
291
292         ENTRY;
293
294         INIT_LIST_HEAD(&recovd->recovd_managed_items);
295         INIT_LIST_HEAD(&recovd->recovd_troubled_items);
296         spin_lock_init(&recovd->recovd_lock);
297
298         init_waitqueue_head(&recovd->recovd_waitq);
299         init_waitqueue_head(&recovd->recovd_recovery_waitq);
300         init_waitqueue_head(&recovd->recovd_ctl_waitq);
301
302         rc = kernel_thread(recovd_main, (void *)recovd,
303                            CLONE_VM | CLONE_FS | CLONE_FILES);
304         if (rc < 0) {
305                 CERROR("cannot start thread\n");
306                 RETURN(-EINVAL);
307         }
308         wait_event(recovd->recovd_ctl_waitq,
309                    recovd->recovd_state == RECOVD_READY);
310
311         ptlrpc_recovd = recovd;
312         class_signal_connection_failure = recovd_conn_fail;
313
314         RETURN(0);
315 }
316
317 int recovd_cleanup(struct recovd_obd *recovd)
318 {
319         ENTRY;
320         spin_lock(&recovd->recovd_lock);
321         recovd->recovd_state = RECOVD_STOPPING;
322         wake_up(&recovd->recovd_waitq);
323         spin_unlock(&recovd->recovd_lock);
324
325         wait_event(recovd->recovd_ctl_waitq,
326                    (recovd->recovd_state == RECOVD_STOPPED));
327         RETURN(0);
328 }
329
/* Global pointer to the recovery daemon state; assigned in recovd_setup(). */
330 struct recovd_obd *ptlrpc_recovd;