Whamcloud - gitweb
b=20595
[fs/lustre-release.git] / lustre / ptlrpc / recover.c
1 /* -*- mode: c; c-basic-offset: 8; indent-tabs-mode: nil; -*-
2  * vim:expandtab:shiftwidth=8:tabstop=8:
3  *
4  * GPL HEADER START
5  *
6  * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
7  *
8  * This program is free software; you can redistribute it and/or modify
9  * it under the terms of the GNU General Public License version 2 only,
10  * as published by the Free Software Foundation.
11  *
12  * This program is distributed in the hope that it will be useful, but
13  * WITHOUT ANY WARRANTY; without even the implied warranty of
14  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
15  * General Public License version 2 for more details (a copy is included
16  * in the LICENSE file that accompanied this code).
17  *
18  * You should have received a copy of the GNU General Public License
19  * version 2 along with this program; If not, see
20  * http://www.sun.com/software/products/lustre/docs/GPLv2.pdf
21  *
22  * Please contact Sun Microsystems, Inc., 4150 Network Circle, Santa Clara,
23  * CA 95054 USA or visit www.sun.com if you need additional information or
24  * have any questions.
25  *
26  * GPL HEADER END
27  */
28 /*
29  * Copyright  2008 Sun Microsystems, Inc. All rights reserved
30  * Use is subject to license terms.
31  */
32 /*
33  * This file is part of Lustre, http://www.lustre.org/
34  * Lustre is a trademark of Sun Microsystems, Inc.
35  *
36  * lustre/ptlrpc/recover.c
37  *
38  * Author: Mike Shaver <shaver@clusterfs.com>
39  */
40
41 #define DEBUG_SUBSYSTEM S_RPC
42 #ifdef __KERNEL__
43 # include <libcfs/libcfs.h>
44 #else
45 # include <liblustre.h>
46 #endif
47
48 #include <obd_support.h>
49 #include <lustre_ha.h>
50 #include <lustre_net.h>
51 #include <lustre_import.h>
52 #include <lustre_export.h>
53 #include <obd.h>
54 #include <obd_ost.h>
55 #include <obd_class.h>
56 #include <obd_lov.h> /* for IOC_LOV_SET_OSC_ACTIVE */
57 #include <libcfs/list.h>
58
59 #include "ptlrpc_internal.h"
60
/* Forward declaration; the definition is at the end of this file. */
static int ptlrpc_recover_import_no_retry(struct obd_import *imp,
                                          char *new_uuid);
62
63 void ptlrpc_initiate_recovery(struct obd_import *imp)
64 {
65         ENTRY;
66
67         CDEBUG(D_HA, "%s: starting recovery\n", obd2cli_tgt(imp->imp_obd));
68         ptlrpc_connect_import(imp, NULL);
69
70         EXIT;
71 }
72
73 int ptlrpc_replay_next(struct obd_import *imp, int *inflight)
74 {
75         int rc = 0;
76         struct list_head *tmp, *pos;
77         struct ptlrpc_request *req = NULL;
78         __u64 last_transno;
79         ENTRY;
80
81         *inflight = 0;
82
83         /* It might have committed some after we last spoke, so make sure we
84          * get rid of them now.
85          */
86         spin_lock(&imp->imp_lock);
87         imp->imp_last_transno_checked = 0;
88         ptlrpc_free_committed(imp);
89         last_transno = imp->imp_last_replay_transno;
90         spin_unlock(&imp->imp_lock);
91
92         CDEBUG(D_HA, "import %p from %s committed "LPU64" last "LPU64"\n",
93                imp, obd2cli_tgt(imp->imp_obd),
94                imp->imp_peer_committed_transno, last_transno);
95
96         /* Do I need to hold a lock across this iteration?  We shouldn't be
97          * racing with any additions to the list, because we're in recovery
98          * and are therefore not processing additional requests to add.  Calls
99          * to ptlrpc_free_committed might commit requests, but nothing "newer"
100          * than the one we're replaying (it can't be committed until it's
101          * replayed, and we're doing that here).  l_f_e_safe protects against
102          * problems with the current request being committed, in the unlikely
103          * event of that race.  So, in conclusion, I think that it's safe to
104          * perform this list-walk without the imp_lock held.
105          *
106          * But, the {mdc,osc}_replay_open callbacks both iterate
107          * request lists, and have comments saying they assume the
108          * imp_lock is being held by ptlrpc_replay, but it's not. it's
109          * just a little race...
110          */
111         list_for_each_safe(tmp, pos, &imp->imp_replay_list) {
112                 req = list_entry(tmp, struct ptlrpc_request, rq_replay_list);
113
114                 /* If need to resend the last sent transno (because a
115                    reconnect has occurred), then stop on the matching
116                    req and send it again. If, however, the last sent
117                    transno has been committed then we continue replay
118                    from the next request. */
119                 if (req->rq_transno > last_transno) {
120                         if (imp->imp_resend_replay)
121                                 lustre_msg_add_flags(req->rq_reqmsg,
122                                                      MSG_RESENT);
123                         break;
124                 }
125                 req = NULL;
126         }
127
128         spin_lock(&imp->imp_lock);
129         imp->imp_resend_replay = 0;
130         spin_unlock(&imp->imp_lock);
131
132         if (req != NULL) {
133                 rc = ptlrpc_replay_req(req);
134                 if (rc) {
135                         CERROR("recovery replay error %d for req "
136                                LPU64"\n", rc, req->rq_xid);
137                         RETURN(rc);
138                 }
139                 *inflight = 1;
140         }
141         RETURN(rc);
142 }
143
144 int ptlrpc_resend(struct obd_import *imp)
145 {
146         struct ptlrpc_request *req, *next;
147
148         ENTRY;
149
150         /* As long as we're in recovery, nothing should be added to the sending
151          * list, so we don't need to hold the lock during this iteration and
152          * resend process.
153          */
154         /* Well... what if lctl recover is called twice at the same time?
155          */
156         spin_lock(&imp->imp_lock);
157         if (imp->imp_state != LUSTRE_IMP_RECOVER) {
158                 spin_unlock(&imp->imp_lock);
159                 RETURN(-1);
160         }
161
162         list_for_each_entry_safe(req, next, &imp->imp_sending_list, rq_list) {
163                 LASSERTF((long)req > CFS_PAGE_SIZE && req != LP_POISON,
164                          "req %p bad\n", req);
165                 LASSERTF(req->rq_type != LI_POISON, "req %p freed\n", req);
166                 if (!req->rq_no_resend)
167                         ptlrpc_resend_req(req);
168         }
169         spin_unlock(&imp->imp_lock);
170
171         RETURN(0);
172 }
173
174 void ptlrpc_wake_delayed(struct obd_import *imp)
175 {
176         struct list_head *tmp, *pos;
177         struct ptlrpc_request *req;
178
179         spin_lock(&imp->imp_lock);
180         list_for_each_safe(tmp, pos, &imp->imp_delayed_list) {
181                 req = list_entry(tmp, struct ptlrpc_request, rq_list);
182
183                 DEBUG_REQ(D_HA, req, "waking (set %p):", req->rq_set);
184                 ptlrpc_client_wake_req(req);
185         }
186         spin_unlock(&imp->imp_lock);
187 }
188
189 void ptlrpc_request_handle_notconn(struct ptlrpc_request *failed_req)
190 {
191         struct obd_import *imp = failed_req->rq_import;
192         ENTRY;
193
194         CDEBUG(D_HA, "import %s of %s@%s abruptly disconnected: reconnecting\n",
195                imp->imp_obd->obd_name, obd2cli_tgt(imp->imp_obd),
196                imp->imp_connection->c_remote_uuid.uuid);
197
198         if (ptlrpc_set_import_discon(imp,
199                               lustre_msg_get_conn_cnt(failed_req->rq_reqmsg))) {
200                 if (!imp->imp_replayable) {
201                         CDEBUG(D_HA, "import %s@%s for %s not replayable, "
202                                "auto-deactivating\n",
203                                obd2cli_tgt(imp->imp_obd),
204                                imp->imp_connection->c_remote_uuid.uuid,
205                                imp->imp_obd->obd_name);
206                         ptlrpc_deactivate_import(imp);
207                 }
208                 /* to control recovery via lctl {disable|enable}_recovery */
209                 if (imp->imp_deactive == 0)
210                         ptlrpc_connect_import(imp, NULL);
211         }
212
213         /* Wait for recovery to complete and resend. If evicted, then
214            this request will be errored out later.*/
215         spin_lock(&failed_req->rq_lock);
216         if (!failed_req->rq_no_resend)
217                 failed_req->rq_resend = 1;
218         spin_unlock(&failed_req->rq_lock);
219
220         EXIT;
221 }
222
223 /*
224  * Administratively active/deactive a client. 
225  * This should only be called by the ioctl interface, currently
226  *  - the lctl deactivate and activate commands
227  *  - echo 0/1 >> /proc/osc/XXX/active
228  *  - client umount -f (ll_umount_begin)
229  */
230 int ptlrpc_set_import_active(struct obd_import *imp, int active)
231 {
232         struct obd_device *obd = imp->imp_obd;
233         int rc = 0;
234
235         ENTRY;
236         LASSERT(obd);
237
238         /* When deactivating, mark import invalid, and abort in-flight
239          * requests. */
240         if (!active) {
241                 LCONSOLE_WARN("setting import %s INACTIVE by administrator "
242                               "request\n", obd2cli_tgt(imp->imp_obd));
243
244                 /* set before invalidate to avoid messages about imp_inval
245                  * set without imp_deactive in ptlrpc_import_delay_req */
246                 spin_lock(&imp->imp_lock);
247                 imp->imp_deactive = 1;
248                 spin_unlock(&imp->imp_lock);
249
250                 ptlrpc_invalidate_import(imp);
251         }
252
253         /* When activating, mark import valid, and attempt recovery */
254         if (active) {
255                 CDEBUG(D_HA, "setting import %s VALID\n",
256                        obd2cli_tgt(imp->imp_obd));
257                 rc = ptlrpc_recover_import(imp, NULL);
258         }
259
260         RETURN(rc);
261 }
262
263 /* Attempt to reconnect an import */
264 int ptlrpc_recover_import(struct obd_import *imp, char *new_uuid)
265 {
266         int rc;
267         ENTRY;
268
269         spin_lock(&imp->imp_lock);
270         if (atomic_read(&imp->imp_inval_count)) {
271                 spin_unlock(&imp->imp_lock);
272                 RETURN(-EINVAL);
273         }
274         spin_unlock(&imp->imp_lock);
275
276         /* force import to be disconnected. */
277         ptlrpc_set_import_discon(imp, 0);
278
279         spin_lock(&imp->imp_lock);
280         imp->imp_deactive = 0;
281         spin_unlock(&imp->imp_lock);
282
283         rc = ptlrpc_recover_import_no_retry(imp, new_uuid);
284
285         RETURN(rc);
286 }
287
288 int ptlrpc_import_in_recovery(struct obd_import *imp)
289 {
290         int in_recovery = 1;
291         spin_lock(&imp->imp_lock);
292         if (imp->imp_state == LUSTRE_IMP_FULL ||
293             imp->imp_state == LUSTRE_IMP_CLOSED ||
294             imp->imp_state == LUSTRE_IMP_DISCON)
295                 in_recovery = 0;
296         spin_unlock(&imp->imp_lock);
297         return in_recovery;
298 }
299
300 static int ptlrpc_recover_import_no_retry(struct obd_import *imp,
301                                           char *new_uuid)
302 {
303         int rc;
304         int in_recovery = 0;
305         struct l_wait_info lwi;
306         ENTRY;
307
308         /* Check if reconnect is already in progress */
309         spin_lock(&imp->imp_lock);
310         if (imp->imp_state != LUSTRE_IMP_DISCON) {
311                 in_recovery = 1;
312         }
313         spin_unlock(&imp->imp_lock);
314
315         if (in_recovery == 1)
316                 RETURN(-EALREADY);
317
318         rc = ptlrpc_connect_import(imp, new_uuid);
319         if (rc)
320                 RETURN(rc);
321
322         CDEBUG(D_HA, "%s: recovery started, waiting\n",
323                obd2cli_tgt(imp->imp_obd));
324
325         lwi = LWI_TIMEOUT(cfs_timeout_cap(cfs_time_seconds(obd_timeout)), 
326                           NULL, NULL);
327         rc = l_wait_event(imp->imp_recovery_waitq,
328                           !ptlrpc_import_in_recovery(imp), &lwi);
329         CDEBUG(D_HA, "%s: recovery finished\n",
330                obd2cli_tgt(imp->imp_obd));
331
332         RETURN(rc);
333 }