Whamcloud - gitweb
b=17167 libcfs: ensure all libcfs exported symbols to have cfs_ prefix
[fs/lustre-release.git] / lustre / ptlrpc / recover.c
1 /* -*- mode: c; c-basic-offset: 8; indent-tabs-mode: nil; -*-
2  * vim:expandtab:shiftwidth=8:tabstop=8:
3  *
4  * GPL HEADER START
5  *
6  * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
7  *
8  * This program is free software; you can redistribute it and/or modify
9  * it under the terms of the GNU General Public License version 2 only,
10  * as published by the Free Software Foundation.
11  *
12  * This program is distributed in the hope that it will be useful, but
13  * WITHOUT ANY WARRANTY; without even the implied warranty of
14  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
15  * General Public License version 2 for more details (a copy is included
16  * in the LICENSE file that accompanied this code).
17  *
18  * You should have received a copy of the GNU General Public License
19  * version 2 along with this program; If not, see
20  * http://www.sun.com/software/products/lustre/docs/GPLv2.pdf
21  *
22  * Please contact Sun Microsystems, Inc., 4150 Network Circle, Santa Clara,
23  * CA 95054 USA or visit www.sun.com if you need additional information or
24  * have any questions.
25  *
26  * GPL HEADER END
27  */
28 /*
29  * Copyright  2008 Sun Microsystems, Inc. All rights reserved
30  * Use is subject to license terms.
31  */
32 /*
33  * This file is part of Lustre, http://www.lustre.org/
34  * Lustre is a trademark of Sun Microsystems, Inc.
35  *
36  * lustre/ptlrpc/recover.c
37  *
38  * Author: Mike Shaver <shaver@clusterfs.com>
39  */
40
41 #define DEBUG_SUBSYSTEM S_RPC
42 #ifdef __KERNEL__
43 # include <libcfs/libcfs.h>
44 #else
45 # include <liblustre.h>
46 #endif
47
48 #include <obd_support.h>
49 #include <lustre_ha.h>
50 #include <lustre_net.h>
51 #include <lustre_import.h>
52 #include <lustre_export.h>
53 #include <obd.h>
54 #include <obd_ost.h>
55 #include <obd_class.h>
56 #include <obd_lov.h> /* for IOC_LOV_SET_OSC_ACTIVE */
57 #include <libcfs/list.h>
58
59 #include "ptlrpc_internal.h"
60
61 static int ptlrpc_recover_import_no_retry(struct obd_import *, char *);
62
63 void ptlrpc_initiate_recovery(struct obd_import *imp)
64 {
65         ENTRY;
66
67         CDEBUG(D_HA, "%s: starting recovery\n", obd2cli_tgt(imp->imp_obd));
68         ptlrpc_connect_import(imp, NULL);
69
70         EXIT;
71 }
72
73 int ptlrpc_replay_next(struct obd_import *imp, int *inflight)
74 {
75         int rc = 0;
76         cfs_list_t *tmp, *pos;
77         struct ptlrpc_request *req = NULL;
78         __u64 last_transno;
79         ENTRY;
80
81         *inflight = 0;
82
83         /* It might have committed some after we last spoke, so make sure we
84          * get rid of them now.
85          */
86         cfs_spin_lock(&imp->imp_lock);
87         imp->imp_last_transno_checked = 0;
88         ptlrpc_free_committed(imp);
89         last_transno = imp->imp_last_replay_transno;
90         cfs_spin_unlock(&imp->imp_lock);
91
92         CDEBUG(D_HA, "import %p from %s committed "LPU64" last "LPU64"\n",
93                imp, obd2cli_tgt(imp->imp_obd),
94                imp->imp_peer_committed_transno, last_transno);
95
96         /* Do I need to hold a lock across this iteration?  We shouldn't be
97          * racing with any additions to the list, because we're in recovery
98          * and are therefore not processing additional requests to add.  Calls
99          * to ptlrpc_free_committed might commit requests, but nothing "newer"
100          * than the one we're replaying (it can't be committed until it's
101          * replayed, and we're doing that here).  l_f_e_safe protects against
102          * problems with the current request being committed, in the unlikely
103          * event of that race.  So, in conclusion, I think that it's safe to
104          * perform this list-walk without the imp_lock held.
105          *
106          * But, the {mdc,osc}_replay_open callbacks both iterate
107          * request lists, and have comments saying they assume the
108          * imp_lock is being held by ptlrpc_replay, but it's not. it's
109          * just a little race...
110          */
111         cfs_list_for_each_safe(tmp, pos, &imp->imp_replay_list) {
112                 req = cfs_list_entry(tmp, struct ptlrpc_request,
113                                      rq_replay_list);
114
115                 /* If need to resend the last sent transno (because a
116                    reconnect has occurred), then stop on the matching
117                    req and send it again. If, however, the last sent
118                    transno has been committed then we continue replay
119                    from the next request. */
120                 if (req->rq_transno > last_transno) {
121                         if (imp->imp_resend_replay)
122                                 lustre_msg_add_flags(req->rq_reqmsg,
123                                                      MSG_RESENT);
124                         break;
125                 }
126                 req = NULL;
127         }
128
129         cfs_spin_lock(&imp->imp_lock);
130         imp->imp_resend_replay = 0;
131         cfs_spin_unlock(&imp->imp_lock);
132
133         if (req != NULL) {
134                 rc = ptlrpc_replay_req(req);
135                 if (rc) {
136                         CERROR("recovery replay error %d for req "
137                                LPU64"\n", rc, req->rq_xid);
138                         RETURN(rc);
139                 }
140                 *inflight = 1;
141         }
142         RETURN(rc);
143 }
144
145 int ptlrpc_resend(struct obd_import *imp)
146 {
147         struct ptlrpc_request *req, *next;
148
149         ENTRY;
150
151         /* As long as we're in recovery, nothing should be added to the sending
152          * list, so we don't need to hold the lock during this iteration and
153          * resend process.
154          */
155         /* Well... what if lctl recover is called twice at the same time?
156          */
157         cfs_spin_lock(&imp->imp_lock);
158         if (imp->imp_state != LUSTRE_IMP_RECOVER) {
159                 cfs_spin_unlock(&imp->imp_lock);
160                 RETURN(-1);
161         }
162
163         cfs_list_for_each_entry_safe(req, next, &imp->imp_sending_list,
164                                      rq_list) {
165                 LASSERTF((long)req > CFS_PAGE_SIZE && req != LP_POISON,
166                          "req %p bad\n", req);
167                 LASSERTF(req->rq_type != LI_POISON, "req %p freed\n", req);
168                 if (!req->rq_no_resend)
169                         ptlrpc_resend_req(req);
170         }
171         cfs_spin_unlock(&imp->imp_lock);
172
173         RETURN(0);
174 }
175
176 void ptlrpc_wake_delayed(struct obd_import *imp)
177 {
178         cfs_list_t *tmp, *pos;
179         struct ptlrpc_request *req;
180
181         cfs_spin_lock(&imp->imp_lock);
182         cfs_list_for_each_safe(tmp, pos, &imp->imp_delayed_list) {
183                 req = cfs_list_entry(tmp, struct ptlrpc_request, rq_list);
184
185                 DEBUG_REQ(D_HA, req, "waking (set %p):", req->rq_set);
186                 ptlrpc_client_wake_req(req);
187         }
188         cfs_spin_unlock(&imp->imp_lock);
189 }
190
191 void ptlrpc_request_handle_notconn(struct ptlrpc_request *failed_req)
192 {
193         struct obd_import *imp = failed_req->rq_import;
194         ENTRY;
195
196         CDEBUG(D_HA, "import %s of %s@%s abruptly disconnected: reconnecting\n",
197                imp->imp_obd->obd_name, obd2cli_tgt(imp->imp_obd),
198                imp->imp_connection->c_remote_uuid.uuid);
199
200         if (ptlrpc_set_import_discon(imp,
201                               lustre_msg_get_conn_cnt(failed_req->rq_reqmsg))) {
202                 if (!imp->imp_replayable) {
203                         CDEBUG(D_HA, "import %s@%s for %s not replayable, "
204                                "auto-deactivating\n",
205                                obd2cli_tgt(imp->imp_obd),
206                                imp->imp_connection->c_remote_uuid.uuid,
207                                imp->imp_obd->obd_name);
208                         ptlrpc_deactivate_import(imp);
209                 }
210                 /* to control recovery via lctl {disable|enable}_recovery */
211                 if (imp->imp_deactive == 0)
212                         ptlrpc_connect_import(imp, NULL);
213         }
214
215         /* Wait for recovery to complete and resend. If evicted, then
216            this request will be errored out later.*/
217         cfs_spin_lock(&failed_req->rq_lock);
218         if (!failed_req->rq_no_resend)
219                 failed_req->rq_resend = 1;
220         cfs_spin_unlock(&failed_req->rq_lock);
221
222         EXIT;
223 }
224
225 /*
226  * Administratively active/deactive a client. 
227  * This should only be called by the ioctl interface, currently
228  *  - the lctl deactivate and activate commands
229  *  - echo 0/1 >> /proc/osc/XXX/active
230  *  - client umount -f (ll_umount_begin)
231  */
232 int ptlrpc_set_import_active(struct obd_import *imp, int active)
233 {
234         struct obd_device *obd = imp->imp_obd;
235         int rc = 0;
236
237         ENTRY;
238         LASSERT(obd);
239
240         /* When deactivating, mark import invalid, and abort in-flight
241          * requests. */
242         if (!active) {
243                 LCONSOLE_WARN("setting import %s INACTIVE by administrator "
244                               "request\n", obd2cli_tgt(imp->imp_obd));
245
246                 /* set before invalidate to avoid messages about imp_inval
247                  * set without imp_deactive in ptlrpc_import_delay_req */
248                 cfs_spin_lock(&imp->imp_lock);
249                 imp->imp_deactive = 1;
250                 cfs_spin_unlock(&imp->imp_lock);
251
252                 ptlrpc_invalidate_import(imp);
253         }
254
255         /* When activating, mark import valid, and attempt recovery */
256         if (active) {
257                 CDEBUG(D_HA, "setting import %s VALID\n",
258                        obd2cli_tgt(imp->imp_obd));
259                 rc = ptlrpc_recover_import(imp, NULL);
260         }
261
262         RETURN(rc);
263 }
264
265 /* Attempt to reconnect an import */
266 int ptlrpc_recover_import(struct obd_import *imp, char *new_uuid)
267 {
268         int rc;
269         ENTRY;
270
271         cfs_spin_lock(&imp->imp_lock);
272         if (cfs_atomic_read(&imp->imp_inval_count)) {
273                 cfs_spin_unlock(&imp->imp_lock);
274                 RETURN(-EINVAL);
275         }
276         cfs_spin_unlock(&imp->imp_lock);
277
278         /* force import to be disconnected. */
279         ptlrpc_set_import_discon(imp, 0);
280
281         cfs_spin_lock(&imp->imp_lock);
282         imp->imp_deactive = 0;
283         cfs_spin_unlock(&imp->imp_lock);
284
285         rc = ptlrpc_recover_import_no_retry(imp, new_uuid);
286
287         RETURN(rc);
288 }
289
290 int ptlrpc_import_in_recovery(struct obd_import *imp)
291 {
292         int in_recovery = 1;
293         cfs_spin_lock(&imp->imp_lock);
294         if (imp->imp_state == LUSTRE_IMP_FULL ||
295             imp->imp_state == LUSTRE_IMP_CLOSED ||
296             imp->imp_state == LUSTRE_IMP_DISCON)
297                 in_recovery = 0;
298         cfs_spin_unlock(&imp->imp_lock);
299         return in_recovery;
300 }
301
302 static int ptlrpc_recover_import_no_retry(struct obd_import *imp,
303                                           char *new_uuid)
304 {
305         int rc;
306         int in_recovery = 0;
307         struct l_wait_info lwi;
308         ENTRY;
309
310         /* Check if reconnect is already in progress */
311         cfs_spin_lock(&imp->imp_lock);
312         if (imp->imp_state != LUSTRE_IMP_DISCON) {
313                 in_recovery = 1;
314         }
315         cfs_spin_unlock(&imp->imp_lock);
316
317         if (in_recovery == 1)
318                 RETURN(-EALREADY);
319
320         rc = ptlrpc_connect_import(imp, new_uuid);
321         if (rc)
322                 RETURN(rc);
323
324         CDEBUG(D_HA, "%s: recovery started, waiting\n",
325                obd2cli_tgt(imp->imp_obd));
326
327         lwi = LWI_TIMEOUT(cfs_timeout_cap(cfs_time_seconds(obd_timeout)), 
328                           NULL, NULL);
329         rc = l_wait_event(imp->imp_recovery_waitq,
330                           !ptlrpc_import_in_recovery(imp), &lwi);
331         CDEBUG(D_HA, "%s: recovery finished\n",
332                obd2cli_tgt(imp->imp_obd));
333
334         RETURN(rc);
335 }