Whamcloud - gitweb
050d12554c527a3b756f451296469385197ed0a4
[fs/lustre-release.git] / lustre / ptlrpc / recover.c
1 /* -*- mode: c; c-basic-offset: 8; indent-tabs-mode: nil; -*-
2  * vim:expandtab:shiftwidth=8:tabstop=8:
3  *
4  * GPL HEADER START
5  *
6  * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
7  *
8  * This program is free software; you can redistribute it and/or modify
9  * it under the terms of the GNU General Public License version 2 only,
10  * as published by the Free Software Foundation.
11  *
12  * This program is distributed in the hope that it will be useful, but
13  * WITHOUT ANY WARRANTY; without even the implied warranty of
14  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
15  * General Public License version 2 for more details (a copy is included
16  * in the LICENSE file that accompanied this code).
17  *
18  * You should have received a copy of the GNU General Public License
19  * version 2 along with this program; If not, see
20  * http://www.sun.com/software/products/lustre/docs/GPLv2.pdf
21  *
22  * Please contact Sun Microsystems, Inc., 4150 Network Circle, Santa Clara,
23  * CA 95054 USA or visit www.sun.com if you need additional information or
24  * have any questions.
25  *
26  * GPL HEADER END
27  */
28 /*
29  * Copyright  2008 Sun Microsystems, Inc. All rights reserved
30  * Use is subject to license terms.
31  */
32 /*
33  * This file is part of Lustre, http://www.lustre.org/
34  * Lustre is a trademark of Sun Microsystems, Inc.
35  *
36  * lustre/ptlrpc/recover.c
37  *
38  * Author: Mike Shaver <shaver@clusterfs.com>
39  */
40
41 #define DEBUG_SUBSYSTEM S_RPC
42 #ifdef __KERNEL__
43 # include <libcfs/libcfs.h>
44 #else
45 # include <liblustre.h>
46 #endif
47
48 #include <obd_support.h>
49 #include <lustre_ha.h>
50 #include <lustre_net.h>
51 #include <lustre_import.h>
52 #include <lustre_export.h>
53 #include <obd.h>
54 #include <obd_ost.h>
55 #include <obd_class.h>
56 #include <obd_lov.h> /* for IOC_LOV_SET_OSC_ACTIVE */
57 #include <libcfs/list.h>
58
59 #include "ptlrpc_internal.h"
60
/* Forward declaration; the definition appears later in this file and is
 * called from ptlrpc_recover_import(). */
static int ptlrpc_recover_import_no_retry(struct obd_import *imp,
                                          char *new_uuid);
62
63 /**
64  * Start recovery on disconnected import.
65  * This is done by just attempting a connect
66  */
67 void ptlrpc_initiate_recovery(struct obd_import *imp)
68 {
69         ENTRY;
70
71         CDEBUG(D_HA, "%s: starting recovery\n", obd2cli_tgt(imp->imp_obd));
72         ptlrpc_connect_import(imp, NULL);
73
74         EXIT;
75 }
76
77 /**
78  * Identify what request from replay list needs to be replayed next
79  * (based on what we have already replayed) and send it to server.
80  */
81 int ptlrpc_replay_next(struct obd_import *imp, int *inflight)
82 {
83         int rc = 0;
84         cfs_list_t *tmp, *pos;
85         struct ptlrpc_request *req = NULL;
86         __u64 last_transno;
87         ENTRY;
88
89         *inflight = 0;
90
91         /* It might have committed some after we last spoke, so make sure we
92          * get rid of them now.
93          */
94         cfs_spin_lock(&imp->imp_lock);
95         imp->imp_last_transno_checked = 0;
96         ptlrpc_free_committed(imp);
97         last_transno = imp->imp_last_replay_transno;
98         cfs_spin_unlock(&imp->imp_lock);
99
100         CDEBUG(D_HA, "import %p from %s committed "LPU64" last "LPU64"\n",
101                imp, obd2cli_tgt(imp->imp_obd),
102                imp->imp_peer_committed_transno, last_transno);
103
104         /* Do I need to hold a lock across this iteration?  We shouldn't be
105          * racing with any additions to the list, because we're in recovery
106          * and are therefore not processing additional requests to add.  Calls
107          * to ptlrpc_free_committed might commit requests, but nothing "newer"
108          * than the one we're replaying (it can't be committed until it's
109          * replayed, and we're doing that here).  l_f_e_safe protects against
110          * problems with the current request being committed, in the unlikely
111          * event of that race.  So, in conclusion, I think that it's safe to
112          * perform this list-walk without the imp_lock held.
113          *
114          * But, the {mdc,osc}_replay_open callbacks both iterate
115          * request lists, and have comments saying they assume the
116          * imp_lock is being held by ptlrpc_replay, but it's not. it's
117          * just a little race...
118          */
119         cfs_list_for_each_safe(tmp, pos, &imp->imp_replay_list) {
120                 req = cfs_list_entry(tmp, struct ptlrpc_request,
121                                      rq_replay_list);
122
123                 /* If need to resend the last sent transno (because a
124                    reconnect has occurred), then stop on the matching
125                    req and send it again. If, however, the last sent
126                    transno has been committed then we continue replay
127                    from the next request. */
128                 if (req->rq_transno > last_transno) {
129                         if (imp->imp_resend_replay)
130                                 lustre_msg_add_flags(req->rq_reqmsg,
131                                                      MSG_RESENT);
132                         break;
133                 }
134                 req = NULL;
135         }
136
137         cfs_spin_lock(&imp->imp_lock);
138         imp->imp_resend_replay = 0;
139         cfs_spin_unlock(&imp->imp_lock);
140
141         if (req != NULL) {
142                 rc = ptlrpc_replay_req(req);
143                 if (rc) {
144                         CERROR("recovery replay error %d for req "
145                                LPU64"\n", rc, req->rq_xid);
146                         RETURN(rc);
147                 }
148                 *inflight = 1;
149         }
150         RETURN(rc);
151 }
152
153 /**
154  * Schedule resending of request on sending_list. This is done after
155  * we completed replaying of requests and locks.
156  */
157 int ptlrpc_resend(struct obd_import *imp)
158 {
159         struct ptlrpc_request *req, *next;
160
161         ENTRY;
162
163         /* As long as we're in recovery, nothing should be added to the sending
164          * list, so we don't need to hold the lock during this iteration and
165          * resend process.
166          */
167         /* Well... what if lctl recover is called twice at the same time?
168          */
169         cfs_spin_lock(&imp->imp_lock);
170         if (imp->imp_state != LUSTRE_IMP_RECOVER) {
171                 cfs_spin_unlock(&imp->imp_lock);
172                 RETURN(-1);
173         }
174
175         cfs_list_for_each_entry_safe(req, next, &imp->imp_sending_list,
176                                      rq_list) {
177                 LASSERTF((long)req > CFS_PAGE_SIZE && req != LP_POISON,
178                          "req %p bad\n", req);
179                 LASSERTF(req->rq_type != LI_POISON, "req %p freed\n", req);
180                 if (!ptlrpc_no_resend(req))
181                         ptlrpc_resend_req(req);
182         }
183         cfs_spin_unlock(&imp->imp_lock);
184
185         RETURN(0);
186 }
187
188 /**
189  * Go through all requests in delayed list and wake their threads
190  * for resending
191  */
192 void ptlrpc_wake_delayed(struct obd_import *imp)
193 {
194         cfs_list_t *tmp, *pos;
195         struct ptlrpc_request *req;
196
197         cfs_spin_lock(&imp->imp_lock);
198         cfs_list_for_each_safe(tmp, pos, &imp->imp_delayed_list) {
199                 req = cfs_list_entry(tmp, struct ptlrpc_request, rq_list);
200
201                 DEBUG_REQ(D_HA, req, "waking (set %p):", req->rq_set);
202                 ptlrpc_client_wake_req(req);
203         }
204         cfs_spin_unlock(&imp->imp_lock);
205 }
206
207 void ptlrpc_request_handle_notconn(struct ptlrpc_request *failed_req)
208 {
209         struct obd_import *imp = failed_req->rq_import;
210         ENTRY;
211
212         CDEBUG(D_HA, "import %s of %s@%s abruptly disconnected: reconnecting\n",
213                imp->imp_obd->obd_name, obd2cli_tgt(imp->imp_obd),
214                imp->imp_connection->c_remote_uuid.uuid);
215
216         if (ptlrpc_set_import_discon(imp,
217                               lustre_msg_get_conn_cnt(failed_req->rq_reqmsg))) {
218                 if (!imp->imp_replayable) {
219                         CDEBUG(D_HA, "import %s@%s for %s not replayable, "
220                                "auto-deactivating\n",
221                                obd2cli_tgt(imp->imp_obd),
222                                imp->imp_connection->c_remote_uuid.uuid,
223                                imp->imp_obd->obd_name);
224                         ptlrpc_deactivate_import(imp);
225                 }
226                 /* to control recovery via lctl {disable|enable}_recovery */
227                 if (imp->imp_deactive == 0)
228                         ptlrpc_connect_import(imp, NULL);
229         }
230
231         /* Wait for recovery to complete and resend. If evicted, then
232            this request will be errored out later.*/
233         cfs_spin_lock(&failed_req->rq_lock);
234         if (!failed_req->rq_no_resend)
235                 failed_req->rq_resend = 1;
236         cfs_spin_unlock(&failed_req->rq_lock);
237
238         EXIT;
239 }
240
241 /**
242  * Administratively active/deactive a client. 
243  * This should only be called by the ioctl interface, currently
244  *  - the lctl deactivate and activate commands
245  *  - echo 0/1 >> /proc/osc/XXX/active
246  *  - client umount -f (ll_umount_begin)
247  */
248 int ptlrpc_set_import_active(struct obd_import *imp, int active)
249 {
250         struct obd_device *obd = imp->imp_obd;
251         int rc = 0;
252
253         ENTRY;
254         LASSERT(obd);
255
256         /* When deactivating, mark import invalid, and abort in-flight
257          * requests. */
258         if (!active) {
259                 LCONSOLE_WARN("setting import %s INACTIVE by administrator "
260                               "request\n", obd2cli_tgt(imp->imp_obd));
261
262                 /* set before invalidate to avoid messages about imp_inval
263                  * set without imp_deactive in ptlrpc_import_delay_req */
264                 cfs_spin_lock(&imp->imp_lock);
265                 imp->imp_deactive = 1;
266                 cfs_spin_unlock(&imp->imp_lock);
267
268                 ptlrpc_invalidate_import(imp);
269         }
270
271         /* When activating, mark import valid, and attempt recovery */
272         if (active) {
273                 CDEBUG(D_HA, "setting import %s VALID\n",
274                        obd2cli_tgt(imp->imp_obd));
275                 rc = ptlrpc_recover_import(imp, NULL);
276         }
277
278         RETURN(rc);
279 }
280
281 /* Attempt to reconnect an import */
282 int ptlrpc_recover_import(struct obd_import *imp, char *new_uuid)
283 {
284         int rc;
285         ENTRY;
286
287         cfs_spin_lock(&imp->imp_lock);
288         if (cfs_atomic_read(&imp->imp_inval_count)) {
289                 cfs_spin_unlock(&imp->imp_lock);
290                 RETURN(-EINVAL);
291         }
292         cfs_spin_unlock(&imp->imp_lock);
293
294         /* force import to be disconnected. */
295         ptlrpc_set_import_discon(imp, 0);
296
297         cfs_spin_lock(&imp->imp_lock);
298         imp->imp_deactive = 0;
299         cfs_spin_unlock(&imp->imp_lock);
300
301         rc = ptlrpc_recover_import_no_retry(imp, new_uuid);
302
303         RETURN(rc);
304 }
305
306 int ptlrpc_import_in_recovery(struct obd_import *imp)
307 {
308         int in_recovery = 1;
309         cfs_spin_lock(&imp->imp_lock);
310         if (imp->imp_state == LUSTRE_IMP_FULL ||
311             imp->imp_state == LUSTRE_IMP_CLOSED ||
312             imp->imp_state == LUSTRE_IMP_DISCON)
313                 in_recovery = 0;
314         cfs_spin_unlock(&imp->imp_lock);
315         return in_recovery;
316 }
317
318 static int ptlrpc_recover_import_no_retry(struct obd_import *imp,
319                                           char *new_uuid)
320 {
321         int rc;
322         int in_recovery = 0;
323         struct l_wait_info lwi;
324         ENTRY;
325
326         /* Check if reconnect is already in progress */
327         cfs_spin_lock(&imp->imp_lock);
328         if (imp->imp_state != LUSTRE_IMP_DISCON) {
329                 in_recovery = 1;
330         }
331         cfs_spin_unlock(&imp->imp_lock);
332
333         if (in_recovery == 1)
334                 RETURN(-EALREADY);
335
336         rc = ptlrpc_connect_import(imp, new_uuid);
337         if (rc)
338                 RETURN(rc);
339
340         CDEBUG(D_HA, "%s: recovery started, waiting\n",
341                obd2cli_tgt(imp->imp_obd));
342
343         lwi = LWI_TIMEOUT(cfs_timeout_cap(cfs_time_seconds(obd_timeout)), 
344                           NULL, NULL);
345         rc = l_wait_event(imp->imp_recovery_waitq,
346                           !ptlrpc_import_in_recovery(imp), &lwi);
347         CDEBUG(D_HA, "%s: recovery finished\n",
348                obd2cli_tgt(imp->imp_obd));
349
350         RETURN(rc);
351 }