Whamcloud - gitweb
LU-1146 build: batch update copyright messages
[fs/lustre-release.git] / lustre / ptlrpc / recover.c
1 /* -*- mode: c; c-basic-offset: 8; indent-tabs-mode: nil; -*-
2  * vim:expandtab:shiftwidth=8:tabstop=8:
3  *
4  * GPL HEADER START
5  *
6  * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
7  *
8  * This program is free software; you can redistribute it and/or modify
9  * it under the terms of the GNU General Public License version 2 only,
10  * as published by the Free Software Foundation.
11  *
12  * This program is distributed in the hope that it will be useful, but
13  * WITHOUT ANY WARRANTY; without even the implied warranty of
14  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
15  * General Public License version 2 for more details (a copy is included
16  * in the LICENSE file that accompanied this code).
17  *
18  * You should have received a copy of the GNU General Public License
19  * version 2 along with this program; If not, see
20  * http://www.sun.com/software/products/lustre/docs/GPLv2.pdf
21  *
22  * Please contact Sun Microsystems, Inc., 4150 Network Circle, Santa Clara,
23  * CA 95054 USA or visit www.sun.com if you need additional information or
24  * have any questions.
25  *
26  * GPL HEADER END
27  */
28 /*
29  * Copyright (c) 2002, 2010, Oracle and/or its affiliates. All rights reserved.
30  * Use is subject to license terms.
31  *
32  * Copyright (c) 2011, Whamcloud, Inc.
33  */
34 /*
35  * This file is part of Lustre, http://www.lustre.org/
36  * Lustre is a trademark of Sun Microsystems, Inc.
37  *
38  * lustre/ptlrpc/recover.c
39  *
40  * Author: Mike Shaver <shaver@clusterfs.com>
41  */
42
43 #define DEBUG_SUBSYSTEM S_RPC
44 #ifdef __KERNEL__
45 # include <libcfs/libcfs.h>
46 #else
47 # include <liblustre.h>
48 #endif
49
50 #include <obd_support.h>
51 #include <lustre_ha.h>
52 #include <lustre_net.h>
53 #include <lustre_import.h>
54 #include <lustre_export.h>
55 #include <obd.h>
56 #include <obd_ost.h>
57 #include <obd_class.h>
58 #include <obd_lov.h> /* for IOC_LOV_SET_OSC_ACTIVE */
59 #include <libcfs/list.h>
60
61 #include "ptlrpc_internal.h"
62
63 /**
64  * Start recovery on disconnected import.
65  * This is done by just attempting a connect
66  */
67 void ptlrpc_initiate_recovery(struct obd_import *imp)
68 {
69         ENTRY;
70
71         CDEBUG(D_HA, "%s: starting recovery\n", obd2cli_tgt(imp->imp_obd));
72         ptlrpc_connect_import(imp);
73
74         EXIT;
75 }
76
77 /**
78  * Identify what request from replay list needs to be replayed next
79  * (based on what we have already replayed) and send it to server.
80  */
81 int ptlrpc_replay_next(struct obd_import *imp, int *inflight)
82 {
83         int rc = 0;
84         cfs_list_t *tmp, *pos;
85         struct ptlrpc_request *req = NULL;
86         __u64 last_transno;
87         ENTRY;
88
89         *inflight = 0;
90
91         /* It might have committed some after we last spoke, so make sure we
92          * get rid of them now.
93          */
94         cfs_spin_lock(&imp->imp_lock);
95         imp->imp_last_transno_checked = 0;
96         ptlrpc_free_committed(imp);
97         last_transno = imp->imp_last_replay_transno;
98         cfs_spin_unlock(&imp->imp_lock);
99
100         CDEBUG(D_HA, "import %p from %s committed "LPU64" last "LPU64"\n",
101                imp, obd2cli_tgt(imp->imp_obd),
102                imp->imp_peer_committed_transno, last_transno);
103
104         /* Do I need to hold a lock across this iteration?  We shouldn't be
105          * racing with any additions to the list, because we're in recovery
106          * and are therefore not processing additional requests to add.  Calls
107          * to ptlrpc_free_committed might commit requests, but nothing "newer"
108          * than the one we're replaying (it can't be committed until it's
109          * replayed, and we're doing that here).  l_f_e_safe protects against
110          * problems with the current request being committed, in the unlikely
111          * event of that race.  So, in conclusion, I think that it's safe to
112          * perform this list-walk without the imp_lock held.
113          *
114          * But, the {mdc,osc}_replay_open callbacks both iterate
115          * request lists, and have comments saying they assume the
116          * imp_lock is being held by ptlrpc_replay, but it's not. it's
117          * just a little race...
118          */
119         cfs_list_for_each_safe(tmp, pos, &imp->imp_replay_list) {
120                 req = cfs_list_entry(tmp, struct ptlrpc_request,
121                                      rq_replay_list);
122
123                 /* If need to resend the last sent transno (because a
124                    reconnect has occurred), then stop on the matching
125                    req and send it again. If, however, the last sent
126                    transno has been committed then we continue replay
127                    from the next request. */
128                 if (req->rq_transno > last_transno) {
129                         if (imp->imp_resend_replay)
130                                 lustre_msg_add_flags(req->rq_reqmsg,
131                                                      MSG_RESENT);
132                         break;
133                 }
134                 req = NULL;
135         }
136
137         cfs_spin_lock(&imp->imp_lock);
138         imp->imp_resend_replay = 0;
139         cfs_spin_unlock(&imp->imp_lock);
140
141         if (req != NULL) {
142                 rc = ptlrpc_replay_req(req);
143                 if (rc) {
144                         CERROR("recovery replay error %d for req "
145                                LPU64"\n", rc, req->rq_xid);
146                         RETURN(rc);
147                 }
148                 *inflight = 1;
149         }
150         RETURN(rc);
151 }
152
153 /**
154  * Schedule resending of request on sending_list. This is done after
155  * we completed replaying of requests and locks.
156  */
157 int ptlrpc_resend(struct obd_import *imp)
158 {
159         struct ptlrpc_request *req, *next;
160
161         ENTRY;
162
163         /* As long as we're in recovery, nothing should be added to the sending
164          * list, so we don't need to hold the lock during this iteration and
165          * resend process.
166          */
167         /* Well... what if lctl recover is called twice at the same time?
168          */
169         cfs_spin_lock(&imp->imp_lock);
170         if (imp->imp_state != LUSTRE_IMP_RECOVER) {
171                 cfs_spin_unlock(&imp->imp_lock);
172                 RETURN(-1);
173         }
174
175         cfs_list_for_each_entry_safe(req, next, &imp->imp_sending_list,
176                                      rq_list) {
177                 LASSERTF((long)req > CFS_PAGE_SIZE && req != LP_POISON,
178                          "req %p bad\n", req);
179                 LASSERTF(req->rq_type != LI_POISON, "req %p freed\n", req);
180                 if (!ptlrpc_no_resend(req))
181                         ptlrpc_resend_req(req);
182         }
183         cfs_spin_unlock(&imp->imp_lock);
184
185         RETURN(0);
186 }
187
188 /**
189  * Go through all requests in delayed list and wake their threads
190  * for resending
191  */
192 void ptlrpc_wake_delayed(struct obd_import *imp)
193 {
194         cfs_list_t *tmp, *pos;
195         struct ptlrpc_request *req;
196
197         cfs_spin_lock(&imp->imp_lock);
198         cfs_list_for_each_safe(tmp, pos, &imp->imp_delayed_list) {
199                 req = cfs_list_entry(tmp, struct ptlrpc_request, rq_list);
200
201                 DEBUG_REQ(D_HA, req, "waking (set %p):", req->rq_set);
202                 ptlrpc_client_wake_req(req);
203         }
204         cfs_spin_unlock(&imp->imp_lock);
205 }
206
207 void ptlrpc_request_handle_notconn(struct ptlrpc_request *failed_req)
208 {
209         struct obd_import *imp = failed_req->rq_import;
210         ENTRY;
211
212         CDEBUG(D_HA, "import %s of %s@%s abruptly disconnected: reconnecting\n",
213                imp->imp_obd->obd_name, obd2cli_tgt(imp->imp_obd),
214                imp->imp_connection->c_remote_uuid.uuid);
215
216         if (ptlrpc_set_import_discon(imp,
217                               lustre_msg_get_conn_cnt(failed_req->rq_reqmsg))) {
218                 if (!imp->imp_replayable) {
219                         CDEBUG(D_HA, "import %s@%s for %s not replayable, "
220                                "auto-deactivating\n",
221                                obd2cli_tgt(imp->imp_obd),
222                                imp->imp_connection->c_remote_uuid.uuid,
223                                imp->imp_obd->obd_name);
224                         ptlrpc_deactivate_import(imp);
225                 }
226                 /* to control recovery via lctl {disable|enable}_recovery */
227                 if (imp->imp_deactive == 0)
228                         ptlrpc_connect_import(imp);
229         }
230
231         /* Wait for recovery to complete and resend. If evicted, then
232            this request will be errored out later.*/
233         cfs_spin_lock(&failed_req->rq_lock);
234         if (!failed_req->rq_no_resend)
235                 failed_req->rq_resend = 1;
236         cfs_spin_unlock(&failed_req->rq_lock);
237
238         EXIT;
239 }
240
241 /**
242  * Administratively active/deactive a client. 
243  * This should only be called by the ioctl interface, currently
244  *  - the lctl deactivate and activate commands
245  *  - echo 0/1 >> /proc/osc/XXX/active
246  *  - client umount -f (ll_umount_begin)
247  */
248 int ptlrpc_set_import_active(struct obd_import *imp, int active)
249 {
250         struct obd_device *obd = imp->imp_obd;
251         int rc = 0;
252
253         ENTRY;
254         LASSERT(obd);
255
256         /* When deactivating, mark import invalid, and abort in-flight
257          * requests. */
258         if (!active) {
259                 LCONSOLE_WARN("setting import %s INACTIVE by administrator "
260                               "request\n", obd2cli_tgt(imp->imp_obd));
261
262                 /* set before invalidate to avoid messages about imp_inval
263                  * set without imp_deactive in ptlrpc_import_delay_req */
264                 cfs_spin_lock(&imp->imp_lock);
265                 imp->imp_deactive = 1;
266                 cfs_spin_unlock(&imp->imp_lock);
267
268                 obd_import_event(imp->imp_obd, imp, IMP_EVENT_DEACTIVATE);
269
270                 ptlrpc_invalidate_import(imp);
271         }
272
273         /* When activating, mark import valid, and attempt recovery */
274         if (active) {
275                 CDEBUG(D_HA, "setting import %s VALID\n",
276                        obd2cli_tgt(imp->imp_obd));
277
278                 cfs_spin_lock(&imp->imp_lock);
279                 imp->imp_deactive = 0;
280                 cfs_spin_unlock(&imp->imp_lock);
281                 obd_import_event(imp->imp_obd, imp, IMP_EVENT_ACTIVATE);
282
283                 rc = ptlrpc_recover_import(imp, NULL, 0);
284         }
285
286         RETURN(rc);
287 }
288
289 /* Attempt to reconnect an import */
290 int ptlrpc_recover_import(struct obd_import *imp, char *new_uuid, int async)
291 {
292         int rc = 0;
293         ENTRY;
294
295         cfs_spin_lock(&imp->imp_lock);
296         if (imp->imp_state == LUSTRE_IMP_NEW || imp->imp_deactive ||
297             cfs_atomic_read(&imp->imp_inval_count))
298                 rc = -EINVAL;
299         cfs_spin_unlock(&imp->imp_lock);
300         if (rc)
301                 GOTO(out, rc);
302
303         /* force import to be disconnected. */
304         ptlrpc_set_import_discon(imp, 0);
305
306         if (new_uuid) {
307                 struct obd_uuid uuid;
308
309                 /* intruct import to use new uuid */
310                 obd_str2uuid(&uuid, new_uuid);
311                 rc = import_set_conn_priority(imp, &uuid);
312                 if (rc)
313                         GOTO(out, rc);
314         }
315
316         /* Check if reconnect is already in progress */
317         cfs_spin_lock(&imp->imp_lock);
318         if (imp->imp_state != LUSTRE_IMP_DISCON) {
319                 imp->imp_force_verify = 1;
320                 rc = -EALREADY;
321         }
322         cfs_spin_unlock(&imp->imp_lock);
323         if (rc)
324                 GOTO(out, rc);
325
326         rc = ptlrpc_connect_import(imp);
327         if (rc)
328                 GOTO(out, rc);
329
330         if (!async) {
331                 struct l_wait_info lwi;
332                 int secs = cfs_time_seconds(obd_timeout);
333
334                 CDEBUG(D_HA, "%s: recovery started, waiting %u seconds\n",
335                        obd2cli_tgt(imp->imp_obd), secs);
336
337                 lwi = LWI_TIMEOUT(secs, NULL, NULL);
338                 rc = l_wait_event(imp->imp_recovery_waitq,
339                                   !ptlrpc_import_in_recovery(imp), &lwi);
340                 CDEBUG(D_HA, "%s: recovery finished\n",
341                        obd2cli_tgt(imp->imp_obd));
342         }
343         EXIT;
344
345 out:
346         return rc;
347 }
348
349 int ptlrpc_import_in_recovery(struct obd_import *imp)
350 {
351         int in_recovery = 1;
352         cfs_spin_lock(&imp->imp_lock);
353         if (imp->imp_state == LUSTRE_IMP_FULL ||
354             imp->imp_state == LUSTRE_IMP_CLOSED ||
355             imp->imp_state == LUSTRE_IMP_DISCON)
356                 in_recovery = 0;
357         cfs_spin_unlock(&imp->imp_lock);
358         return in_recovery;
359 }