Whamcloud - gitweb
LU-1346 libcfs: replace libcfs wrappers with kernel API
[fs/lustre-release.git] / lustre / ptlrpc / recover.c
1 /*
2  * GPL HEADER START
3  *
4  * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
5  *
6  * This program is free software; you can redistribute it and/or modify
7  * it under the terms of the GNU General Public License version 2 only,
8  * as published by the Free Software Foundation.
9  *
10  * This program is distributed in the hope that it will be useful, but
11  * WITHOUT ANY WARRANTY; without even the implied warranty of
12  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
13  * General Public License version 2 for more details (a copy is included
14  * in the LICENSE file that accompanied this code).
15  *
16  * You should have received a copy of the GNU General Public License
17  * version 2 along with this program; If not, see
18  * http://www.sun.com/software/products/lustre/docs/GPLv2.pdf
19  *
20  * Please contact Sun Microsystems, Inc., 4150 Network Circle, Santa Clara,
21  * CA 95054 USA or visit www.sun.com if you need additional information or
22  * have any questions.
23  *
24  * GPL HEADER END
25  */
26 /*
27  * Copyright (c) 2002, 2010, Oracle and/or its affiliates. All rights reserved.
28  * Use is subject to license terms.
29  *
30  * Copyright (c) 2011, Whamcloud, Inc.
31  */
32 /*
33  * This file is part of Lustre, http://www.lustre.org/
34  * Lustre is a trademark of Sun Microsystems, Inc.
35  *
36  * lustre/ptlrpc/recover.c
37  *
38  * Author: Mike Shaver <shaver@clusterfs.com>
39  */
40
41 #define DEBUG_SUBSYSTEM S_RPC
42 #ifdef __KERNEL__
43 # include <libcfs/libcfs.h>
44 #else
45 # include <liblustre.h>
46 #endif
47
48 #include <obd_support.h>
49 #include <lustre_ha.h>
50 #include <lustre_net.h>
51 #include <lustre_import.h>
52 #include <lustre_export.h>
53 #include <obd.h>
54 #include <obd_ost.h>
55 #include <obd_class.h>
56 #include <obd_lov.h> /* for IOC_LOV_SET_OSC_ACTIVE */
57 #include <libcfs/list.h>
58
59 #include "ptlrpc_internal.h"
60
61 /**
62  * Start recovery on disconnected import.
63  * This is done by just attempting a connect
64  */
65 void ptlrpc_initiate_recovery(struct obd_import *imp)
66 {
67         ENTRY;
68
69         CDEBUG(D_HA, "%s: starting recovery\n", obd2cli_tgt(imp->imp_obd));
70         ptlrpc_connect_import(imp);
71
72         EXIT;
73 }
74
75 /**
76  * Identify what request from replay list needs to be replayed next
77  * (based on what we have already replayed) and send it to server.
78  */
79 int ptlrpc_replay_next(struct obd_import *imp, int *inflight)
80 {
81         int rc = 0;
82         cfs_list_t *tmp, *pos;
83         struct ptlrpc_request *req = NULL;
84         __u64 last_transno;
85         ENTRY;
86
87         *inflight = 0;
88
89         /* It might have committed some after we last spoke, so make sure we
90          * get rid of them now.
91          */
92         spin_lock(&imp->imp_lock);
93         imp->imp_last_transno_checked = 0;
94         ptlrpc_free_committed(imp);
95         last_transno = imp->imp_last_replay_transno;
96         spin_unlock(&imp->imp_lock);
97
98         CDEBUG(D_HA, "import %p from %s committed "LPU64" last "LPU64"\n",
99                imp, obd2cli_tgt(imp->imp_obd),
100                imp->imp_peer_committed_transno, last_transno);
101
102         /* Do I need to hold a lock across this iteration?  We shouldn't be
103          * racing with any additions to the list, because we're in recovery
104          * and are therefore not processing additional requests to add.  Calls
105          * to ptlrpc_free_committed might commit requests, but nothing "newer"
106          * than the one we're replaying (it can't be committed until it's
107          * replayed, and we're doing that here).  l_f_e_safe protects against
108          * problems with the current request being committed, in the unlikely
109          * event of that race.  So, in conclusion, I think that it's safe to
110          * perform this list-walk without the imp_lock held.
111          *
112          * But, the {mdc,osc}_replay_open callbacks both iterate
113          * request lists, and have comments saying they assume the
114          * imp_lock is being held by ptlrpc_replay, but it's not. it's
115          * just a little race...
116          */
117         cfs_list_for_each_safe(tmp, pos, &imp->imp_replay_list) {
118                 req = cfs_list_entry(tmp, struct ptlrpc_request,
119                                      rq_replay_list);
120
121                 /* If need to resend the last sent transno (because a
122                    reconnect has occurred), then stop on the matching
123                    req and send it again. If, however, the last sent
124                    transno has been committed then we continue replay
125                    from the next request. */
126                 if (req->rq_transno > last_transno) {
127                         if (imp->imp_resend_replay)
128                                 lustre_msg_add_flags(req->rq_reqmsg,
129                                                      MSG_RESENT);
130                         break;
131                 }
132                 req = NULL;
133         }
134
135         spin_lock(&imp->imp_lock);
136         imp->imp_resend_replay = 0;
137         spin_unlock(&imp->imp_lock);
138
139         if (req != NULL) {
140                 rc = ptlrpc_replay_req(req);
141                 if (rc) {
142                         CERROR("recovery replay error %d for req "
143                                LPU64"\n", rc, req->rq_xid);
144                         RETURN(rc);
145                 }
146                 *inflight = 1;
147         }
148         RETURN(rc);
149 }
150
151 /**
152  * Schedule resending of request on sending_list. This is done after
153  * we completed replaying of requests and locks.
154  */
155 int ptlrpc_resend(struct obd_import *imp)
156 {
157         struct ptlrpc_request *req, *next;
158
159         ENTRY;
160
161         /* As long as we're in recovery, nothing should be added to the sending
162          * list, so we don't need to hold the lock during this iteration and
163          * resend process.
164          */
165         /* Well... what if lctl recover is called twice at the same time?
166          */
167         spin_lock(&imp->imp_lock);
168         if (imp->imp_state != LUSTRE_IMP_RECOVER) {
169                 spin_unlock(&imp->imp_lock);
170                 RETURN(-1);
171         }
172
173         cfs_list_for_each_entry_safe(req, next, &imp->imp_sending_list,
174                                      rq_list) {
175                 LASSERTF((long)req > CFS_PAGE_SIZE && req != LP_POISON,
176                          "req %p bad\n", req);
177                 LASSERTF(req->rq_type != LI_POISON, "req %p freed\n", req);
178                 if (!ptlrpc_no_resend(req))
179                         ptlrpc_resend_req(req);
180         }
181         spin_unlock(&imp->imp_lock);
182
183         RETURN(0);
184 }
185 EXPORT_SYMBOL(ptlrpc_resend);
186
187 /**
188  * Go through all requests in delayed list and wake their threads
189  * for resending
190  */
191 void ptlrpc_wake_delayed(struct obd_import *imp)
192 {
193         cfs_list_t *tmp, *pos;
194         struct ptlrpc_request *req;
195
196         spin_lock(&imp->imp_lock);
197         cfs_list_for_each_safe(tmp, pos, &imp->imp_delayed_list) {
198                 req = cfs_list_entry(tmp, struct ptlrpc_request, rq_list);
199
200                 DEBUG_REQ(D_HA, req, "waking (set %p):", req->rq_set);
201                 ptlrpc_client_wake_req(req);
202         }
203         spin_unlock(&imp->imp_lock);
204 }
205 EXPORT_SYMBOL(ptlrpc_wake_delayed);
206
207 void ptlrpc_request_handle_notconn(struct ptlrpc_request *failed_req)
208 {
209         struct obd_import *imp = failed_req->rq_import;
210         ENTRY;
211
212         CDEBUG(D_HA, "import %s of %s@%s abruptly disconnected: reconnecting\n",
213                imp->imp_obd->obd_name, obd2cli_tgt(imp->imp_obd),
214                imp->imp_connection->c_remote_uuid.uuid);
215
216         if (ptlrpc_set_import_discon(imp,
217                               lustre_msg_get_conn_cnt(failed_req->rq_reqmsg))) {
218                 if (!imp->imp_replayable) {
219                         CDEBUG(D_HA, "import %s@%s for %s not replayable, "
220                                "auto-deactivating\n",
221                                obd2cli_tgt(imp->imp_obd),
222                                imp->imp_connection->c_remote_uuid.uuid,
223                                imp->imp_obd->obd_name);
224                         ptlrpc_deactivate_import(imp);
225                 }
226                 /* to control recovery via lctl {disable|enable}_recovery */
227                 if (imp->imp_deactive == 0)
228                         ptlrpc_connect_import(imp);
229         }
230
231         /* Wait for recovery to complete and resend. If evicted, then
232            this request will be errored out later.*/
233         spin_lock(&failed_req->rq_lock);
234         if (!failed_req->rq_no_resend)
235                 failed_req->rq_resend = 1;
236         spin_unlock(&failed_req->rq_lock);
237
238         EXIT;
239 }
240
241 /**
242  * Administratively active/deactive a client. 
243  * This should only be called by the ioctl interface, currently
244  *  - the lctl deactivate and activate commands
245  *  - echo 0/1 >> /proc/osc/XXX/active
246  *  - client umount -f (ll_umount_begin)
247  */
248 int ptlrpc_set_import_active(struct obd_import *imp, int active)
249 {
250         struct obd_device *obd = imp->imp_obd;
251         int rc = 0;
252
253         ENTRY;
254         LASSERT(obd);
255
256         /* When deactivating, mark import invalid, and abort in-flight
257          * requests. */
258         if (!active) {
259                 LCONSOLE_WARN("setting import %s INACTIVE by administrator "
260                               "request\n", obd2cli_tgt(imp->imp_obd));
261
262                 /* set before invalidate to avoid messages about imp_inval
263                  * set without imp_deactive in ptlrpc_import_delay_req */
264                 spin_lock(&imp->imp_lock);
265                 imp->imp_deactive = 1;
266                 spin_unlock(&imp->imp_lock);
267
268                 obd_import_event(imp->imp_obd, imp, IMP_EVENT_DEACTIVATE);
269
270                 ptlrpc_invalidate_import(imp);
271         }
272
273         /* When activating, mark import valid, and attempt recovery */
274         if (active) {
275                 CDEBUG(D_HA, "setting import %s VALID\n",
276                        obd2cli_tgt(imp->imp_obd));
277
278                 spin_lock(&imp->imp_lock);
279                 imp->imp_deactive = 0;
280                 spin_unlock(&imp->imp_lock);
281                 obd_import_event(imp->imp_obd, imp, IMP_EVENT_ACTIVATE);
282
283                 rc = ptlrpc_recover_import(imp, NULL, 0);
284         }
285
286         RETURN(rc);
287 }
288 EXPORT_SYMBOL(ptlrpc_set_import_active);
289
290 /* Attempt to reconnect an import */
291 int ptlrpc_recover_import(struct obd_import *imp, char *new_uuid, int async)
292 {
293         int rc = 0;
294         ENTRY;
295
296         spin_lock(&imp->imp_lock);
297         if (imp->imp_state == LUSTRE_IMP_NEW || imp->imp_deactive ||
298             cfs_atomic_read(&imp->imp_inval_count))
299                 rc = -EINVAL;
300         spin_unlock(&imp->imp_lock);
301         if (rc)
302                 GOTO(out, rc);
303
304         /* force import to be disconnected. */
305         ptlrpc_set_import_discon(imp, 0);
306
307         if (new_uuid) {
308                 struct obd_uuid uuid;
309
310                 /* intruct import to use new uuid */
311                 obd_str2uuid(&uuid, new_uuid);
312                 rc = import_set_conn_priority(imp, &uuid);
313                 if (rc)
314                         GOTO(out, rc);
315         }
316
317         /* Check if reconnect is already in progress */
318         spin_lock(&imp->imp_lock);
319         if (imp->imp_state != LUSTRE_IMP_DISCON) {
320                 imp->imp_force_verify = 1;
321                 rc = -EALREADY;
322         }
323         spin_unlock(&imp->imp_lock);
324         if (rc)
325                 GOTO(out, rc);
326
327         rc = ptlrpc_connect_import(imp);
328         if (rc)
329                 GOTO(out, rc);
330
331         if (!async) {
332                 struct l_wait_info lwi;
333                 int secs = cfs_time_seconds(obd_timeout);
334
335                 CDEBUG(D_HA, "%s: recovery started, waiting %u seconds\n",
336                        obd2cli_tgt(imp->imp_obd), secs);
337
338                 lwi = LWI_TIMEOUT(secs, NULL, NULL);
339                 rc = l_wait_event(imp->imp_recovery_waitq,
340                                   !ptlrpc_import_in_recovery(imp), &lwi);
341                 CDEBUG(D_HA, "%s: recovery finished\n",
342                        obd2cli_tgt(imp->imp_obd));
343         }
344         EXIT;
345
346 out:
347         return rc;
348 }
349 EXPORT_SYMBOL(ptlrpc_recover_import);
350
351 int ptlrpc_import_in_recovery(struct obd_import *imp)
352 {
353         int in_recovery = 1;
354         spin_lock(&imp->imp_lock);
355         if (imp->imp_state == LUSTRE_IMP_FULL ||
356             imp->imp_state == LUSTRE_IMP_CLOSED ||
357             imp->imp_state == LUSTRE_IMP_DISCON)
358                 in_recovery = 0;
359         spin_unlock(&imp->imp_lock);
360         return in_recovery;
361 }