Whamcloud - gitweb
LU-572 ptlrpc: make ptlrpc_recover_import() async
[fs/lustre-release.git] / lustre / ptlrpc / recover.c
1 /* -*- mode: c; c-basic-offset: 8; indent-tabs-mode: nil; -*-
2  * vim:expandtab:shiftwidth=8:tabstop=8:
3  *
4  * GPL HEADER START
5  *
6  * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
7  *
8  * This program is free software; you can redistribute it and/or modify
9  * it under the terms of the GNU General Public License version 2 only,
10  * as published by the Free Software Foundation.
11  *
12  * This program is distributed in the hope that it will be useful, but
13  * WITHOUT ANY WARRANTY; without even the implied warranty of
14  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
15  * General Public License version 2 for more details (a copy is included
16  * in the LICENSE file that accompanied this code).
17  *
18  * You should have received a copy of the GNU General Public License
19  * version 2 along with this program; If not, see
20  * http://www.sun.com/software/products/lustre/docs/GPLv2.pdf
21  *
22  * Please contact Sun Microsystems, Inc., 4150 Network Circle, Santa Clara,
23  * CA 95054 USA or visit www.sun.com if you need additional information or
24  * have any questions.
25  *
26  * GPL HEADER END
27  */
28 /*
29  * Copyright (c) 2002, 2010, Oracle and/or its affiliates. All rights reserved.
30  * Use is subject to license terms.
31  */
32 /*
33  * This file is part of Lustre, http://www.lustre.org/
34  * Lustre is a trademark of Sun Microsystems, Inc.
35  *
36  * lustre/ptlrpc/recover.c
37  *
38  * Author: Mike Shaver <shaver@clusterfs.com>
39  */
40
41 #define DEBUG_SUBSYSTEM S_RPC
42 #ifdef __KERNEL__
43 # include <libcfs/libcfs.h>
44 #else
45 # include <liblustre.h>
46 #endif
47
48 #include <obd_support.h>
49 #include <lustre_ha.h>
50 #include <lustre_net.h>
51 #include <lustre_import.h>
52 #include <lustre_export.h>
53 #include <obd.h>
54 #include <obd_ost.h>
55 #include <obd_class.h>
56 #include <obd_lov.h> /* for IOC_LOV_SET_OSC_ACTIVE */
57 #include <libcfs/list.h>
58
59 #include "ptlrpc_internal.h"
60
61 /**
62  * Start recovery on disconnected import.
63  * This is done by just attempting a connect
64  */
65 void ptlrpc_initiate_recovery(struct obd_import *imp)
66 {
67         ENTRY;
68
69         CDEBUG(D_HA, "%s: starting recovery\n", obd2cli_tgt(imp->imp_obd));
70         ptlrpc_connect_import(imp, NULL);
71
72         EXIT;
73 }
74
75 /**
76  * Identify what request from replay list needs to be replayed next
77  * (based on what we have already replayed) and send it to server.
78  */
79 int ptlrpc_replay_next(struct obd_import *imp, int *inflight)
80 {
81         int rc = 0;
82         cfs_list_t *tmp, *pos;
83         struct ptlrpc_request *req = NULL;
84         __u64 last_transno;
85         ENTRY;
86
87         *inflight = 0;
88
89         /* It might have committed some after we last spoke, so make sure we
90          * get rid of them now.
91          */
92         cfs_spin_lock(&imp->imp_lock);
93         imp->imp_last_transno_checked = 0;
94         ptlrpc_free_committed(imp);
95         last_transno = imp->imp_last_replay_transno;
96         cfs_spin_unlock(&imp->imp_lock);
97
98         CDEBUG(D_HA, "import %p from %s committed "LPU64" last "LPU64"\n",
99                imp, obd2cli_tgt(imp->imp_obd),
100                imp->imp_peer_committed_transno, last_transno);
101
102         /* Do I need to hold a lock across this iteration?  We shouldn't be
103          * racing with any additions to the list, because we're in recovery
104          * and are therefore not processing additional requests to add.  Calls
105          * to ptlrpc_free_committed might commit requests, but nothing "newer"
106          * than the one we're replaying (it can't be committed until it's
107          * replayed, and we're doing that here).  l_f_e_safe protects against
108          * problems with the current request being committed, in the unlikely
109          * event of that race.  So, in conclusion, I think that it's safe to
110          * perform this list-walk without the imp_lock held.
111          *
112          * But, the {mdc,osc}_replay_open callbacks both iterate
113          * request lists, and have comments saying they assume the
114          * imp_lock is being held by ptlrpc_replay, but it's not. it's
115          * just a little race...
116          */
117         cfs_list_for_each_safe(tmp, pos, &imp->imp_replay_list) {
118                 req = cfs_list_entry(tmp, struct ptlrpc_request,
119                                      rq_replay_list);
120
121                 /* If need to resend the last sent transno (because a
122                    reconnect has occurred), then stop on the matching
123                    req and send it again. If, however, the last sent
124                    transno has been committed then we continue replay
125                    from the next request. */
126                 if (req->rq_transno > last_transno) {
127                         if (imp->imp_resend_replay)
128                                 lustre_msg_add_flags(req->rq_reqmsg,
129                                                      MSG_RESENT);
130                         break;
131                 }
132                 req = NULL;
133         }
134
135         cfs_spin_lock(&imp->imp_lock);
136         imp->imp_resend_replay = 0;
137         cfs_spin_unlock(&imp->imp_lock);
138
139         if (req != NULL) {
140                 rc = ptlrpc_replay_req(req);
141                 if (rc) {
142                         CERROR("recovery replay error %d for req "
143                                LPU64"\n", rc, req->rq_xid);
144                         RETURN(rc);
145                 }
146                 *inflight = 1;
147         }
148         RETURN(rc);
149 }
150
151 /**
152  * Schedule resending of request on sending_list. This is done after
153  * we completed replaying of requests and locks.
154  */
155 int ptlrpc_resend(struct obd_import *imp)
156 {
157         struct ptlrpc_request *req, *next;
158
159         ENTRY;
160
161         /* As long as we're in recovery, nothing should be added to the sending
162          * list, so we don't need to hold the lock during this iteration and
163          * resend process.
164          */
165         /* Well... what if lctl recover is called twice at the same time?
166          */
167         cfs_spin_lock(&imp->imp_lock);
168         if (imp->imp_state != LUSTRE_IMP_RECOVER) {
169                 cfs_spin_unlock(&imp->imp_lock);
170                 RETURN(-1);
171         }
172
173         cfs_list_for_each_entry_safe(req, next, &imp->imp_sending_list,
174                                      rq_list) {
175                 LASSERTF((long)req > CFS_PAGE_SIZE && req != LP_POISON,
176                          "req %p bad\n", req);
177                 LASSERTF(req->rq_type != LI_POISON, "req %p freed\n", req);
178                 if (!ptlrpc_no_resend(req))
179                         ptlrpc_resend_req(req);
180         }
181         cfs_spin_unlock(&imp->imp_lock);
182
183         RETURN(0);
184 }
185
186 /**
187  * Go through all requests in delayed list and wake their threads
188  * for resending
189  */
190 void ptlrpc_wake_delayed(struct obd_import *imp)
191 {
192         cfs_list_t *tmp, *pos;
193         struct ptlrpc_request *req;
194
195         cfs_spin_lock(&imp->imp_lock);
196         cfs_list_for_each_safe(tmp, pos, &imp->imp_delayed_list) {
197                 req = cfs_list_entry(tmp, struct ptlrpc_request, rq_list);
198
199                 DEBUG_REQ(D_HA, req, "waking (set %p):", req->rq_set);
200                 ptlrpc_client_wake_req(req);
201         }
202         cfs_spin_unlock(&imp->imp_lock);
203 }
204
205 void ptlrpc_request_handle_notconn(struct ptlrpc_request *failed_req)
206 {
207         struct obd_import *imp = failed_req->rq_import;
208         ENTRY;
209
210         CDEBUG(D_HA, "import %s of %s@%s abruptly disconnected: reconnecting\n",
211                imp->imp_obd->obd_name, obd2cli_tgt(imp->imp_obd),
212                imp->imp_connection->c_remote_uuid.uuid);
213
214         if (ptlrpc_set_import_discon(imp,
215                               lustre_msg_get_conn_cnt(failed_req->rq_reqmsg))) {
216                 if (!imp->imp_replayable) {
217                         CDEBUG(D_HA, "import %s@%s for %s not replayable, "
218                                "auto-deactivating\n",
219                                obd2cli_tgt(imp->imp_obd),
220                                imp->imp_connection->c_remote_uuid.uuid,
221                                imp->imp_obd->obd_name);
222                         ptlrpc_deactivate_import(imp);
223                 }
224                 /* to control recovery via lctl {disable|enable}_recovery */
225                 if (imp->imp_deactive == 0)
226                         ptlrpc_connect_import(imp, NULL);
227         }
228
229         /* Wait for recovery to complete and resend. If evicted, then
230            this request will be errored out later.*/
231         cfs_spin_lock(&failed_req->rq_lock);
232         if (!failed_req->rq_no_resend)
233                 failed_req->rq_resend = 1;
234         cfs_spin_unlock(&failed_req->rq_lock);
235
236         EXIT;
237 }
238
239 /**
240  * Administratively active/deactive a client. 
241  * This should only be called by the ioctl interface, currently
242  *  - the lctl deactivate and activate commands
243  *  - echo 0/1 >> /proc/osc/XXX/active
244  *  - client umount -f (ll_umount_begin)
245  */
246 int ptlrpc_set_import_active(struct obd_import *imp, int active)
247 {
248         struct obd_device *obd = imp->imp_obd;
249         int rc = 0;
250
251         ENTRY;
252         LASSERT(obd);
253
254         /* When deactivating, mark import invalid, and abort in-flight
255          * requests. */
256         if (!active) {
257                 LCONSOLE_WARN("setting import %s INACTIVE by administrator "
258                               "request\n", obd2cli_tgt(imp->imp_obd));
259
260                 /* set before invalidate to avoid messages about imp_inval
261                  * set without imp_deactive in ptlrpc_import_delay_req */
262                 cfs_spin_lock(&imp->imp_lock);
263                 imp->imp_deactive = 1;
264                 cfs_spin_unlock(&imp->imp_lock);
265
266                 obd_import_event(imp->imp_obd, imp, IMP_EVENT_DEACTIVATE);
267
268                 ptlrpc_invalidate_import(imp);
269         }
270
271         /* When activating, mark import valid, and attempt recovery */
272         if (active) {
273                 CDEBUG(D_HA, "setting import %s VALID\n",
274                        obd2cli_tgt(imp->imp_obd));
275
276                 cfs_spin_lock(&imp->imp_lock);
277                 imp->imp_deactive = 0;
278                 cfs_spin_unlock(&imp->imp_lock);
279                 obd_import_event(imp->imp_obd, imp, IMP_EVENT_ACTIVATE);
280
281                 rc = ptlrpc_recover_import(imp, NULL, 0);
282         }
283
284         RETURN(rc);
285 }
286
287 /* Attempt to reconnect an import */
288 int ptlrpc_recover_import(struct obd_import *imp, char *new_uuid, int async)
289 {
290         int rc = 0;
291         ENTRY;
292
293         cfs_spin_lock(&imp->imp_lock);
294         if (imp->imp_state == LUSTRE_IMP_NEW || imp->imp_deactive ||
295             cfs_atomic_read(&imp->imp_inval_count))
296                 rc = -EINVAL;
297         cfs_spin_unlock(&imp->imp_lock);
298         if (rc)
299                 GOTO(out, rc);
300
301         /* force import to be disconnected. */
302         ptlrpc_set_import_discon(imp, 0);
303
304         /* Check if reconnect is already in progress */
305         cfs_spin_lock(&imp->imp_lock);
306         if (imp->imp_state != LUSTRE_IMP_DISCON)
307                 rc = -EALREADY;
308         cfs_spin_unlock(&imp->imp_lock);
309         if (rc)
310                 GOTO(out, rc);
311
312         rc = ptlrpc_connect_import(imp, new_uuid);
313         if (rc)
314                 GOTO(out, rc);
315
316         if (!async) {
317                 struct l_wait_info lwi;
318                 int secs = cfs_time_seconds(obd_timeout);
319
320                 CDEBUG(D_HA, "%s: recovery started, waiting %u seconds\n",
321                        obd2cli_tgt(imp->imp_obd), secs);
322
323                 lwi = LWI_TIMEOUT(secs, NULL, NULL);
324                 rc = l_wait_event(imp->imp_recovery_waitq,
325                                   !ptlrpc_import_in_recovery(imp), &lwi);
326                 CDEBUG(D_HA, "%s: recovery finished\n",
327                        obd2cli_tgt(imp->imp_obd));
328         }
329         EXIT;
330
331 out:
332         return rc;
333 }
334
335 int ptlrpc_import_in_recovery(struct obd_import *imp)
336 {
337         int in_recovery = 1;
338         cfs_spin_lock(&imp->imp_lock);
339         if (imp->imp_state == LUSTRE_IMP_FULL ||
340             imp->imp_state == LUSTRE_IMP_CLOSED ||
341             imp->imp_state == LUSTRE_IMP_DISCON)
342                 in_recovery = 0;
343         cfs_spin_unlock(&imp->imp_lock);
344         return in_recovery;
345 }