Whamcloud - gitweb
Branch HEAD
[fs/lustre-release.git] / lustre / ptlrpc / recover.c
1 /* -*- mode: c; c-basic-offset: 8; indent-tabs-mode: nil; -*-
2  * vim:expandtab:shiftwidth=8:tabstop=8:
3  *
4  * Portal-RPC reconnection and replay operations, for use in recovery.
5  *
6  *  Copyright (c) 2002, 2003 Cluster File Systems, Inc.
7  *   Author: Mike Shaver <shaver@clusterfs.com>
8  *
9  *   This file is part of the Lustre file system, http://www.lustre.org
10  *   Lustre is a trademark of Cluster File Systems, Inc.
11  *
12  *   You may have signed or agreed to another license before downloading
13  *   this software.  If so, you are bound by the terms and conditions
14  *   of that agreement, and the following does not apply to you.  See the
15  *   LICENSE file included with this distribution for more information.
16  *
17  *   If you did not agree to a different license, then this copy of Lustre
18  *   is open source software; you can redistribute it and/or modify it
19  *   under the terms of version 2 of the GNU General Public License as
20  *   published by the Free Software Foundation.
21  *
22  *   In either case, Lustre is distributed in the hope that it will be
23  *   useful, but WITHOUT ANY WARRANTY; without even the implied warranty
24  *   of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
25  *   license text for more details.
26  */
27
28 #define DEBUG_SUBSYSTEM S_RPC
29 #ifdef __KERNEL__
30 # include <libcfs/libcfs.h>
31 #else
32 # include <liblustre.h>
33 #endif
34
35 #include <obd_support.h>
36 #include <lustre_ha.h>
37 #include <lustre_net.h>
38 #include <lustre_import.h>
39 #include <lustre_export.h>
40 #include <obd.h>
41 #include <obd_ost.h>
42 #include <obd_class.h>
43 #include <obd_lov.h> /* for IOC_LOV_SET_OSC_ACTIVE */
44 #include <libcfs/list.h>
45
46 #include "ptlrpc_internal.h"
47
/* Single reconnect attempt without retry; defined later in this file. */
static int ptlrpc_recover_import_no_retry(struct obd_import *imp,
                                          char *new_uuid);
49
50 void ptlrpc_initiate_recovery(struct obd_import *imp)
51 {
52         ENTRY;
53
54         CDEBUG(D_HA, "%s: starting recovery\n", obd2cli_tgt(imp->imp_obd));
55         ptlrpc_connect_import(imp, NULL);
56
57         EXIT;
58 }
59
60 int ptlrpc_replay_next(struct obd_import *imp, int *inflight)
61 {
62         int rc = 0;
63         struct list_head *tmp, *pos;
64         struct ptlrpc_request *req = NULL;
65         __u64 last_transno;
66         ENTRY;
67
68         *inflight = 0;
69
70         /* It might have committed some after we last spoke, so make sure we
71          * get rid of them now.
72          */
73         spin_lock(&imp->imp_lock);
74         imp->imp_last_transno_checked = 0;
75         ptlrpc_free_committed(imp);
76         last_transno = imp->imp_last_replay_transno;
77         spin_unlock(&imp->imp_lock);
78
79         CDEBUG(D_HA, "import %p from %s committed "LPU64" last "LPU64"\n",
80                imp, obd2cli_tgt(imp->imp_obd),
81                imp->imp_peer_committed_transno, last_transno);
82
83         /* Do I need to hold a lock across this iteration?  We shouldn't be
84          * racing with any additions to the list, because we're in recovery
85          * and are therefore not processing additional requests to add.  Calls
86          * to ptlrpc_free_committed might commit requests, but nothing "newer"
87          * than the one we're replaying (it can't be committed until it's
88          * replayed, and we're doing that here).  l_f_e_safe protects against
89          * problems with the current request being committed, in the unlikely
90          * event of that race.  So, in conclusion, I think that it's safe to
91          * perform this list-walk without the imp_lock held.
92          *
93          * But, the {mdc,osc}_replay_open callbacks both iterate
94          * request lists, and have comments saying they assume the
95          * imp_lock is being held by ptlrpc_replay, but it's not. it's
96          * just a little race...
97          */
98         list_for_each_safe(tmp, pos, &imp->imp_replay_list) {
99                 req = list_entry(tmp, struct ptlrpc_request, rq_replay_list);
100
101                 /* If need to resend the last sent transno (because a
102                    reconnect has occurred), then stop on the matching
103                    req and send it again. If, however, the last sent
104                    transno has been committed then we continue replay
105                    from the next request. */
106                 if (imp->imp_resend_replay && 
107                     req->rq_transno == last_transno) {
108                         lustre_msg_add_flags(req->rq_reqmsg, MSG_RESENT);
109                         break;
110                 }
111
112                 if (req->rq_transno > last_transno) {
113                         imp->imp_last_replay_transno = req->rq_transno;
114                         break;
115                 }
116
117                 req = NULL;
118         }
119
120         spin_lock(&imp->imp_lock);
121         imp->imp_resend_replay = 0;
122         spin_unlock(&imp->imp_lock);
123
124         if (req != NULL) {
125                 rc = ptlrpc_replay_req(req);
126                 if (rc) {
127                         CERROR("recovery replay error %d for req "
128                                LPD64"\n", rc, req->rq_xid);
129                         RETURN(rc);
130                 }
131                 *inflight = 1;
132         }
133         RETURN(rc);
134 }
135
136 int ptlrpc_resend(struct obd_import *imp)
137 {
138         struct ptlrpc_request *req, *next;
139
140         ENTRY;
141
142         /* As long as we're in recovery, nothing should be added to the sending
143          * list, so we don't need to hold the lock during this iteration and
144          * resend process.
145          */
146         /* Well... what if lctl recover is called twice at the same time?
147          */
148         spin_lock(&imp->imp_lock);
149         if (imp->imp_state != LUSTRE_IMP_RECOVER) {
150                 spin_unlock(&imp->imp_lock);
151                 RETURN(-1);
152         }
153         spin_unlock(&imp->imp_lock);
154
155         list_for_each_entry_safe(req, next, &imp->imp_sending_list, rq_list) {
156                 LASSERTF((long)req > CFS_PAGE_SIZE && req != LP_POISON,
157                          "req %p bad\n", req);
158                 LASSERTF(req->rq_type != LI_POISON, "req %p freed\n", req);
159                 if (!req->rq_no_resend)
160                         ptlrpc_resend_req(req);
161         }
162
163         RETURN(0);
164 }
165
166 void ptlrpc_wake_delayed(struct obd_import *imp)
167 {
168         struct list_head *tmp, *pos;
169         struct ptlrpc_request *req;
170
171         spin_lock(&imp->imp_lock);
172         list_for_each_safe(tmp, pos, &imp->imp_delayed_list) {
173                 req = list_entry(tmp, struct ptlrpc_request, rq_list);
174
175                 DEBUG_REQ(D_HA, req, "waking (set %p):", req->rq_set);
176                 ptlrpc_wake_client_req(req);
177         }
178         spin_unlock(&imp->imp_lock);
179 }
180
181 void ptlrpc_request_handle_notconn(struct ptlrpc_request *failed_req)
182 {
183         struct obd_import *imp = failed_req->rq_import;
184         ENTRY;
185
186         CDEBUG(D_HA, "import %s of %s@%s abruptly disconnected: reconnecting\n",
187                imp->imp_obd->obd_name, obd2cli_tgt(imp->imp_obd),
188                imp->imp_connection->c_remote_uuid.uuid);
189
190         if (ptlrpc_set_import_discon(imp,
191                               lustre_msg_get_conn_cnt(failed_req->rq_reqmsg))) {
192                 if (!imp->imp_replayable) {
193                         CDEBUG(D_HA, "import %s@%s for %s not replayable, "
194                                "auto-deactivating\n",
195                                obd2cli_tgt(imp->imp_obd),
196                                imp->imp_connection->c_remote_uuid.uuid,
197                                imp->imp_obd->obd_name);
198                         ptlrpc_deactivate_import(imp);
199                 }
200                 /* to control recovery via lctl {disable|enable}_recovery */
201                 if (imp->imp_deactive == 0)
202                         ptlrpc_connect_import(imp, NULL);
203         }
204
205         /* Wait for recovery to complete and resend. If evicted, then
206            this request will be errored out later.*/
207         spin_lock(&failed_req->rq_lock);
208         if (!failed_req->rq_no_resend)
209                 failed_req->rq_resend = 1;
210         spin_unlock(&failed_req->rq_lock);
211
212         EXIT;
213 }
214
215 /*
216  * Administratively active/deactive a client. 
217  * This should only be called by the ioctl interface, currently
218  *  - the lctl deactivate and activate commands
219  *  - echo 0/1 >> /proc/osc/XXX/active
220  *  - client umount -f (ll_umount_begin)
221  */
222 int ptlrpc_set_import_active(struct obd_import *imp, int active)
223 {
224         struct obd_device *obd = imp->imp_obd;
225         int rc = 0;
226
227         ENTRY;
228         LASSERT(obd);
229
230         /* When deactivating, mark import invalid, and abort in-flight
231          * requests. */
232         if (!active) {
233                 LCONSOLE_WARN("setting import %s INACTIVE by administrator "
234                               "request\n", obd2cli_tgt(imp->imp_obd));
235                 ptlrpc_invalidate_import(imp);
236
237                 spin_lock(&imp->imp_lock);
238                 imp->imp_deactive = 1;
239                 spin_unlock(&imp->imp_lock);
240         }
241
242         /* When activating, mark import valid, and attempt recovery */
243         if (active) {
244                 spin_lock(&imp->imp_lock);
245                 imp->imp_deactive = 0;
246                 spin_unlock(&imp->imp_lock);
247
248                 CDEBUG(D_HA, "setting import %s VALID\n",
249                        obd2cli_tgt(imp->imp_obd));
250                 rc = ptlrpc_recover_import(imp, NULL);
251         }
252
253         RETURN(rc);
254 }
255
256 /* Attempt to reconnect an import */
257 int ptlrpc_recover_import(struct obd_import *imp, char *new_uuid)
258 {
259         int rc;
260         ENTRY;
261
262         /* force import to be disconnected. */
263         ptlrpc_set_import_discon(imp, 0);
264
265         spin_lock(&imp->imp_lock);
266         imp->imp_deactive = 0;
267         spin_unlock(&imp->imp_lock);
268
269         rc = ptlrpc_recover_import_no_retry(imp, new_uuid);
270
271         RETURN(rc);
272 }
273
274 int ptlrpc_import_in_recovery(struct obd_import *imp)
275 {
276         int in_recovery = 1;
277         spin_lock(&imp->imp_lock);
278         if (imp->imp_state == LUSTRE_IMP_FULL ||
279             imp->imp_state == LUSTRE_IMP_CLOSED ||
280             imp->imp_state == LUSTRE_IMP_DISCON)
281                 in_recovery = 0;
282         spin_unlock(&imp->imp_lock);
283         return in_recovery;
284 }
285
286 static int ptlrpc_recover_import_no_retry(struct obd_import *imp,
287                                           char *new_uuid)
288 {
289         int rc;
290         int in_recovery = 0;
291         struct l_wait_info lwi;
292         ENTRY;
293
294         /* Check if reconnect is already in progress */
295         spin_lock(&imp->imp_lock);
296         if (imp->imp_state != LUSTRE_IMP_DISCON) {
297                 in_recovery = 1;
298         }
299         spin_unlock(&imp->imp_lock);
300
301         if (in_recovery == 1)
302                 RETURN(-EALREADY);
303
304         rc = ptlrpc_connect_import(imp, new_uuid);
305         if (rc)
306                 RETURN(rc);
307
308         CDEBUG(D_HA, "%s: recovery started, waiting\n",
309                obd2cli_tgt(imp->imp_obd));
310
311         lwi = LWI_TIMEOUT(cfs_timeout_cap(cfs_time_seconds(obd_timeout)), 
312                           NULL, NULL);
313         rc = l_wait_event(imp->imp_recovery_waitq,
314                           !ptlrpc_import_in_recovery(imp), &lwi);
315         CDEBUG(D_HA, "%s: recovery finished\n",
316                obd2cli_tgt(imp->imp_obd));
317
318         RETURN(rc);
319 }