Whamcloud - gitweb
Some racy problems happened when sanity-quota.sh was run on buffalo.
[fs/lustre-release.git] / lustre / ptlrpc / recover.c
1 /* -*- mode: c; c-basic-offset: 8; indent-tabs-mode: nil; -*-
2  * vim:expandtab:shiftwidth=8:tabstop=8:
3  *
4  * Portal-RPC reconnection and replay operations, for use in recovery.
5  *
6  *  Copyright (c) 2002, 2003 Cluster File Systems, Inc.
7  *   Author: Mike Shaver <shaver@clusterfs.com>
8  *
9  *   This file is part of the Lustre file system, http://www.lustre.org
10  *   Lustre is a trademark of Cluster File Systems, Inc.
11  *
12  *   You may have signed or agreed to another license before downloading
13  *   this software.  If so, you are bound by the terms and conditions
14  *   of that agreement, and the following does not apply to you.  See the
15  *   LICENSE file included with this distribution for more information.
16  *
17  *   If you did not agree to a different license, then this copy of Lustre
18  *   is open source software; you can redistribute it and/or modify it
19  *   under the terms of version 2 of the GNU General Public License as
20  *   published by the Free Software Foundation.
21  *
22  *   In either case, Lustre is distributed in the hope that it will be
23  *   useful, but WITHOUT ANY WARRANTY; without even the implied warranty
24  *   of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
25  *   license text for more details.
26  */
27
28 #define DEBUG_SUBSYSTEM S_RPC
29 #ifdef __KERNEL__
30 # include <libcfs/libcfs.h>
31 #else
32 # include <liblustre.h>
33 #endif
34
35 #include <obd_support.h>
36 #include <lustre_ha.h>
37 #include <lustre_net.h>
38 #include <lustre_import.h>
39 #include <lustre_export.h>
40 #include <obd.h>
41 #include <obd_ost.h>
42 #include <obd_class.h>
43 #include <obd_lov.h> /* for IOC_LOV_SET_OSC_ACTIVE */
44 #include <libcfs/list.h>
45
46 #include "ptlrpc_internal.h"
47
/* Forward declaration: the definition lives further down in this file, but
 * ptlrpc_recover_import() needs to call it before that point. */
static int ptlrpc_recover_import_no_retry(struct obd_import *imp,
                                          char *new_uuid);
49
50 void ptlrpc_initiate_recovery(struct obd_import *imp)
51 {
52         ENTRY;
53
54         CDEBUG(D_HA, "%s: starting recovery\n", obd2cli_tgt(imp->imp_obd));
55         ptlrpc_connect_import(imp, NULL);
56
57         EXIT;
58 }
59
60 int ptlrpc_replay_next(struct obd_import *imp, int *inflight)
61 {
62         int rc = 0;
63         struct list_head *tmp, *pos;
64         struct ptlrpc_request *req = NULL;
65         __u64 last_transno;
66         ENTRY;
67
68         *inflight = 0;
69
70         /* It might have committed some after we last spoke, so make sure we
71          * get rid of them now.
72          */
73         spin_lock(&imp->imp_lock);
74         imp->imp_last_transno_checked = 0;
75         ptlrpc_free_committed(imp);
76         last_transno = imp->imp_last_replay_transno;
77         spin_unlock(&imp->imp_lock);
78
79         CDEBUG(D_HA, "import %p from %s committed "LPU64" last "LPU64"\n",
80                imp, obd2cli_tgt(imp->imp_obd),
81                imp->imp_peer_committed_transno, last_transno);
82
83         /* Do I need to hold a lock across this iteration?  We shouldn't be
84          * racing with any additions to the list, because we're in recovery
85          * and are therefore not processing additional requests to add.  Calls
86          * to ptlrpc_free_committed might commit requests, but nothing "newer"
87          * than the one we're replaying (it can't be committed until it's
88          * replayed, and we're doing that here).  l_f_e_safe protects against
89          * problems with the current request being committed, in the unlikely
90          * event of that race.  So, in conclusion, I think that it's safe to
91          * perform this list-walk without the imp_lock held.
92          *
93          * But, the {mdc,osc}_replay_open callbacks both iterate
94          * request lists, and have comments saying they assume the
95          * imp_lock is being held by ptlrpc_replay, but it's not. it's
96          * just a little race...
97          */
98         list_for_each_safe(tmp, pos, &imp->imp_replay_list) {
99                 req = list_entry(tmp, struct ptlrpc_request, rq_replay_list);
100
101                 /* If need to resend the last sent transno (because a
102                    reconnect has occurred), then stop on the matching
103                    req and send it again. If, however, the last sent
104                    transno has been committed then we continue replay
105                    from the next request. */
106                 if (imp->imp_resend_replay && 
107                     req->rq_transno == last_transno) {
108                         lustre_msg_add_flags(req->rq_reqmsg, MSG_RESENT);
109                         break;
110                 }
111
112                 if (req->rq_transno > last_transno) {
113                         imp->imp_last_replay_transno = req->rq_transno;
114                         break;
115                 }
116
117                 req = NULL;
118         }
119
120         imp->imp_resend_replay = 0;
121
122         if (req != NULL) {
123                 rc = ptlrpc_replay_req(req);
124                 if (rc) {
125                         CERROR("recovery replay error %d for req "
126                                LPD64"\n", rc, req->rq_xid);
127                         RETURN(rc);
128                 }
129                 *inflight = 1;
130         }
131         RETURN(rc);
132 }
133
134 int ptlrpc_resend(struct obd_import *imp)
135 {
136         struct ptlrpc_request *req, *next;
137
138         ENTRY;
139
140         /* As long as we're in recovery, nothing should be added to the sending
141          * list, so we don't need to hold the lock during this iteration and
142          * resend process.
143          */
144         /* Well... what if lctl recover is called twice at the same time?
145          */
146         spin_lock(&imp->imp_lock);
147         if (imp->imp_state != LUSTRE_IMP_RECOVER) {
148                 spin_unlock(&imp->imp_lock);
149                 RETURN(-1);
150         }
151         spin_unlock(&imp->imp_lock);
152
153         list_for_each_entry_safe(req, next, &imp->imp_sending_list, rq_list) {
154                 LASSERTF((long)req > CFS_PAGE_SIZE && req != LP_POISON,
155                          "req %p bad\n", req);
156                 LASSERTF(req->rq_type != LI_POISON, "req %p freed\n", req);
157                 if (!req->rq_no_resend)
158                         ptlrpc_resend_req(req);
159         }
160
161         RETURN(0);
162 }
163
164 void ptlrpc_wake_delayed(struct obd_import *imp)
165 {
166         struct list_head *tmp, *pos;
167         struct ptlrpc_request *req;
168
169         spin_lock(&imp->imp_lock);
170         list_for_each_safe(tmp, pos, &imp->imp_delayed_list) {
171                 req = list_entry(tmp, struct ptlrpc_request, rq_list);
172
173                 DEBUG_REQ(D_HA, req, "waking (set %p):", req->rq_set);
174                 ptlrpc_wake_client_req(req);
175         }
176         spin_unlock(&imp->imp_lock);
177 }
178
179 void ptlrpc_request_handle_notconn(struct ptlrpc_request *failed_req)
180 {
181         struct obd_import *imp = failed_req->rq_import;
182         ENTRY;
183
184         CDEBUG(D_HA, "import %s of %s@%s abruptly disconnected: reconnecting\n",
185                imp->imp_obd->obd_name, obd2cli_tgt(imp->imp_obd),
186                imp->imp_connection->c_remote_uuid.uuid);
187
188         if (ptlrpc_set_import_discon(imp,
189                               lustre_msg_get_conn_cnt(failed_req->rq_reqmsg))) {
190                 if (!imp->imp_replayable) {
191                         CDEBUG(D_HA, "import %s@%s for %s not replayable, "
192                                "auto-deactivating\n",
193                                obd2cli_tgt(imp->imp_obd),
194                                imp->imp_connection->c_remote_uuid.uuid,
195                                imp->imp_obd->obd_name);
196                         ptlrpc_deactivate_import(imp);
197                 }
198                 /* to control recovery via lctl {disable|enable}_recovery */
199                 if (imp->imp_deactive == 0)
200                         ptlrpc_connect_import(imp, NULL);
201         }
202
203         /* Wait for recovery to complete and resend. If evicted, then
204            this request will be errored out later.*/
205         spin_lock(&failed_req->rq_lock);
206         if (!failed_req->rq_no_resend)
207                 failed_req->rq_resend = 1;
208         spin_unlock(&failed_req->rq_lock);
209
210         EXIT;
211 }
212
213 /*
214  * Administratively active/deactive a client. 
215  * This should only be called by the ioctl interface, currently
216  *  - the lctl deactivate and activate commands
217  *  - echo 0/1 >> /proc/osc/XXX/active
218  *  - client umount -f (ll_umount_begin)
219  */
220 int ptlrpc_set_import_active(struct obd_import *imp, int active)
221 {
222         struct obd_device *obd = imp->imp_obd;
223         int rc = 0;
224
225         ENTRY;
226         LASSERT(obd);
227
228         /* When deactivating, mark import invalid, and abort in-flight
229          * requests. */
230         if (!active) {
231                 LCONSOLE_WARN("setting import %s INACTIVE by administrator "
232                               "request\n", obd2cli_tgt(imp->imp_obd));
233                 ptlrpc_invalidate_import(imp);
234                 imp->imp_deactive = 1;
235         }
236
237         /* When activating, mark import valid, and attempt recovery */
238         if (active) {
239                 imp->imp_deactive = 0;
240                 CDEBUG(D_HA, "setting import %s VALID\n",
241                        obd2cli_tgt(imp->imp_obd));
242                 rc = ptlrpc_recover_import(imp, NULL);
243         }
244
245         RETURN(rc);
246 }
247
248 /* Attempt to reconnect an import */
249 int ptlrpc_recover_import(struct obd_import *imp, char *new_uuid)
250 {
251         int rc;
252         ENTRY;
253
254         /* force import to be disconnected. */
255         ptlrpc_set_import_discon(imp, 0);
256
257         imp->imp_deactive = 0;
258         rc = ptlrpc_recover_import_no_retry(imp, new_uuid);
259
260         RETURN(rc);
261 }
262
263 int ptlrpc_import_in_recovery(struct obd_import *imp)
264 {
265         int in_recovery = 1;
266         spin_lock(&imp->imp_lock);
267         if (imp->imp_state == LUSTRE_IMP_FULL ||
268             imp->imp_state == LUSTRE_IMP_CLOSED ||
269             imp->imp_state == LUSTRE_IMP_DISCON)
270                 in_recovery = 0;
271         spin_unlock(&imp->imp_lock);
272         return in_recovery;
273 }
274
275 static int ptlrpc_recover_import_no_retry(struct obd_import *imp,
276                                           char *new_uuid)
277 {
278         int rc;
279         int in_recovery = 0;
280         struct l_wait_info lwi;
281         ENTRY;
282
283         /* Check if reconnect is already in progress */
284         spin_lock(&imp->imp_lock);
285         if (imp->imp_state != LUSTRE_IMP_DISCON) {
286                 in_recovery = 1;
287         }
288         spin_unlock(&imp->imp_lock);
289
290         if (in_recovery == 1)
291                 RETURN(-EALREADY);
292
293         rc = ptlrpc_connect_import(imp, new_uuid);
294         if (rc)
295                 RETURN(rc);
296
297         CDEBUG(D_HA, "%s: recovery started, waiting\n",
298                obd2cli_tgt(imp->imp_obd));
299
300         lwi = LWI_TIMEOUT(cfs_timeout_cap(cfs_time_seconds(obd_timeout)), 
301                           NULL, NULL);
302         rc = l_wait_event(imp->imp_recovery_waitq,
303                           !ptlrpc_import_in_recovery(imp), &lwi);
304         CDEBUG(D_HA, "%s: recovery finished\n",
305                obd2cli_tgt(imp->imp_obd));
306
307         RETURN(rc);
308 }