1 /* -*- mode: c; c-basic-offset: 8; indent-tabs-mode: nil; -*-
2 * vim:expandtab:shiftwidth=8:tabstop=8:
6 * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
8 * This program is free software; you can redistribute it and/or modify
9 * it under the terms of the GNU General Public License version 2 only,
10 * as published by the Free Software Foundation.
12 * This program is distributed in the hope that it will be useful, but
13 * WITHOUT ANY WARRANTY; without even the implied warranty of
14 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
15 * General Public License version 2 for more details (a copy is included
16 * in the LICENSE file that accompanied this code).
18 * You should have received a copy of the GNU General Public License
19 * version 2 along with this program; If not, see
20 * http://www.sun.com/software/products/lustre/docs/GPLv2.pdf
22 * Please contact Sun Microsystems, Inc., 4150 Network Circle, Santa Clara,
23 * CA 95054 USA or visit www.sun.com if you need additional information or
29 * Copyright 2008 Sun Microsystems, Inc. All rights reserved
30 * Use is subject to license terms.
33 * This file is part of Lustre, http://www.lustre.org/
34 * Lustre is a trademark of Sun Microsystems, Inc.
36 * lustre/ptlrpc/recover.c
38 * Author: Mike Shaver <shaver@clusterfs.com>
41 #define DEBUG_SUBSYSTEM S_RPC
43 # include <libcfs/libcfs.h>
45 # include <liblustre.h>
48 #include <obd_support.h>
49 #include <lustre_ha.h>
50 #include <lustre_net.h>
51 #include <lustre_import.h>
52 #include <lustre_export.h>
55 #include <obd_class.h>
56 #include <obd_lov.h> /* for IOC_LOV_SET_OSC_ACTIVE */
57 #include <libcfs/list.h>
59 #include "ptlrpc_internal.h"
/* Forward declaration: defined further down in this file and called from
 * ptlrpc_recover_import(). */
61 static int ptlrpc_recover_import_no_retry(struct obd_import *, char *);
/**
 * Begin recovery of an import.
 *
 * Logs the event at D_HA level and then issues a connect through
 * ptlrpc_connect_import() with NULL as the UUID argument.  The result of
 * that call is not examined here.
 *
 * \param imp  import to recover; imp->imp_obd supplies the target name
 *             used in the log message
 */
63 void ptlrpc_initiate_recovery(struct obd_import *imp)
67 CDEBUG(D_HA, "%s: starting recovery\n", obd2cli_tgt(imp->imp_obd));
68 ptlrpc_connect_import(imp, NULL);
/**
 * Select the next request on imp->imp_replay_list and replay it with
 * ptlrpc_replay_req().
 *
 * With imp_lock held, imp_last_transno_checked is zeroed, requests that
 * are already committed are released through ptlrpc_free_committed(), and
 * imp_last_replay_transno is sampled into last_transno.  The replay list
 * is then walked without imp_lock (see the discussion in the body) looking
 * for either
 *  - the request whose rq_transno equals last_transno while
 *    imp_resend_replay is set; it is flagged MSG_RESENT, or
 *  - a request with a larger rq_transno, which becomes the new
 *    imp_last_replay_transno.
 * imp_resend_replay is cleared under imp_lock before the replay is sent.
 *
 * \param imp       import being recovered
 * \param inflight  out-parameter.  NOTE(review): its assignments are not
 *                  visible in this part of the file; presumably it reports
 *                  whether a replay was actually sent - confirm against
 *                  the callers
 */
73 int ptlrpc_replay_next(struct obd_import *imp, int *inflight)
76 struct list_head *tmp, *pos;
77 struct ptlrpc_request *req = NULL;
83 /* It might have committed some after we last spoke, so make sure we
84 * get rid of them now.
86 spin_lock(&imp->imp_lock);
87 imp->imp_last_transno_checked = 0;
88 ptlrpc_free_committed(imp);
89 last_transno = imp->imp_last_replay_transno;
90 spin_unlock(&imp->imp_lock);
92 CDEBUG(D_HA, "import %p from %s committed "LPU64" last "LPU64"\n",
93 imp, obd2cli_tgt(imp->imp_obd),
94 imp->imp_peer_committed_transno, last_transno);
96 /* Do I need to hold a lock across this iteration? We shouldn't be
97 * racing with any additions to the list, because we're in recovery
98 * and are therefore not processing additional requests to add. Calls
99 * to ptlrpc_free_committed might commit requests, but nothing "newer"
100 * than the one we're replaying (it can't be committed until it's
101 * replayed, and we're doing that here). l_f_e_safe protects against
102 * problems with the current request being committed, in the unlikely
103 * event of that race. So, in conclusion, I think that it's safe to
104 * perform this list-walk without the imp_lock held.
106 * But, the {mdc,osc}_replay_open callbacks both iterate
107 * request lists, and have comments saying they assume the
108 * imp_lock is being held by ptlrpc_replay, but it's not. it's
109 * just a little race...
111 list_for_each_safe(tmp, pos, &imp->imp_replay_list) {
112 req = list_entry(tmp, struct ptlrpc_request, rq_replay_list);
114 /* If need to resend the last sent transno (because a
115 reconnect has occurred), then stop on the matching
116 req and send it again. If, however, the last sent
117 transno has been committed then we continue replay
118 from the next request. */
119 if (imp->imp_resend_replay &&
120 req->rq_transno == last_transno) {
121 lustre_msg_add_flags(req->rq_reqmsg, MSG_RESENT);
/* Otherwise the first request newer than the last replayed one is next;
 * record its transno as the new replay position. */
125 if (req->rq_transno > last_transno) {
126 imp->imp_last_replay_transno = req->rq_transno;
/* imp_resend_replay has been consumed by the walk above; clear it under
 * imp_lock. */
133 spin_lock(&imp->imp_lock);
134 imp->imp_resend_replay = 0;
135 spin_unlock(&imp->imp_lock);
138 rc = ptlrpc_replay_req(req);
/* NOTE(review): the condition guarding this message is not visible here;
 * presumably it is emitted only when rc is non-zero - confirm. */
140 CERROR("recovery replay error %d for req "
141 LPD64"\n", rc, req->rq_xid);
/**
 * Resend every request on imp->imp_sending_list that is not marked
 * rq_no_resend.
 *
 * imp_lock is taken before the import state is checked and is released
 * after the list walk.  When imp_state is not LUSTRE_IMP_RECOVER the lock
 * is dropped straight away (NOTE(review): the statement following that
 * unlock is not visible here; presumably an early error return - confirm).
 * Each request is checked against poison values with LASSERTF before
 * ptlrpc_resend_req() is called on it.
 *
 * \param imp  import whose sending list is walked
 */
149 int ptlrpc_resend(struct obd_import *imp)
151 struct ptlrpc_request *req, *next;
155 /* As long as we're in recovery, nothing should be added to the sending
156 * list, so we don't need to hold the lock during this iteration and
159 /* Well... what if lctl recover is called twice at the same time?
161 spin_lock(&imp->imp_lock);
162 if (imp->imp_state != LUSTRE_IMP_RECOVER) {
163 spin_unlock(&imp->imp_lock);
167 list_for_each_entry_safe(req, next, &imp->imp_sending_list, rq_list) {
168 LASSERTF((long)req > CFS_PAGE_SIZE && req != LP_POISON,
169 "req %p bad\n", req);
170 LASSERTF(req->rq_type != LI_POISON, "req %p freed\n", req);
171 if (!req->rq_no_resend)
172 ptlrpc_resend_req(req);
174 spin_unlock(&imp->imp_lock);
/**
 * Wake every request queued on imp->imp_delayed_list.
 *
 * The list is walked with imp_lock held.  Each request is logged at D_HA
 * level together with its rq_set pointer and then passed to
 * ptlrpc_wake_client_req().
 *
 * \param imp  import whose delayed requests are woken
 */
179 void ptlrpc_wake_delayed(struct obd_import *imp)
181 struct list_head *tmp, *pos;
182 struct ptlrpc_request *req;
184 spin_lock(&imp->imp_lock);
185 list_for_each_safe(tmp, pos, &imp->imp_delayed_list) {
/* Requests on this list are linked through rq_list. */
186 req = list_entry(tmp, struct ptlrpc_request, rq_list);
188 DEBUG_REQ(D_HA, req, "waking (set %p):", req->rq_set);
189 ptlrpc_wake_client_req(req);
191 spin_unlock(&imp->imp_lock);
/**
 * React to a request that failed because its import got disconnected.
 *
 * The import is marked disconnected through ptlrpc_set_import_discon(),
 * which is given the connection count stored in the failed request's
 * message.  When that call returns non-zero, an import that is not
 * replayable is deactivated with ptlrpc_deactivate_import().  A reconnect
 * is started with ptlrpc_connect_import() only while imp_deactive is 0.
 * The failed request itself is flagged rq_resend, under rq_lock, unless it
 * is marked rq_no_resend.
 *
 * NOTE(review): the closing-brace lines are not visible in this part of
 * the file, so whether the reconnect and the rq_resend marking sit inside
 * or after the ptlrpc_set_import_discon() branch should be confirmed.
 *
 * \param failed_req  request whose failure exposed the disconnect; its
 *                    rq_import is the import acted upon
 */
194 void ptlrpc_request_handle_notconn(struct ptlrpc_request *failed_req)
196 struct obd_import *imp = failed_req->rq_import;
199 CDEBUG(D_HA, "import %s of %s@%s abruptly disconnected: reconnecting\n",
200 imp->imp_obd->obd_name, obd2cli_tgt(imp->imp_obd),
201 imp->imp_connection->c_remote_uuid.uuid);
203 if (ptlrpc_set_import_discon(imp,
204 lustre_msg_get_conn_cnt(failed_req->rq_reqmsg))) {
205 if (!imp->imp_replayable) {
206 CDEBUG(D_HA, "import %s@%s for %s not replayable, "
207 "auto-deactivating\n",
208 obd2cli_tgt(imp->imp_obd),
209 imp->imp_connection->c_remote_uuid.uuid,
210 imp->imp_obd->obd_name);
211 ptlrpc_deactivate_import(imp);
213 /* to control recovery via lctl {disable|enable}_recovery */
214 if (imp->imp_deactive == 0)
215 ptlrpc_connect_import(imp, NULL);
218 /* Wait for recovery to complete and resend. If evicted, then
219 this request will be errored out later.*/
220 spin_lock(&failed_req->rq_lock);
221 if (!failed_req->rq_no_resend)
222 failed_req->rq_resend = 1;
223 spin_unlock(&failed_req->rq_lock);
229 * Administratively activate/deactivate a client.
230 * This should only be called by the ioctl interface, currently
231 * - the lctl deactivate and activate commands
232 * - echo 0/1 >> /proc/osc/XXX/active
233 * - client umount -f (ll_umount_begin)
/*
 * Deactivate path: a console warning is printed, the import is invalidated
 * with ptlrpc_invalidate_import(), and imp_deactive is set to 1 under
 * imp_lock.
 * Activate path: imp_deactive is cleared under imp_lock and recovery is
 * started with ptlrpc_recover_import(imp, NULL); its result is stored in rc.
 *
 * \param imp     import to switch
 * \param active  selects the path.  NOTE(review): the test on this flag is
 *                not visible in this part of the file; presumably zero
 *                means deactivate and non-zero means activate - confirm
 */
235 int ptlrpc_set_import_active(struct obd_import *imp, int active)
237 struct obd_device *obd = imp->imp_obd;
243 /* When deactivating, mark import invalid, and abort in-flight
246 LCONSOLE_WARN("setting import %s INACTIVE by administrator "
247 "request\n", obd2cli_tgt(imp->imp_obd));
248 ptlrpc_invalidate_import(imp);
250 spin_lock(&imp->imp_lock);
251 imp->imp_deactive = 1;
252 spin_unlock(&imp->imp_lock);
255 /* When activating, mark import valid, and attempt recovery */
257 spin_lock(&imp->imp_lock);
258 imp->imp_deactive = 0;
259 spin_unlock(&imp->imp_lock);
261 CDEBUG(D_HA, "setting import %s VALID\n",
262 obd2cli_tgt(imp->imp_obd));
/* No replacement UUID is supplied: NULL is passed as new_uuid. */
263 rc = ptlrpc_recover_import(imp, NULL);
269 /* Attempt to reconnect an import */
/*
 * The import is first forced into the disconnected state by calling
 * ptlrpc_set_import_discon() with a connection count of 0, imp_deactive is
 * cleared under imp_lock, and the reconnect itself is delegated to
 * ptlrpc_recover_import_no_retry(), whose result is stored in rc.
 *
 * \param imp       import to reconnect
 * \param new_uuid  handed unchanged to ptlrpc_recover_import_no_retry();
 *                  ptlrpc_set_import_active() passes NULL here
 */
270 int ptlrpc_recover_import(struct obd_import *imp, char *new_uuid)
275 /* force import to be disconnected. */
276 ptlrpc_set_import_discon(imp, 0);
278 spin_lock(&imp->imp_lock);
279 imp->imp_deactive = 0;
280 spin_unlock(&imp->imp_lock);
282 rc = ptlrpc_recover_import_no_retry(imp, new_uuid);
/**
 * Tell whether an import is currently in the middle of recovery.
 *
 * imp_state is examined with imp_lock held.  The states LUSTRE_IMP_FULL,
 * LUSTRE_IMP_CLOSED and LUSTRE_IMP_DISCON are singled out by the test.
 * NOTE(review): the assignment made when the test matches is not visible
 * in this part of the file; since ptlrpc_recover_import_no_retry() waits
 * for this function to return false, those three states presumably yield
 * 0 - confirm.
 *
 * \param imp  import to inspect
 */
287 int ptlrpc_import_in_recovery(struct obd_import *imp)
290 spin_lock(&imp->imp_lock);
291 if (imp->imp_state == LUSTRE_IMP_FULL ||
292 imp->imp_state == LUSTRE_IMP_CLOSED ||
293 imp->imp_state == LUSTRE_IMP_DISCON)
295 spin_unlock(&imp->imp_lock);
/**
 * Start a reconnect of an import and wait until recovery is over.
 *
 * With imp_lock held, imp_state is compared against LUSTRE_IMP_DISCON to
 * detect a reconnect that is already under way; the outcome is then tested
 * through in_recovery (NOTE(review): neither the assignment to in_recovery
 * nor the action taken when it equals 1 is visible in this part of the
 * file - presumably an early return, confirm).  Otherwise
 * ptlrpc_connect_import() is called with new_uuid and the caller sleeps on
 * imp_recovery_waitq until ptlrpc_import_in_recovery() returns false or
 * the timeout described by lwi applies.
 *
 * \param imp       import to reconnect
 * \param new_uuid  forwarded to ptlrpc_connect_import()
 */
299 static int ptlrpc_recover_import_no_retry(struct obd_import *imp,
304 struct l_wait_info lwi;
307 /* Check if reconnect is already in progress */
308 spin_lock(&imp->imp_lock);
309 if (imp->imp_state != LUSTRE_IMP_DISCON) {
312 spin_unlock(&imp->imp_lock);
314 if (in_recovery == 1)
317 rc = ptlrpc_connect_import(imp, new_uuid);
321 CDEBUG(D_HA, "%s: recovery started, waiting\n",
322 obd2cli_tgt(imp->imp_obd));
/* The wait is bounded by obd_timeout, converted with cfs_time_seconds()
 * and limited by cfs_timeout_cap(). */
324 lwi = LWI_TIMEOUT(cfs_timeout_cap(cfs_time_seconds(obd_timeout)),
326 rc = l_wait_event(imp->imp_recovery_waitq,
327 !ptlrpc_import_in_recovery(imp), &lwi);
328 CDEBUG(D_HA, "%s: recovery finished\n",
329 obd2cli_tgt(imp->imp_obd));