4 * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
6 * This program is free software; you can redistribute it and/or modify
7 * it under the terms of the GNU General Public License version 2 only,
8 * as published by the Free Software Foundation.
10 * This program is distributed in the hope that it will be useful, but
11 * WITHOUT ANY WARRANTY; without even the implied warranty of
12 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
13 * General Public License version 2 for more details (a copy is included
14 * in the LICENSE file that accompanied this code).
16 * You should have received a copy of the GNU General Public License
17 * version 2 along with this program; If not, see
18 * http://www.sun.com/software/products/lustre/docs/GPLv2.pdf
20 * Please contact Sun Microsystems, Inc., 4150 Network Circle, Santa Clara,
21 * CA 95054 USA or visit www.sun.com if you need additional information or
27 * Copyright (c) 2002, 2010, Oracle and/or its affiliates. All rights reserved.
28 * Use is subject to license terms.
30 * Copyright (c) 2011, Whamcloud, Inc.
33 * This file is part of Lustre, http://www.lustre.org/
34 * Lustre is a trademark of Sun Microsystems, Inc.
36 * lustre/ptlrpc/recover.c
38 * Author: Mike Shaver <shaver@clusterfs.com>
41 #define DEBUG_SUBSYSTEM S_RPC
43 # include <libcfs/libcfs.h>
45 # include <liblustre.h>
48 #include <obd_support.h>
49 #include <lustre_ha.h>
50 #include <lustre_net.h>
51 #include <lustre_import.h>
52 #include <lustre_export.h>
55 #include <obd_class.h>
56 #include <obd_lov.h> /* for IOC_LOV_SET_OSC_ACTIVE */
57 #include <libcfs/list.h>
59 #include "ptlrpc_internal.h"
62 * Start recovery on a disconnected import.
63 * This is done simply by attempting to connect.
/*
 * ptlrpc_initiate_recovery() - kick off recovery for an import.
 *
 * Emits a D_HA debug line naming the target returned by
 * obd2cli_tgt(imp->imp_obd) and then calls ptlrpc_connect_import(imp).
 * The value returned by ptlrpc_connect_import() is not used here, and this
 * function itself returns nothing.
 *
 * @imp: import to recover; imp->imp_obd is read without a visible NULL check.
 */
65 void ptlrpc_initiate_recovery(struct obd_import *imp)
69 CDEBUG(D_HA, "%s: starting recovery\n", obd2cli_tgt(imp->imp_obd));
/* Recovery is started purely by attempting to connect the import. */
70 ptlrpc_connect_import(imp);
76 * Identify which request from the replay list needs to be replayed next
77 * (based on what we have already replayed) and send it to the server.
/*
 * ptlrpc_replay_next() - choose the next request on imp->imp_replay_list
 * and hand it to ptlrpc_replay_req().
 *
 * Steps shown by the visible statements, in order:
 *  - with imp->imp_lock held: reset imp_last_transno_checked to 0, call
 *    ptlrpc_free_committed(imp) and copy imp_last_replay_transno into
 *    last_transno;
 *  - log the import pointer, its target, imp_peer_committed_transno and
 *    last_transno at D_HA;
 *  - walk imp_replay_list with cfs_list_for_each_safe() (imp_lock has been
 *    released before the walk) and test each request for
 *    rq_transno > last_transno; when imp_resend_replay is set,
 *    lustre_msg_add_flags() is applied to that request's rq_reqmsg;
 *  - with imp->imp_lock held: clear imp_resend_replay;
 *  - call ptlrpc_replay_req(req), keep its result in rc, and report rc
 *    together with rq_xid through CERROR.
 *
 * NOTE(review): several statements of this function are not visible here,
 * including the declarations of rc and last_transno, the flag passed to
 * lustre_msg_add_flags(), how the walk terminates, the condition guarding
 * the CERROR, every use of @inflight and the return statement.  Confirm
 * those details against the complete source before relying on them.
 *
 * @imp:      import whose replay list is examined.
 * @inflight: not referenced by any visible line; presumably an output that
 *            tells the caller whether a replay was sent - TODO confirm.
 */
79 int ptlrpc_replay_next(struct obd_import *imp, int *inflight)
82 cfs_list_t *tmp, *pos;
83 struct ptlrpc_request *req = NULL;
89 /* It might have committed some after we last spoke, so make sure we
90 * get rid of them now.
92 spin_lock(&imp->imp_lock);
93 imp->imp_last_transno_checked = 0;
94 ptlrpc_free_committed(imp);
95 last_transno = imp->imp_last_replay_transno;
96 spin_unlock(&imp->imp_lock);
98 CDEBUG(D_HA, "import %p from %s committed "LPU64" last "LPU64"\n",
99 imp, obd2cli_tgt(imp->imp_obd),
100 imp->imp_peer_committed_transno, last_transno);
102 /* Do I need to hold a lock across this iteration? We shouldn't be
103 * racing with any additions to the list, because we're in recovery
104 * and are therefore not processing additional requests to add. Calls
105 * to ptlrpc_free_committed might commit requests, but nothing "newer"
106 * than the one we're replaying (it can't be committed until it's
107 * replayed, and we're doing that here). l_f_e_safe protects against
108 * problems with the current request being committed, in the unlikely
109 * event of that race. So, in conclusion, I think that it's safe to
110 * perform this list-walk without the imp_lock held.
112 * But, the {mdc,osc}_replay_open callbacks both iterate
113 * request lists, and have comments saying they assume the
114 * imp_lock is being held by ptlrpc_replay, but it's not. it's
115 * just a little race...
117 cfs_list_for_each_safe(tmp, pos, &imp->imp_replay_list) {
118 req = cfs_list_entry(tmp, struct ptlrpc_request,
121 /* If need to resend the last sent transno (because a
122 reconnect has occurred), then stop on the matching
123 req and send it again. If, however, the last sent
124 transno has been committed then we continue replay
125 from the next request. */
126 if (req->rq_transno > last_transno) {
127 if (imp->imp_resend_replay)
128 lustre_msg_add_flags(req->rq_reqmsg,
/* imp_resend_replay is cleared with imp_lock held. */
135 spin_lock(&imp->imp_lock);
136 imp->imp_resend_replay = 0;
137 spin_unlock(&imp->imp_lock);
/* rc holds the outcome of the replay attempt and is what the CERROR prints. */
140 rc = ptlrpc_replay_req(req);
142 CERROR("recovery replay error %d for req "
143 LPU64"\n", rc, req->rq_xid);
152 * Schedule resending of the requests on the sending list. This is done
153 * after we have completed replaying requests and locks.
/*
 * ptlrpc_resend() - pass the requests on imp->imp_sending_list to
 * ptlrpc_resend_req().
 *
 * imp->imp_lock is taken first and imp_state is compared with
 * LUSTRE_IMP_RECOVER; when the state differs the lock is dropped again.
 * The statement that follows that unlock is not visible here (presumably
 * an early error return - TODO confirm).
 *
 * The sending list is then walked with cfs_list_for_each_entry_safe().
 * Every entry is sanity-checked by two LASSERTFs: the pointer must be above
 * CFS_PAGE_SIZE and differ from LP_POISON, and rq_type must differ from
 * LI_POISON.  ptlrpc_resend_req() is called for each entry for which
 * ptlrpc_no_resend() returns zero.  The lock taken at the top is released
 * by the final spin_unlock(), so the lock is held while the list is
 * walked, despite the first remark in the body suggesting it is not needed.
 *
 * The function is exported through EXPORT_SYMBOL().
 *
 * @imp: import whose sending list is processed.
 *
 * Returns an int; the return statements themselves are not visible here.
 */
155 int ptlrpc_resend(struct obd_import *imp)
157 struct ptlrpc_request *req, *next;
161 /* As long as we're in recovery, nothing should be added to the sending
162 * list, so we don't need to hold the lock during this iteration and
165 /* Well... what if lctl recover is called twice at the same time?
167 spin_lock(&imp->imp_lock);
168 if (imp->imp_state != LUSTRE_IMP_RECOVER) {
169 spin_unlock(&imp->imp_lock);
173 cfs_list_for_each_entry_safe(req, next, &imp->imp_sending_list,
175 LASSERTF((long)req > CFS_PAGE_SIZE && req != LP_POISON,
176 "req %p bad\n", req);
177 LASSERTF(req->rq_type != LI_POISON, "req %p freed\n", req);
178 if (!ptlrpc_no_resend(req))
179 ptlrpc_resend_req(req);
181 spin_unlock(&imp->imp_lock);
185 EXPORT_SYMBOL(ptlrpc_resend);
188 * Go through all requests in the delayed list and wake their threads.
/*
 * ptlrpc_wake_delayed() - wake every request on imp->imp_delayed_list.
 *
 * With imp->imp_lock held, the delayed list is walked with
 * cfs_list_for_each_safe().  Each node is converted to its ptlrpc_request
 * through the rq_list member, a D_HA DEBUG_REQ line showing rq_set is
 * emitted, and ptlrpc_client_wake_req() is called on the request.  No
 * visible statement removes a request from the list.
 *
 * The function is exported through EXPORT_SYMBOL().
 *
 * @imp: import whose delayed requests are woken.
 */
191 void ptlrpc_wake_delayed(struct obd_import *imp)
193 cfs_list_t *tmp, *pos;
194 struct ptlrpc_request *req;
/* The whole walk runs under imp_lock. */
196 spin_lock(&imp->imp_lock);
197 cfs_list_for_each_safe(tmp, pos, &imp->imp_delayed_list) {
198 req = cfs_list_entry(tmp, struct ptlrpc_request, rq_list);
200 DEBUG_REQ(D_HA, req, "waking (set %p):", req->rq_set);
201 ptlrpc_client_wake_req(req);
203 spin_unlock(&imp->imp_lock);
205 EXPORT_SYMBOL(ptlrpc_wake_delayed);
/*
 * ptlrpc_request_handle_notconn() - react to a request whose import was
 * found to be disconnected.
 *
 * The import is taken from failed_req->rq_import and a D_HA message is
 * logged with the obd name, the target and the remote UUID.
 *
 * ptlrpc_set_import_discon() is then called with the connection count read
 * from the failed request message.  When it returns non-zero:
 *  - an import that is not replayable is logged as auto-deactivating and
 *    passed to ptlrpc_deactivate_import();
 *  - ptlrpc_connect_import() is called only while imp_deactive is 0.
 * The exact nesting of these two steps cannot be confirmed here because the
 * closing braces are not visible.
 *
 * Finally, with failed_req->rq_lock held, rq_resend is set to 1 unless
 * rq_no_resend is already set.
 *
 * @failed_req: the request that hit the disconnected import; its rq_import,
 *              rq_reqmsg, rq_lock, rq_no_resend and rq_resend fields are
 *              used.
 */
207 void ptlrpc_request_handle_notconn(struct ptlrpc_request *failed_req)
209 struct obd_import *imp = failed_req->rq_import;
212 CDEBUG(D_HA, "import %s of %s@%s abruptly disconnected: reconnecting\n",
213 imp->imp_obd->obd_name, obd2cli_tgt(imp->imp_obd),
214 imp->imp_connection->c_remote_uuid.uuid);
216 if (ptlrpc_set_import_discon(imp,
217 lustre_msg_get_conn_cnt(failed_req->rq_reqmsg))) {
218 if (!imp->imp_replayable) {
219 CDEBUG(D_HA, "import %s@%s for %s not replayable, "
220 "auto-deactivating\n",
221 obd2cli_tgt(imp->imp_obd),
222 imp->imp_connection->c_remote_uuid.uuid,
223 imp->imp_obd->obd_name);
224 ptlrpc_deactivate_import(imp);
226 /* to control recovery via lctl {disable|enable}_recovery */
227 if (imp->imp_deactive == 0)
228 ptlrpc_connect_import(imp);
231 /* Wait for recovery to complete and resend. If evicted, then
232 this request will be errored out later.*/
/* rq_resend is only written with rq_lock held. */
233 spin_lock(&failed_req->rq_lock);
234 if (!failed_req->rq_no_resend)
235 failed_req->rq_resend = 1;
236 spin_unlock(&failed_req->rq_lock);
242 * Administratively activate/deactivate a client.
243 * This should only be called by the ioctl interface, currently
244 * - the lctl deactivate and activate commands
245 * - echo 0/1 >> /proc/osc/XXX/active
246 * - client umount -f (ll_umount_begin)
/*
 * ptlrpc_set_import_active() - administratively deactivate or activate an
 * import.
 *
 * Deactivation path (visible statements): print an LCONSOLE_WARN naming the
 * target, set imp_deactive to 1 with imp_lock held, raise
 * IMP_EVENT_DEACTIVATE through obd_import_event(), then call
 * ptlrpc_invalidate_import().
 *
 * Activation path (visible statements): log at D_HA, clear imp_deactive
 * with imp_lock held, raise IMP_EVENT_ACTIVATE through obd_import_event(),
 * then call ptlrpc_recover_import(imp, NULL, 0) and store its result in rc.
 *
 * NOTE(review): the tests on @active that select a path, the declaration
 * of rc and the return statement are not visible here.  The local obd is
 * initialised but no visible statement reads it.
 *
 * The function is exported through EXPORT_SYMBOL().
 *
 * @imp:    import to change.
 * @active: presumably zero to deactivate and non-zero to activate - TODO
 *          confirm against the missing condition.
 */
248 int ptlrpc_set_import_active(struct obd_import *imp, int active)
250 struct obd_device *obd = imp->imp_obd;
256 /* When deactivating, mark import invalid, and abort in-flight
259 LCONSOLE_WARN("setting import %s INACTIVE by administrator "
260 "request\n", obd2cli_tgt(imp->imp_obd));
262 /* set before invalidate to avoid messages about imp_inval
263 * set without imp_deactive in ptlrpc_import_delay_req */
264 spin_lock(&imp->imp_lock);
265 imp->imp_deactive = 1;
266 spin_unlock(&imp->imp_lock);
/* The event is raised after imp_lock has been released. */
268 obd_import_event(imp->imp_obd, imp, IMP_EVENT_DEACTIVATE);
270 ptlrpc_invalidate_import(imp);
273 /* When activating, mark import valid, and attempt recovery */
275 CDEBUG(D_HA, "setting import %s VALID\n",
276 obd2cli_tgt(imp->imp_obd));
278 spin_lock(&imp->imp_lock);
279 imp->imp_deactive = 0;
280 spin_unlock(&imp->imp_lock);
281 obd_import_event(imp->imp_obd, imp, IMP_EVENT_ACTIVATE);
/* NULL means no new UUID is supplied; 0 is the async argument. */
283 rc = ptlrpc_recover_import(imp, NULL, 0);
288 EXPORT_SYMBOL(ptlrpc_set_import_active);
290 /* Attempt to reconnect an import */
/*
 * ptlrpc_recover_import() - disconnect an import and try to reconnect it.
 *
 * Flow, as far as the visible statements show:
 *  1. With imp_lock held, check whether imp_state is LUSTRE_IMP_NEW,
 *     imp_deactive is set, or imp_inval_count is non-zero.  The statement
 *     controlled by that check is not visible here (presumably an error
 *     code is chosen and the function bails out - TODO confirm).
 *  2. Force the import to be disconnected with
 *     ptlrpc_set_import_discon(imp, 0).
 *  3. Convert @new_uuid with obd_str2uuid() and hand the result to
 *     import_set_conn_priority().  The guard that makes this step optional
 *     is not visible (ptlrpc_set_import_active() passes NULL for @new_uuid).
 *  4. With imp_lock held, set imp_force_verify when imp_state is not
 *     LUSTRE_IMP_DISCON.
 *  5. Call ptlrpc_connect_import(imp).
 *  6. Call l_wait_event() on imp_recovery_waitq with the condition
 *     !ptlrpc_import_in_recovery(imp) and an LWI_TIMEOUT() built from
 *     cfs_time_seconds(obd_timeout).
 *
 * rc receives, in turn, the results of import_set_conn_priority(),
 * ptlrpc_connect_import() and l_wait_event(); its declaration, the checks
 * made on it and the return statement are not visible here.
 *
 * The function is exported through EXPORT_SYMBOL().
 *
 * @imp:      import to recover.
 * @new_uuid: UUID string converted by obd_str2uuid() in step 3.
 * @async:    not referenced by any visible line; presumably selects whether
 *            step 6 is skipped - TODO confirm.
 */
291 int ptlrpc_recover_import(struct obd_import *imp, char *new_uuid, int async)
296 spin_lock(&imp->imp_lock);
297 if (imp->imp_state == LUSTRE_IMP_NEW || imp->imp_deactive ||
298 cfs_atomic_read(&imp->imp_inval_count))
300 spin_unlock(&imp->imp_lock);
304 /* force import to be disconnected. */
305 ptlrpc_set_import_discon(imp, 0);
308 struct obd_uuid uuid;
310 /* instruct import to use new uuid */
311 obd_str2uuid(&uuid, new_uuid);
312 rc = import_set_conn_priority(imp, &uuid);
317 /* Check if reconnect is already in progress */
318 spin_lock(&imp->imp_lock);
319 if (imp->imp_state != LUSTRE_IMP_DISCON) {
320 imp->imp_force_verify = 1;
323 spin_unlock(&imp->imp_lock);
327 rc = ptlrpc_connect_import(imp);
332 struct l_wait_info lwi;
/*
 * NOTE(review): secs is a signed int but is printed with %u below; also
 * confirm that cfs_time_seconds() really yields a value in seconds, as
 * the log text claims, before trusting the message.
 */
333 int secs = cfs_time_seconds(obd_timeout);
335 CDEBUG(D_HA, "%s: recovery started, waiting %u seconds\n",
336 obd2cli_tgt(imp->imp_obd), secs);
338 lwi = LWI_TIMEOUT(secs, NULL, NULL);
339 rc = l_wait_event(imp->imp_recovery_waitq,
340 !ptlrpc_import_in_recovery(imp), &lwi);
341 CDEBUG(D_HA, "%s: recovery finished\n",
342 obd2cli_tgt(imp->imp_obd));
349 EXPORT_SYMBOL(ptlrpc_recover_import);
/*
 * ptlrpc_import_in_recovery() - report whether an import is in recovery.
 *
 * imp_state is examined with imp_lock held and compared against
 * LUSTRE_IMP_FULL, LUSTRE_IMP_CLOSED and LUSTRE_IMP_DISCON; the lock is
 * released afterwards.
 *
 * NOTE(review): the statement controlled by the comparison, the variable
 * holding the result and the return statement are not visible here.
 * Presumably these three states are the ones treated as not in recovery,
 * since ptlrpc_recover_import() waits for this function to return zero -
 * TODO confirm.
 *
 * @imp: import whose state is examined.
 */
351 int ptlrpc_import_in_recovery(struct obd_import *imp)
354 spin_lock(&imp->imp_lock);
355 if (imp->imp_state == LUSTRE_IMP_FULL ||
356 imp->imp_state == LUSTRE_IMP_CLOSED ||
357 imp->imp_state == LUSTRE_IMP_DISCON)
359 spin_unlock(&imp->imp_lock);