1 /* -*- mode: c; c-basic-offset: 8; indent-tabs-mode: nil; -*-
2 * vim:expandtab:shiftwidth=8:tabstop=8:
6 * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
8 * This program is free software; you can redistribute it and/or modify
9 * it under the terms of the GNU General Public License version 2 only,
10 * as published by the Free Software Foundation.
12 * This program is distributed in the hope that it will be useful, but
13 * WITHOUT ANY WARRANTY; without even the implied warranty of
14 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
15 * General Public License version 2 for more details (a copy is included
16 * in the LICENSE file that accompanied this code).
18 * You should have received a copy of the GNU General Public License
19 * version 2 along with this program; If not, see
20 * http://www.sun.com/software/products/lustre/docs/GPLv2.pdf
22 * Please contact Sun Microsystems, Inc., 4150 Network Circle, Santa Clara,
23 * CA 95054 USA or visit www.sun.com if you need additional information or
29 * Copyright 2008 Sun Microsystems, Inc. All rights reserved
30 * Use is subject to license terms.
33 * This file is part of Lustre, http://www.lustre.org/
34 * Lustre is a trademark of Sun Microsystems, Inc.
36 * lustre/ptlrpc/recover.c
38 * Author: Mike Shaver <shaver@clusterfs.com>
41 #define DEBUG_SUBSYSTEM S_RPC
43 # include <libcfs/libcfs.h>
45 # include <liblustre.h>
48 #include <obd_support.h>
49 #include <lustre_ha.h>
50 #include <lustre_net.h>
51 #include <lustre_import.h>
52 #include <lustre_export.h>
55 #include <obd_class.h>
56 #include <obd_lov.h> /* for IOC_LOV_SET_OSC_ACTIVE */
57 #include <libcfs/list.h>
59 #include "ptlrpc_internal.h"
/* Forward declaration: this static helper is defined near the end of the
 * file and is called from ptlrpc_recover_import(). */
61 static int ptlrpc_recover_import_no_retry(struct obd_import *, char *);
/*
 * Start recovery for an import.
 *
 * Emits a D_HA debug line naming the import's target and then issues a
 * connect on the import through ptlrpc_connect_import(), passing NULL as
 * the second (UUID) argument.
 *
 * imp: the import to recover.
 */
63 void ptlrpc_initiate_recovery(struct obd_import *imp)
67 CDEBUG(D_HA, "%s: starting recovery\n", obd2cli_tgt(imp->imp_obd));
68 ptlrpc_connect_import(imp, NULL);
/*
 * Select the next request on the import's replay list and replay it.
 *
 * With imp_lock held, imp_last_transno_checked is reset to 0,
 * ptlrpc_free_committed() is called to discard requests that have already
 * committed, and imp_last_replay_transno is sampled into last_transno.
 * The replay list is then walked with the _safe list iterator, without
 * imp_lock (see the discussion in the body), looking for a request whose
 * rq_transno is greater than the sampled value.  When imp_resend_replay
 * is set, extra flags are added to that request's message.
 * imp_resend_replay is cleared under imp_lock and the request is handed
 * to ptlrpc_replay_req(); a CERROR line reports the return code and the
 * request's rq_xid for a replay error.
 *
 * imp:      import being recovered.
 * inflight: NOTE(review): presumably an output flag telling the caller
 *           whether a replay was sent -- confirm against the full body.
 */
73 int ptlrpc_replay_next(struct obd_import *imp, int *inflight)
76 cfs_list_t *tmp, *pos;
77 struct ptlrpc_request *req = NULL;
83 /* It might have committed some after we last spoke, so make sure we
84 * get rid of them now.
86 cfs_spin_lock(&imp->imp_lock);
87 imp->imp_last_transno_checked = 0;
88 ptlrpc_free_committed(imp);
89 last_transno = imp->imp_last_replay_transno;
90 cfs_spin_unlock(&imp->imp_lock);
92 CDEBUG(D_HA, "import %p from %s committed "LPU64" last "LPU64"\n",
93 imp, obd2cli_tgt(imp->imp_obd),
94 imp->imp_peer_committed_transno, last_transno);
96 /* Do I need to hold a lock across this iteration? We shouldn't be
97 * racing with any additions to the list, because we're in recovery
98 * and are therefore not processing additional requests to add. Calls
99 * to ptlrpc_free_committed might commit requests, but nothing "newer"
100 * than the one we're replaying (it can't be committed until it's
101 * replayed, and we're doing that here). l_f_e_safe protects against
102 * problems with the current request being committed, in the unlikely
103 * event of that race. So, in conclusion, I think that it's safe to
104 * perform this list-walk without the imp_lock held.
106 * But, the {mdc,osc}_replay_open callbacks both iterate
107 * request lists, and have comments saying they assume the
108 * imp_lock is being held by ptlrpc_replay, but it's not. it's
109 * just a little race...
111 cfs_list_for_each_safe(tmp, pos, &imp->imp_replay_list) {
112 req = cfs_list_entry(tmp, struct ptlrpc_request,
115 /* If need to resend the last sent transno (because a
116 reconnect has occurred), then stop on the matching
117 req and send it again. If, however, the last sent
118 transno has been committed then we continue replay
119 from the next request. */
120 if (req->rq_transno > last_transno) {
121 if (imp->imp_resend_replay)
122 lustre_msg_add_flags(req->rq_reqmsg,
129 cfs_spin_lock(&imp->imp_lock);
130 imp->imp_resend_replay = 0;
131 cfs_spin_unlock(&imp->imp_lock);
134 rc = ptlrpc_replay_req(req);
136 CERROR("recovery replay error %d for req "
137 LPU64"\n", rc, req->rq_xid);
/*
 * Resend the requests queued on the import's sending list.
 *
 * The import state is checked under imp_lock: when it is not
 * LUSTRE_IMP_RECOVER the lock is dropped without walking the list
 * (NOTE(review): presumably an error is returned on that path -- confirm).
 * Otherwise imp_sending_list is walked with imp_lock still held.  Each
 * entry is sanity-checked with LASSERTF (pointer above CFS_PAGE_SIZE and
 * not LP_POISON, rq_type not LI_POISON) and, unless rq_no_resend is set,
 * passed to ptlrpc_resend_req().  The lock is released after the loop.
 *
 * imp: import whose queued requests are to be resent.
 */
145 int ptlrpc_resend(struct obd_import *imp)
147 struct ptlrpc_request *req, *next;
151 /* As long as we're in recovery, nothing should be added to the sending
152 * list, so we don't need to hold the lock during this iteration and
155 /* Well... what if lctl recover is called twice at the same time?
157 cfs_spin_lock(&imp->imp_lock);
158 if (imp->imp_state != LUSTRE_IMP_RECOVER) {
159 cfs_spin_unlock(&imp->imp_lock);
163 cfs_list_for_each_entry_safe(req, next, &imp->imp_sending_list,
165 LASSERTF((long)req > CFS_PAGE_SIZE && req != LP_POISON,
166 "req %p bad\n", req);
167 LASSERTF(req->rq_type != LI_POISON, "req %p freed\n", req);
168 if (!req->rq_no_resend)
169 ptlrpc_resend_req(req);
171 cfs_spin_unlock(&imp->imp_lock);
/*
 * Wake every request queued on the import's delayed list.
 *
 * With imp_lock held, imp_delayed_list is walked (entries are linked
 * through rq_list).  For each request a D_HA debug line is emitted that
 * includes its rq_set pointer, and ptlrpc_client_wake_req() is called.
 *
 * imp: import whose delayed requests are to be woken.
 */
176 void ptlrpc_wake_delayed(struct obd_import *imp)
178 cfs_list_t *tmp, *pos;
179 struct ptlrpc_request *req;
181 cfs_spin_lock(&imp->imp_lock);
182 cfs_list_for_each_safe(tmp, pos, &imp->imp_delayed_list) {
183 req = cfs_list_entry(tmp, struct ptlrpc_request, rq_list);
185 DEBUG_REQ(D_HA, req, "waking (set %p):", req->rq_set);
186 ptlrpc_client_wake_req(req);
188 cfs_spin_unlock(&imp->imp_lock);
/*
 * React to a request that failed because its import lost its connection.
 *
 * The disconnect is logged at D_HA, then ptlrpc_set_import_discon() is
 * called with the connection count read from the failed request's
 * message.  When that call returns non-zero, an import that is not
 * replayable is deactivated through ptlrpc_deactivate_import().  A
 * reconnect is issued with ptlrpc_connect_import(imp, NULL) only while
 * imp_deactive is 0.  The failed request is marked rq_resend under its
 * rq_lock unless rq_no_resend is set.
 *
 * failed_req: the request that failed; the import acted on is its
 *             rq_import.
 */
191 void ptlrpc_request_handle_notconn(struct ptlrpc_request *failed_req)
193 struct obd_import *imp = failed_req->rq_import;
196 CDEBUG(D_HA, "import %s of %s@%s abruptly disconnected: reconnecting\n",
197 imp->imp_obd->obd_name, obd2cli_tgt(imp->imp_obd),
198 imp->imp_connection->c_remote_uuid.uuid);
200 if (ptlrpc_set_import_discon(imp,
201 lustre_msg_get_conn_cnt(failed_req->rq_reqmsg))) {
202 if (!imp->imp_replayable) {
203 CDEBUG(D_HA, "import %s@%s for %s not replayable, "
204 "auto-deactivating\n",
205 obd2cli_tgt(imp->imp_obd),
206 imp->imp_connection->c_remote_uuid.uuid,
207 imp->imp_obd->obd_name);
208 ptlrpc_deactivate_import(imp);
210 /* to control recovery via lctl {disable|enable}_recovery */
211 if (imp->imp_deactive == 0)
212 ptlrpc_connect_import(imp, NULL);
215 /* Wait for recovery to complete and resend. If evicted, then
216 this request will be errored out later.*/
217 cfs_spin_lock(&failed_req->rq_lock);
218 if (!failed_req->rq_no_resend)
219 failed_req->rq_resend = 1;
220 cfs_spin_unlock(&failed_req->rq_lock);
226 * Administratively activate/deactivate a client.
227 * This should only be called by the ioctl interface, currently
228 * - the lctl deactivate and activate commands
229 * - echo 0/1 >> /proc/osc/XXX/active
230 * - client umount -f (ll_umount_begin)
/*
 * Deactivation path: a console warning is printed, imp_deactive is set
 * to 1 under imp_lock, and only then is the import invalidated with
 * ptlrpc_invalidate_import().
 * Activation path: a D_HA debug line is logged and recovery is attempted
 * through ptlrpc_recover_import(imp, NULL); its result is stored in rc.
 *
 * imp:    import to change.
 * active: selects the path; NOTE(review): presumably zero deactivates
 *         and non-zero activates -- confirm against the branch conditions.
 */
232 int ptlrpc_set_import_active(struct obd_import *imp, int active)
234 struct obd_device *obd = imp->imp_obd;
240 /* When deactivating, mark import invalid, and abort in-flight
243 LCONSOLE_WARN("setting import %s INACTIVE by administrator "
244 "request\n", obd2cli_tgt(imp->imp_obd));
246 /* set before invalidate to avoid messages about imp_inval
247 * set without imp_deactive in ptlrpc_import_delay_req */
248 cfs_spin_lock(&imp->imp_lock);
249 imp->imp_deactive = 1;
250 cfs_spin_unlock(&imp->imp_lock);
252 ptlrpc_invalidate_import(imp);
255 /* When activating, mark import valid, and attempt recovery */
257 CDEBUG(D_HA, "setting import %s VALID\n",
258 obd2cli_tgt(imp->imp_obd));
259 rc = ptlrpc_recover_import(imp, NULL);
265 /* Attempt to reconnect an import */
/*
 * Force the import into the disconnected state and start a reconnect.
 *
 * imp_inval_count is read under imp_lock; when it is non-zero the lock is
 * dropped (NOTE(review): presumably the function bails out with an error
 * at that point -- confirm).  Otherwise the import is marked disconnected
 * with ptlrpc_set_import_discon(imp, 0), imp_deactive is cleared under
 * imp_lock, and the work is handed to ptlrpc_recover_import_no_retry(),
 * whose result is stored in rc.
 *
 * imp:      import to recover.
 * new_uuid: passed through unchanged to ptlrpc_recover_import_no_retry().
 */
266 int ptlrpc_recover_import(struct obd_import *imp, char *new_uuid)
271 cfs_spin_lock(&imp->imp_lock);
272 if (cfs_atomic_read(&imp->imp_inval_count)) {
273 cfs_spin_unlock(&imp->imp_lock);
276 cfs_spin_unlock(&imp->imp_lock);
278 /* force import to be disconnected. */
279 ptlrpc_set_import_discon(imp, 0);
        /* Clear imp_deactive, the flag ptlrpc_set_import_active() sets on
         * deactivation and ptlrpc_request_handle_notconn() checks before
         * reconnecting. */
281 cfs_spin_lock(&imp->imp_lock);
282 imp->imp_deactive = 0;
283 cfs_spin_unlock(&imp->imp_lock);
285 rc = ptlrpc_recover_import_no_retry(imp, new_uuid);
/*
 * Report whether the import is currently in recovery.
 *
 * imp_state is examined under imp_lock.  LUSTRE_IMP_FULL,
 * LUSTRE_IMP_CLOSED and LUSTRE_IMP_DISCON are the three states singled
 * out by the check.  NOTE(review): presumably those three yield 0 ("not
 * in recovery") and every other state yields non-zero -- confirm.
 *
 * imp: import to query.
 */
290 int ptlrpc_import_in_recovery(struct obd_import *imp)
293 cfs_spin_lock(&imp->imp_lock);
294 if (imp->imp_state == LUSTRE_IMP_FULL ||
295 imp->imp_state == LUSTRE_IMP_CLOSED ||
296 imp->imp_state == LUSTRE_IMP_DISCON)
298 cfs_spin_unlock(&imp->imp_lock);
/*
 * Start a reconnect of the import and wait for recovery to finish.
 *
 * imp_state is checked under imp_lock; any state other than
 * LUSTRE_IMP_DISCON is treated as "reconnect already in progress" and is
 * handled separately through the in_recovery flag.  Otherwise
 * ptlrpc_connect_import() is called with new_uuid, after which the caller
 * sleeps on imp_recovery_waitq until ptlrpc_import_in_recovery() returns
 * zero or the wait times out.
 *
 * imp:      import to reconnect.
 * new_uuid: forwarded to ptlrpc_connect_import().
 */
302 static int ptlrpc_recover_import_no_retry(struct obd_import *imp,
307 struct l_wait_info lwi;
310 /* Check if reconnect is already in progress */
311 cfs_spin_lock(&imp->imp_lock);
312 if (imp->imp_state != LUSTRE_IMP_DISCON) {
315 cfs_spin_unlock(&imp->imp_lock);
317 if (in_recovery == 1)
320 rc = ptlrpc_connect_import(imp, new_uuid);
324 CDEBUG(D_HA, "%s: recovery started, waiting\n",
325 obd2cli_tgt(imp->imp_obd));
        /* The wait below is bounded by a timeout derived from obd_timeout. */
327 lwi = LWI_TIMEOUT(cfs_timeout_cap(cfs_time_seconds(obd_timeout)),
329 rc = l_wait_event(imp->imp_recovery_waitq,
330 !ptlrpc_import_in_recovery(imp), &lwi);
331 CDEBUG(D_HA, "%s: recovery finished\n",
332 obd2cli_tgt(imp->imp_obd));