1 /* -*- mode: c; c-basic-offset: 8; indent-tabs-mode: nil; -*-
2 * vim:expandtab:shiftwidth=8:tabstop=8:
6 * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
8 * This program is free software; you can redistribute it and/or modify
9 * it under the terms of the GNU General Public License version 2 only,
10 * as published by the Free Software Foundation.
12 * This program is distributed in the hope that it will be useful, but
13 * WITHOUT ANY WARRANTY; without even the implied warranty of
14 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
15 * General Public License version 2 for more details (a copy is included
16 * in the LICENSE file that accompanied this code).
18 * You should have received a copy of the GNU General Public License
19 * version 2 along with this program; If not, see
20 * http://www.sun.com/software/products/lustre/docs/GPLv2.pdf
22 * Please contact Sun Microsystems, Inc., 4150 Network Circle, Santa Clara,
23 * CA 95054 USA or visit www.sun.com if you need additional information or
29 * Copyright 2008 Sun Microsystems, Inc. All rights reserved
30 * Use is subject to license terms.
33 * This file is part of Lustre, http://www.lustre.org/
34 * Lustre is a trademark of Sun Microsystems, Inc.
36 * lustre/ptlrpc/recover.c
38 * Author: Mike Shaver <shaver@clusterfs.com>
41 #define DEBUG_SUBSYSTEM S_RPC
43 # include <libcfs/libcfs.h>
45 # include <liblustre.h>
48 #include <obd_support.h>
49 #include <lustre_ha.h>
50 #include <lustre_net.h>
51 #include <lustre_import.h>
52 #include <lustre_export.h>
55 #include <obd_class.h>
56 #include <obd_lov.h> /* for IOC_LOV_SET_OSC_ACTIVE */
57 #include <libcfs/list.h>
59 #include "ptlrpc_internal.h"
/*
 * Forward declaration: the definition appears near the end of this file,
 * but ptlrpc_recover_import() calls it before that point.
 */
61 static int ptlrpc_recover_import_no_retry(struct obd_import *, char *);
64 * Start recovery on disconnected import.
65 * This is done by just attempting a connect
/*
 * imp: import to reconnect.
 *
 * Emits a D_HA debug message naming the import's target and then calls
 * ptlrpc_connect_import() with a NULL second argument.
 */
67 void ptlrpc_initiate_recovery(struct obd_import *imp)
71 CDEBUG(D_HA, "%s: starting recovery\n", obd2cli_tgt(imp->imp_obd));
/* The result of ptlrpc_connect_import() is discarded by this statement. */
72 ptlrpc_connect_import(imp, NULL);
78 * Identify what request from replay list needs to be replayed next
79 * (based on what we have already replayed) and send it to server.
/*
 * imp:      import whose imp_replay_list is examined.
 * inflight: output pointer per the signature; the statements that write
 *           through it are not among the lines shown here.
 *           NOTE(review): confirm its meaning against the full source.
 *
 * Visible steps, in order:
 *   1. Under imp_lock: zero imp_last_transno_checked, call
 *      ptlrpc_free_committed(), and snapshot imp_last_replay_transno.
 *   2. Without imp_lock: walk imp_replay_list, comparing each request's
 *      rq_transno with the snapshot; when imp_resend_replay is set,
 *      lustre_msg_add_flags() is applied to the request message.
 *   3. Under imp_lock: clear imp_resend_replay.
 *   4. Pass a request to ptlrpc_replay_req(); a CERROR line logs rc and
 *      the request's rq_xid.
 *
 * NOTE(review): the embedded line numbers skip, so the declarations of rc
 * and last_transno, the loop exit, the guard around CERROR and the return
 * statements are not visible in this listing.
 */
81 int ptlrpc_replay_next(struct obd_import *imp, int *inflight)
84 cfs_list_t *tmp, *pos;
85 struct ptlrpc_request *req = NULL;
91 /* It might have committed some after we last spoke, so make sure we
92 * get rid of them now.
94 cfs_spin_lock(&imp->imp_lock);
95 imp->imp_last_transno_checked = 0;
96 ptlrpc_free_committed(imp);
97 last_transno = imp->imp_last_replay_transno;
98 cfs_spin_unlock(&imp->imp_lock);
100 CDEBUG(D_HA, "import %p from %s committed "LPU64" last "LPU64"\n",
101 imp, obd2cli_tgt(imp->imp_obd),
102 imp->imp_peer_committed_transno, last_transno);
104 /* Do I need to hold a lock across this iteration? We shouldn't be
105 * racing with any additions to the list, because we're in recovery
106 * and are therefore not processing additional requests to add. Calls
107 * to ptlrpc_free_committed might commit requests, but nothing "newer"
108 * than the one we're replaying (it can't be committed until it's
109 * replayed, and we're doing that here). l_f_e_safe protects against
110 * problems with the current request being committed, in the unlikely
111 * event of that race. So, in conclusion, I think that it's safe to
112 * perform this list-walk without the imp_lock held.
114 * But, the {mdc,osc}_replay_open callbacks both iterate
115 * request lists, and have comments saying they assume the
116 * imp_lock is being held by ptlrpc_replay, but it's not. it's
117 * just a little race...
119 cfs_list_for_each_safe(tmp, pos, &imp->imp_replay_list) {
120 req = cfs_list_entry(tmp, struct ptlrpc_request,
123 /* If need to resend the last sent transno (because a
124 reconnect has occurred), then stop on the matching
125 req and send it again. If, however, the last sent
126 transno has been committed then we continue replay
127 from the next request. */
128 if (req->rq_transno > last_transno) {
129 if (imp->imp_resend_replay)
130 lustre_msg_add_flags(req->rq_reqmsg,
/* The flag tested above is cleared here while holding imp_lock. */
137 cfs_spin_lock(&imp->imp_lock);
138 imp->imp_resend_replay = 0;
139 cfs_spin_unlock(&imp->imp_lock);
142 rc = ptlrpc_replay_req(req);
144 CERROR("recovery replay error %d for req "
145 LPU64"\n", rc, req->rq_xid);
154 * Schedule resending of requests on sending_list. This is done after
155 * we have completed replaying of requests and locks.
/*
 * imp: import whose imp_sending_list is processed.
 *
 * Takes imp_lock and checks imp_state: when it is not LUSTRE_IMP_RECOVER
 * the lock is released on that branch. Otherwise imp_sending_list is
 * walked; each entry is asserted to be a plausible pointer (greater than
 * CFS_PAGE_SIZE, not LP_POISON) whose rq_type is not LI_POISON, and
 * ptlrpc_resend_req() is called for every request on which
 * ptlrpc_no_resend() evaluates to false. imp_lock is released after the
 * walk.
 *
 * NOTE(review): the return statements and the loop braces are not among
 * the lines shown here, so the returned values cannot be documented.
 */
157 int ptlrpc_resend(struct obd_import *imp)
159 struct ptlrpc_request *req, *next;
163 /* As long as we're in recovery, nothing should be added to the sending
164 * list, so we don't need to hold the lock during this iteration and
167 /* Well... what if lctl recover is called twice at the same time?
169 cfs_spin_lock(&imp->imp_lock);
170 if (imp->imp_state != LUSTRE_IMP_RECOVER) {
171 cfs_spin_unlock(&imp->imp_lock);
175 cfs_list_for_each_entry_safe(req, next, &imp->imp_sending_list,
177 LASSERTF((long)req > CFS_PAGE_SIZE && req != LP_POISON,
178 "req %p bad\n", req);
179 LASSERTF(req->rq_type != LI_POISON, "req %p freed\n", req);
180 if (!ptlrpc_no_resend(req))
181 ptlrpc_resend_req(req);
183 cfs_spin_unlock(&imp->imp_lock);
189 * Go through all requests in delayed list and wake their threads
/*
 * imp: import whose imp_delayed_list is walked.
 *
 * With imp_lock held for the whole walk, converts each list node to its
 * ptlrpc_request (linked through rq_list), emits a D_HA DEBUG_REQ message
 * that includes the request's rq_set, and calls ptlrpc_client_wake_req()
 * on it.
 */
192 void ptlrpc_wake_delayed(struct obd_import *imp)
/* tmp is the node converted to a request below; pos is used only by the
 * iteration macro. */
194 cfs_list_t *tmp, *pos;
195 struct ptlrpc_request *req;
197 cfs_spin_lock(&imp->imp_lock);
198 cfs_list_for_each_safe(tmp, pos, &imp->imp_delayed_list) {
199 req = cfs_list_entry(tmp, struct ptlrpc_request, rq_list);
201 DEBUG_REQ(D_HA, req, "waking (set %p):", req->rq_set);
202 ptlrpc_client_wake_req(req);
204 cfs_spin_unlock(&imp->imp_lock);
/*
 * failed_req: request whose import (rq_import) is being reported as
 *             abruptly disconnected, per the debug message below.
 *
 * Visible steps:
 *   - ptlrpc_set_import_discon() is called with the connection count read
 *     from the request message; its result is tested.
 *   - If the import is not replayable, that is logged and
 *     ptlrpc_deactivate_import() is called.
 *   - If imp_deactive is 0, ptlrpc_connect_import(imp, NULL) is called.
 *   - Under rq_lock, rq_resend is set to 1 unless rq_no_resend is set.
 *
 * NOTE(review): closing braces are not among the lines shown here, so the
 * exact nesting of these steps should be confirmed against the full source.
 */
207 void ptlrpc_request_handle_notconn(struct ptlrpc_request *failed_req)
209 struct obd_import *imp = failed_req->rq_import;
212 CDEBUG(D_HA, "import %s of %s@%s abruptly disconnected: reconnecting\n",
213 imp->imp_obd->obd_name, obd2cli_tgt(imp->imp_obd),
214 imp->imp_connection->c_remote_uuid.uuid);
216 if (ptlrpc_set_import_discon(imp,
217 lustre_msg_get_conn_cnt(failed_req->rq_reqmsg))) {
218 if (!imp->imp_replayable) {
219 CDEBUG(D_HA, "import %s@%s for %s not replayable, "
220 "auto-deactivating\n",
221 obd2cli_tgt(imp->imp_obd),
222 imp->imp_connection->c_remote_uuid.uuid,
223 imp->imp_obd->obd_name);
224 ptlrpc_deactivate_import(imp);
226 /* to control recovery via lctl {disable|enable}_recovery */
227 if (imp->imp_deactive == 0)
228 ptlrpc_connect_import(imp, NULL);
231 /* Wait for recovery to complete and resend. If evicted, then
232 this request will be errored out later.*/
/* Both request flags are accessed while holding the request's rq_lock. */
233 cfs_spin_lock(&failed_req->rq_lock);
234 if (!failed_req->rq_no_resend)
235 failed_req->rq_resend = 1;
236 cfs_spin_unlock(&failed_req->rq_lock);
242 * Administratively activate/deactivate a client.
243 * This should only be called by the ioctl interface, currently
244 * - the lctl deactivate and activate commands
245 * - echo 0/1 >> /proc/osc/XXX/active
246 * - client umount -f (ll_umount_begin)
/*
 * imp:    import to change.
 * active: requested state. NOTE(review): the statements that test this
 *         parameter are not among the lines shown here; the two paths
 *         below are identified from the existing in-body comments.
 *
 * Deactivating path: prints a console warning, sets imp_deactive to 1
 * under imp_lock, then calls ptlrpc_invalidate_import().
 * Activating path: logs a D_HA message and stores the result of
 * ptlrpc_recover_import(imp, NULL) in rc.
 *
 * NOTE(review): the declaration of rc and the return statement are not
 * visible in this listing.
 */
248 int ptlrpc_set_import_active(struct obd_import *imp, int active)
/* obd is not referenced on any of the lines shown in this listing. */
250 struct obd_device *obd = imp->imp_obd;
256 /* When deactivating, mark import invalid, and abort in-flight
259 LCONSOLE_WARN("setting import %s INACTIVE by administrator "
260 "request\n", obd2cli_tgt(imp->imp_obd));
262 /* set before invalidate to avoid messages about imp_inval
263 * set without imp_deactive in ptlrpc_import_delay_req */
264 cfs_spin_lock(&imp->imp_lock);
265 imp->imp_deactive = 1;
266 cfs_spin_unlock(&imp->imp_lock);
268 ptlrpc_invalidate_import(imp);
271 /* When activating, mark import valid, and attempt recovery */
273 CDEBUG(D_HA, "setting import %s VALID\n",
274 obd2cli_tgt(imp->imp_obd));
/* NULL is passed as the new_uuid argument of ptlrpc_recover_import(). */
275 rc = ptlrpc_recover_import(imp, NULL);
/*
 * imp:      import to reconnect.
 * new_uuid: forwarded unchanged to ptlrpc_recover_import_no_retry().
 *
 * Visible steps:
 *   1. Under imp_lock, read imp_inval_count; when it is non-zero the lock
 *      is released on that branch (the rest of the branch is not shown).
 *   2. Call ptlrpc_set_import_discon(imp, 0).
 *   3. Under imp_lock, clear imp_deactive.
 *   4. Store the result of ptlrpc_recover_import_no_retry() in rc.
 *
 * NOTE(review): the declaration of rc and the return statements are not
 * among the lines shown here.
 */
281 /* Attempt to reconnect an import */
282 int ptlrpc_recover_import(struct obd_import *imp, char *new_uuid)
287 cfs_spin_lock(&imp->imp_lock);
288 if (cfs_atomic_read(&imp->imp_inval_count)) {
289 cfs_spin_unlock(&imp->imp_lock);
292 cfs_spin_unlock(&imp->imp_lock);
294 /* force import to be disconnected. */
295 ptlrpc_set_import_discon(imp, 0);
/* imp_deactive is reset only after the forced disconnect above. */
297 cfs_spin_lock(&imp->imp_lock);
298 imp->imp_deactive = 0;
299 cfs_spin_unlock(&imp->imp_lock);
301 rc = ptlrpc_recover_import_no_retry(imp, new_uuid);
/*
 * imp: import whose imp_state is inspected.
 *
 * With imp_lock held, compares imp_state against LUSTRE_IMP_FULL,
 * LUSTRE_IMP_CLOSED and LUSTRE_IMP_DISCON, then releases the lock.
 *
 * NOTE(review): the statement executed when one of the three states
 * matches, and the return statement, are not among the lines shown here.
 * Presumably those states mean "not in recovery" (the l_wait_event() call
 * in this file waits for !ptlrpc_import_in_recovery(imp)) - confirm
 * against the full source.
 */
306 int ptlrpc_import_in_recovery(struct obd_import *imp)
309 cfs_spin_lock(&imp->imp_lock);
310 if (imp->imp_state == LUSTRE_IMP_FULL ||
311 imp->imp_state == LUSTRE_IMP_CLOSED ||
312 imp->imp_state == LUSTRE_IMP_DISCON)
314 cfs_spin_unlock(&imp->imp_lock);
/*
 * imp: import to reconnect. The second parameter is on a line not shown
 * here; the prototype earlier in this file gives its type as char *, and
 * the body refers to it as new_uuid.
 *
 * Visible steps:
 *   1. Under imp_lock, test whether imp_state differs from
 *      LUSTRE_IMP_DISCON.
 *   2. Test in_recovery == 1 (the branch body is not shown).
 *   3. Store the result of ptlrpc_connect_import(imp, new_uuid) in rc.
 *   4. Log "recovery started", build a wait descriptor with LWI_TIMEOUT
 *      from obd_timeout converted by cfs_time_seconds() and passed through
 *      cfs_timeout_cap(), and wait on imp_recovery_waitq until
 *      ptlrpc_import_in_recovery(imp) evaluates to false; rc receives the
 *      result of l_wait_event().
 *   5. Log "recovery finished".
 *
 * NOTE(review): the declarations of rc and in_recovery, the assignment to
 * in_recovery, the remaining LWI_TIMEOUT arguments, any check of rc after
 * the connect call, and the end of the function are not among the lines
 * shown here.
 */
318 static int ptlrpc_recover_import_no_retry(struct obd_import *imp,
323 struct l_wait_info lwi;
326 /* Check if reconnect is already in progress */
327 cfs_spin_lock(&imp->imp_lock);
328 if (imp->imp_state != LUSTRE_IMP_DISCON) {
331 cfs_spin_unlock(&imp->imp_lock);
333 if (in_recovery == 1)
336 rc = ptlrpc_connect_import(imp, new_uuid);
340 CDEBUG(D_HA, "%s: recovery started, waiting\n",
341 obd2cli_tgt(imp->imp_obd));
343 lwi = LWI_TIMEOUT(cfs_timeout_cap(cfs_time_seconds(obd_timeout)),
345 rc = l_wait_event(imp->imp_recovery_waitq,
346 !ptlrpc_import_in_recovery(imp), &lwi);
347 CDEBUG(D_HA, "%s: recovery finished\n",
348 obd2cli_tgt(imp->imp_obd));