/* -*- mode: c; c-basic-offset: 8; indent-tabs-mode: nil; -*-
 * vim:expandtab:shiftwidth=8:tabstop=8:
 *
 *  Portal-RPC reconnection and replay operations, for use in recovery.
 *
 *  Copyright (c) 2002, 2003 Cluster File Systems, Inc.
 *   Author: Mike Shaver <shaver@clusterfs.com>
 *
 *   This file is part of the Lustre file system, http://www.lustre.org
 *   Lustre is a trademark of Cluster File Systems, Inc.
 *
 *   You may have signed or agreed to another license before downloading
 *   this software.  If so, you are bound by the terms and conditions
 *   of that agreement, and the following does not apply to you.  See the
 *   LICENSE file included with this distribution for more information.
 *
 *   If you did not agree to a different license, then this copy of Lustre
 *   is open source software; you can redistribute it and/or modify it
 *   under the terms of version 2 of the GNU General Public License as
 *   published by the Free Software Foundation.
 *
 *   In either case, Lustre is distributed in the hope that it will be
 *   useful, but WITHOUT ANY WARRANTY; without even the implied warranty
 *   of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 *   license text for more details.
 */
28 #define DEBUG_SUBSYSTEM S_RPC
30 # include <libcfs/libcfs.h>
32 # include <liblustre.h>
35 #include <obd_support.h>
36 #include <lustre_ha.h>
37 #include <lustre_net.h>
38 #include <lustre_import.h>
39 #include <lustre_export.h>
42 #include <obd_class.h>
43 #include <obd_lov.h> /* for IOC_LOV_SET_OSC_ACTIVE */
44 #include <libcfs/list.h>
46 #include "ptlrpc_internal.h"
/* Single reconnect attempt for an import; defined at the end of this file. */
static int ptlrpc_recover_import_no_retry(struct obd_import *imp,
                                          char *new_uuid);
50 void ptlrpc_initiate_recovery(struct obd_import *imp)
54 CDEBUG(D_HA, "%s: starting recovery\n", obd2cli_tgt(imp->imp_obd));
55 ptlrpc_connect_import(imp, NULL);
60 int ptlrpc_replay_next(struct obd_import *imp, int *inflight)
63 struct list_head *tmp, *pos;
64 struct ptlrpc_request *req = NULL;
70 /* It might have committed some after we last spoke, so make sure we
71 * get rid of them now.
73 spin_lock(&imp->imp_lock);
74 imp->imp_last_transno_checked = 0;
75 ptlrpc_free_committed(imp);
76 last_transno = imp->imp_last_replay_transno;
77 spin_unlock(&imp->imp_lock);
79 CDEBUG(D_HA, "import %p from %s committed "LPU64" last "LPU64"\n",
80 imp, obd2cli_tgt(imp->imp_obd),
81 imp->imp_peer_committed_transno, last_transno);
83 /* Do I need to hold a lock across this iteration? We shouldn't be
84 * racing with any additions to the list, because we're in recovery
85 * and are therefore not processing additional requests to add. Calls
86 * to ptlrpc_free_committed might commit requests, but nothing "newer"
87 * than the one we're replaying (it can't be committed until it's
88 * replayed, and we're doing that here). l_f_e_safe protects against
89 * problems with the current request being committed, in the unlikely
90 * event of that race. So, in conclusion, I think that it's safe to
91 * perform this list-walk without the imp_lock held.
93 * But, the {mdc,osc}_replay_open callbacks both iterate
94 * request lists, and have comments saying they assume the
95 * imp_lock is being held by ptlrpc_replay, but it's not. it's
96 * just a little race...
98 list_for_each_safe(tmp, pos, &imp->imp_replay_list) {
99 req = list_entry(tmp, struct ptlrpc_request, rq_replay_list);
101 /* If need to resend the last sent transno (because a
102 reconnect has occurred), then stop on the matching
103 req and send it again. If, however, the last sent
104 transno has been committed then we continue replay
105 from the next request. */
106 if (imp->imp_resend_replay &&
107 req->rq_transno == last_transno) {
108 lustre_msg_add_flags(req->rq_reqmsg, MSG_RESENT);
112 if (req->rq_transno > last_transno) {
113 imp->imp_last_replay_transno = req->rq_transno;
120 spin_lock(&imp->imp_lock);
121 imp->imp_resend_replay = 0;
122 spin_unlock(&imp->imp_lock);
125 rc = ptlrpc_replay_req(req);
127 CERROR("recovery replay error %d for req "
128 LPD64"\n", rc, req->rq_xid);
136 int ptlrpc_resend(struct obd_import *imp)
138 struct ptlrpc_request *req, *next;
142 /* As long as we're in recovery, nothing should be added to the sending
143 * list, so we don't need to hold the lock during this iteration and
146 /* Well... what if lctl recover is called twice at the same time?
148 spin_lock(&imp->imp_lock);
149 if (imp->imp_state != LUSTRE_IMP_RECOVER) {
150 spin_unlock(&imp->imp_lock);
154 list_for_each_entry_safe(req, next, &imp->imp_sending_list, rq_list) {
155 LASSERTF((long)req > CFS_PAGE_SIZE && req != LP_POISON,
156 "req %p bad\n", req);
157 LASSERTF(req->rq_type != LI_POISON, "req %p freed\n", req);
158 if (!req->rq_no_resend)
159 ptlrpc_resend_req(req);
161 spin_unlock(&imp->imp_lock);
166 void ptlrpc_wake_delayed(struct obd_import *imp)
168 struct list_head *tmp, *pos;
169 struct ptlrpc_request *req;
171 spin_lock(&imp->imp_lock);
172 list_for_each_safe(tmp, pos, &imp->imp_delayed_list) {
173 req = list_entry(tmp, struct ptlrpc_request, rq_list);
175 DEBUG_REQ(D_HA, req, "waking (set %p):", req->rq_set);
176 ptlrpc_wake_client_req(req);
178 spin_unlock(&imp->imp_lock);
181 void ptlrpc_request_handle_notconn(struct ptlrpc_request *failed_req)
183 struct obd_import *imp = failed_req->rq_import;
186 CDEBUG(D_HA, "import %s of %s@%s abruptly disconnected: reconnecting\n",
187 imp->imp_obd->obd_name, obd2cli_tgt(imp->imp_obd),
188 imp->imp_connection->c_remote_uuid.uuid);
190 if (ptlrpc_set_import_discon(imp,
191 lustre_msg_get_conn_cnt(failed_req->rq_reqmsg))) {
192 if (!imp->imp_replayable) {
193 CDEBUG(D_HA, "import %s@%s for %s not replayable, "
194 "auto-deactivating\n",
195 obd2cli_tgt(imp->imp_obd),
196 imp->imp_connection->c_remote_uuid.uuid,
197 imp->imp_obd->obd_name);
198 ptlrpc_deactivate_import(imp);
200 /* to control recovery via lctl {disable|enable}_recovery */
201 if (imp->imp_deactive == 0)
202 ptlrpc_connect_import(imp, NULL);
205 /* Wait for recovery to complete and resend. If evicted, then
206 this request will be errored out later.*/
207 spin_lock(&failed_req->rq_lock);
208 if (!failed_req->rq_no_resend)
209 failed_req->rq_resend = 1;
210 spin_unlock(&failed_req->rq_lock);
216 * Administratively active/deactive a client.
217 * This should only be called by the ioctl interface, currently
218 * - the lctl deactivate and activate commands
219 * - echo 0/1 >> /proc/osc/XXX/active
220 * - client umount -f (ll_umount_begin)
222 int ptlrpc_set_import_active(struct obd_import *imp, int active)
224 struct obd_device *obd = imp->imp_obd;
230 /* When deactivating, mark import invalid, and abort in-flight
233 LCONSOLE_WARN("setting import %s INACTIVE by administrator "
234 "request\n", obd2cli_tgt(imp->imp_obd));
235 ptlrpc_invalidate_import(imp);
237 spin_lock(&imp->imp_lock);
238 imp->imp_deactive = 1;
239 spin_unlock(&imp->imp_lock);
242 /* When activating, mark import valid, and attempt recovery */
244 spin_lock(&imp->imp_lock);
245 imp->imp_deactive = 0;
246 spin_unlock(&imp->imp_lock);
248 CDEBUG(D_HA, "setting import %s VALID\n",
249 obd2cli_tgt(imp->imp_obd));
250 rc = ptlrpc_recover_import(imp, NULL);
256 /* Attempt to reconnect an import */
257 int ptlrpc_recover_import(struct obd_import *imp, char *new_uuid)
262 /* force import to be disconnected. */
263 ptlrpc_set_import_discon(imp, 0);
265 spin_lock(&imp->imp_lock);
266 imp->imp_deactive = 0;
267 spin_unlock(&imp->imp_lock);
269 rc = ptlrpc_recover_import_no_retry(imp, new_uuid);
274 int ptlrpc_import_in_recovery(struct obd_import *imp)
277 spin_lock(&imp->imp_lock);
278 if (imp->imp_state == LUSTRE_IMP_FULL ||
279 imp->imp_state == LUSTRE_IMP_CLOSED ||
280 imp->imp_state == LUSTRE_IMP_DISCON)
282 spin_unlock(&imp->imp_lock);
286 static int ptlrpc_recover_import_no_retry(struct obd_import *imp,
291 struct l_wait_info lwi;
294 /* Check if reconnect is already in progress */
295 spin_lock(&imp->imp_lock);
296 if (imp->imp_state != LUSTRE_IMP_DISCON) {
299 spin_unlock(&imp->imp_lock);
301 if (in_recovery == 1)
304 rc = ptlrpc_connect_import(imp, new_uuid);
308 CDEBUG(D_HA, "%s: recovery started, waiting\n",
309 obd2cli_tgt(imp->imp_obd));
311 lwi = LWI_TIMEOUT(cfs_timeout_cap(cfs_time_seconds(obd_timeout)),
313 rc = l_wait_event(imp->imp_recovery_waitq,
314 !ptlrpc_import_in_recovery(imp), &lwi);
315 CDEBUG(D_HA, "%s: recovery finished\n",
316 obd2cli_tgt(imp->imp_obd));