1 /* -*- mode: c; c-basic-offset: 8; indent-tabs-mode: nil; -*-
2 * vim:expandtab:shiftwidth=8:tabstop=8:
4 * Copyright (c) 2002, 2003 Cluster File Systems, Inc.
6 * This file is part of Lustre, http://www.lustre.org.
8 * Lustre is free software; you can redistribute it and/or
9 * modify it under the terms of version 2 of the GNU General Public
10 * License as published by the Free Software Foundation.
12 * Lustre is distributed in the hope that it will be useful,
13 * but WITHOUT ANY WARRANTY; without even the implied warranty of
14 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
15 * GNU General Public License for more details.
17 * You should have received a copy of the GNU General Public License
18 * along with Lustre; if not, write to the Free Software
19 * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
23 #define DEBUG_SUBSYSTEM S_RPC
27 #include <liblustre.h>
30 #include <linux/obd_support.h>
31 #include <linux/obd_class.h>
32 #include <linux/lustre_lib.h>
33 #include <linux/lustre_ha.h>
34 #include <linux/lustre_import.h>
/*
 * Set up a ptlrpc client descriptor by recording the request and reply
 * portal numbers in cl.
 * NOTE(review): no use of the name argument is visible in this listing --
 * confirm against the complete source.
 */
36 void ptlrpc_init_client(int req_portal, int rep_portal, char *name,
37 struct ptlrpc_client *cl)
39 cl->cli_request_portal = req_portal;
40 cl->cli_reply_portal = rep_portal;
/*
 * Return the address of the remote UUID held by the request's connection
 * (req->rq_connection->c_remote_uuid).  The result points into the
 * connection structure; it is not a copy.
 */
44 struct obd_uuid *ptlrpc_req_to_uuid(struct ptlrpc_request *req)
46 return &req->rq_connection->c_remote_uuid;
/*
 * Map a UUID to a connection: translate uuid to a peer with
 * ptlrpc_uuid_to_peer(), fetch a connection for that peer through
 * ptlrpc_get_connection(), copy uuid into the connection's c_remote_uuid
 * and log the resulting uuid -> connection mapping.
 * A "cannot find peer" error is logged on the lookup-failure path.
 * NOTE(review): the checks guarding the error path and the memcpy are not
 * visible in this listing -- confirm against the complete source.
 */
49 struct ptlrpc_connection *ptlrpc_uuid_to_connection(struct obd_uuid *uuid)
51 struct ptlrpc_connection *c;
52 struct ptlrpc_peer peer;
55 err = ptlrpc_uuid_to_peer(uuid, &peer);
57 CERROR("cannot find peer %s!\n", uuid->uuid);
61 c = ptlrpc_get_connection(&peer, uuid);
63 memcpy(c->c_remote_uuid.uuid,
64 uuid->uuid, sizeof(c->c_remote_uuid.uuid));
68 CDEBUG(D_INFO, "%s -> %p\n", uuid->uuid, c);
/*
 * Point an existing connection at the peer that uuid currently resolves
 * to: the peer is looked up with ptlrpc_uuid_to_peer() and then copied
 * over conn->c_peer.  A lookup problem is reported with CERROR.
 * NOTE(review): whether the copy is skipped after a failed lookup is not
 * visible in this listing.
 */
73 void ptlrpc_readdress_connection(struct ptlrpc_connection *conn,struct obd_uuid *uuid)
75 struct ptlrpc_peer peer;
78 err = ptlrpc_uuid_to_peer (uuid, &peer);
80 CERROR("cannot find peer %s!\n", uuid->uuid);
84 memcpy (&conn->c_peer, &peer, sizeof (peer));
/*
 * Allocate and initialise a bulk descriptor for conn.
 * The new descriptor takes a reference on conn (ptlrpc_connection_addref),
 * starts with bd_refcount == 1, has its wait queue, page list and set-chain
 * list initialised, and has both portals handles (bd_md_h, bd_me_h) set to
 * the invalid handle.
 * NOTE(review): the allocation-failure check and the return statement are
 * not visible in this listing.
 */
88 struct ptlrpc_bulk_desc *ptlrpc_prep_bulk(struct ptlrpc_connection *conn)
90 struct ptlrpc_bulk_desc *desc;
92 OBD_ALLOC(desc, sizeof(*desc));
94 desc->bd_connection = ptlrpc_connection_addref(conn);
95 atomic_set(&desc->bd_refcount, 1);
96 init_waitqueue_head(&desc->bd_waitq);
97 INIT_LIST_HEAD(&desc->bd_page_list);
98 INIT_LIST_HEAD(&desc->bd_set_chain);
99 ptl_set_inv_handle(&desc->bd_md_h);
100 ptl_set_inv_handle(&desc->bd_me_h);
/*
 * Translate the state flags of a bulk descriptor into an error code.
 * When PTL_RPC_FL_TIMEOUT is set in bd_flags the code depends on
 * PTL_RPC_FL_INTR: -ERESTARTSYS if the wait was also interrupted.
 * NOTE(review): the other arm of the conditional and the non-timeout
 * result are not visible in this listing.
 */
106 int ptlrpc_bulk_error(struct ptlrpc_bulk_desc *desc)
109 if (desc->bd_flags & PTL_RPC_FL_TIMEOUT) {
110 rc = (desc->bd_flags & PTL_RPC_FL_INTR ? -ERESTARTSYS :
/*
 * Allocate a bulk page entry and attach it to desc: the entry records its
 * owning descriptor in bp_desc, is appended to the tail of
 * desc->bd_page_list, and desc->bd_page_count is incremented.
 * NOTE(review): the allocation-failure check and the return statement are
 * not visible in this listing.
 */
116 struct ptlrpc_bulk_page *ptlrpc_prep_bulk_page(struct ptlrpc_bulk_desc *desc)
118 struct ptlrpc_bulk_page *bulk;
120 OBD_ALLOC(bulk, sizeof(*bulk));
122 bulk->bp_desc = desc;
123 list_add_tail(&bulk->bp_link, &desc->bd_page_list);
124 desc->bd_page_count++;
/*
 * Destroy a bulk descriptor.
 * Asserts that the descriptor is no longer linked on a set chain, logs an
 * error if its reference count is not zero, frees every page entry still
 * on bd_page_list (safe iteration, because ptlrpc_free_bulk_page unlinks
 * the entry), drops the connection reference and finally frees desc.
 */
129 void ptlrpc_free_bulk(struct ptlrpc_bulk_desc *desc)
131 struct list_head *tmp, *next;
138 LASSERT(list_empty(&desc->bd_set_chain));
140 if (atomic_read(&desc->bd_refcount) != 0)
141 CERROR("freeing desc %p with refcount %d!\n", desc,
142 atomic_read(&desc->bd_refcount));
144 list_for_each_safe(tmp, next, &desc->bd_page_list) {
145 struct ptlrpc_bulk_page *bulk;
146 bulk = list_entry(tmp, struct ptlrpc_bulk_page, bp_link);
147 ptlrpc_free_bulk_page(bulk);
150 ptlrpc_put_connection(desc->bd_connection);
152 OBD_FREE(desc, sizeof(*desc));
/*
 * Release one bulk page entry: unlink it from its descriptor's page list,
 * decrement the owning descriptor's bd_page_count and free the entry.
 */
156 void ptlrpc_free_bulk_page(struct ptlrpc_bulk_page *bulk)
164 list_del(&bulk->bp_link);
165 bulk->bp_desc->bd_page_count--;
166 OBD_FREE(bulk, sizeof(*bulk));
/*
 * Timeout callback for a bulk read/write set (data is the obd_brw_set);
 * ll_brw_sync_wait passes it to LWI_TIMEOUT_INTR.
 * Marks the set with PTL_RPC_FL_TIMEOUT and walks brw_desc_head.
 * Descriptors already flagged PTL_BULK_FL_RCVD or PTL_BULK_FL_SENT are
 * treated as completed.  For the others it attempts PtlMDUnlink on the
 * descriptor's MD, logs the timed-out I/O, drops one count from
 * set->brw_refcount (waking brw_waitq when that reaches zero) and, if the
 * class_signal_connection_failure hook is set, reports the connection as
 * failed.
 * The trailing comments describe the return convention: 0 means go back
 * to sleep, 1 means abort with -ETIMEDOUT.
 * NOTE(review): the return statement itself is not visible in this listing.
 */
170 static int ll_sync_brw_timeout(void *data)
172 struct obd_brw_set *set = data;
173 struct list_head *tmp;
179 set->brw_flags |= PTL_RPC_FL_TIMEOUT;
181 list_for_each(tmp, &set->brw_desc_head) {
182 struct ptlrpc_bulk_desc *desc =
183 list_entry(tmp, struct ptlrpc_bulk_desc, bd_set_chain);
185 /* Skip descriptors that were completed successfully. */
186 if (desc->bd_flags & (PTL_BULK_FL_RCVD | PTL_BULK_FL_SENT))
189 LASSERT(desc->bd_connection);
191 /* If PtlMDUnlink succeeds, then bulk I/O on the MD hasn't
192 * even started yet. XXX where do we kunmup the thing?
194 * If it fail with PTL_MD_BUSY, then the network is still
195 * reading/writing the buffers and we must wait for it to
196 * complete (which it will within finite time, most
197 * probably with failure; we really need portals error
198 * events to detect that).
200 * Otherwise (PTL_INV_MD) it completed after the bd_flags
203 if (PtlMDUnlink(desc->bd_md_h) != PTL_OK) {
204 CERROR("Near-miss on OST %s -- need to adjust "
206 desc->bd_connection->c_remote_uuid.uuid);
210 CERROR("IO of %d pages to/from %s:%d (conn %p) timed out\n",
212 desc->bd_connection->c_remote_uuid.uuid,
213 desc->bd_portal, desc->bd_connection);
215 /* This one will "never" arrive, don't wait for it. */
216 if (atomic_dec_and_test(&set->brw_refcount))
217 wake_up(&set->brw_waitq);
219 if (class_signal_connection_failure)
220 class_signal_connection_failure(desc->bd_connection);
225 /* 0 = We go back to sleep, until we're resumed or interrupted */
226 /* 1 = We can't be recovered, just abort the syscall with -ETIMEDOUT */
/*
 * Interrupt callback for a bulk read/write set (data is the obd_brw_set):
 * records the interruption by setting PTL_RPC_FL_INTR in brw_flags and
 * returns 1.  Per the inline remark the return value is currently ignored
 * by the caller.
 */
230 static int ll_sync_brw_intr(void *data)
232 struct obd_brw_set *set = data;
235 set->brw_flags |= PTL_RPC_FL_INTR;
236 RETURN(1); /* ignored, as of this writing */
/*
 * Synchronisation callback for a bulk read/write set.
 * A reference on the set is held for the duration of the call
 * (obd_brw_set_addref / obd_brw_set_decref).
 * Waiting path: sleeps on brw_waitq until brw_desc_count reaches zero,
 * with a timeout of obd_timeout * HZ handled by ll_sync_brw_timeout and
 * interruption handled by ll_sync_brw_intr; afterwards every descriptor
 * is unlinked from the set and its reference dropped.
 * CB_PHASE_FINISH path: decrements brw_desc_count and wakes the waiter
 * when it reaches zero.
 * NOTE(review): the switch on phase, the label of the waiting case and the
 * return value are not visible in this listing.
 */
239 int ll_brw_sync_wait(struct obd_brw_set *set, int phase)
241 struct l_wait_info lwi;
242 struct list_head *tmp, *next;
246 obd_brw_set_addref(set);
249 lwi = LWI_TIMEOUT_INTR(obd_timeout * HZ, ll_sync_brw_timeout,
250 ll_sync_brw_intr, set);
251 rc = l_wait_event(set->brw_waitq,
252 atomic_read(&set->brw_desc_count) == 0, &lwi);
254 list_for_each_safe(tmp, next, &set->brw_desc_head) {
255 struct ptlrpc_bulk_desc *desc =
256 list_entry(tmp, struct ptlrpc_bulk_desc,
258 list_del_init(&desc->bd_set_chain);
259 ptlrpc_bulk_decref(desc);
262 case CB_PHASE_FINISH:
263 if (atomic_dec_and_test(&set->brw_desc_count))
264 wake_up(&set->brw_waitq);
269 obd_brw_set_decref(set);
/*
 * Allocate and initialise a request for import imp.
 *   opcode         - stored in the request message via HTON__u32
 *   count/lengths/bufs - handed to lustre_pack_msg() to build rq_reqmsg
 * The request gets rq_timeout = obd_timeout, level LUSTRE_CONN_FULL, type
 * PTL_RPC_MSG_REQUEST, the import's client portals, a new reference on the
 * import's connection, an empty rq_list and a reference count of 1.  The
 * message header receives the ptlrpc magic and version, flags are cleared,
 * and ptlrpc_hdl2req() is called with the import handle.
 * If packing fails, the error is logged and the request is freed.
 * The LASSERT rejects import pointers at or below 0x1000.
 * NOTE(review): the failure returns and the final return statement are not
 * visible in this listing.
 */
274 struct ptlrpc_request *ptlrpc_prep_req(struct obd_import *imp, int opcode,
275 int count, int *lengths, char **bufs)
277 struct ptlrpc_connection *conn;
278 struct ptlrpc_request *request;
282 LASSERT((unsigned long)imp > 0x1000);
283 conn = imp->imp_connection;
285 OBD_ALLOC(request, sizeof(*request));
287 CERROR("request allocation out of memory\n");
291 rc = lustre_pack_msg(count, lengths, bufs,
292 &request->rq_reqlen, &request->rq_reqmsg);
294 CERROR("cannot pack request %d\n", rc);
295 OBD_FREE(request, sizeof(*request));
299 request->rq_timeout = obd_timeout;
300 request->rq_level = LUSTRE_CONN_FULL;
301 request->rq_type = PTL_RPC_MSG_REQUEST;
302 request->rq_import = imp;
304 /* XXX FIXME bug 625069, now 249 */
305 request->rq_request_portal = imp->imp_client->cli_request_portal;
306 request->rq_reply_portal = imp->imp_client->cli_reply_portal;
308 request->rq_connection = ptlrpc_connection_addref(conn);
310 INIT_LIST_HEAD(&request->rq_list);
311 atomic_set(&request->rq_refcount, 1);
313 request->rq_reqmsg->magic = PTLRPC_MSG_MAGIC;
314 request->rq_reqmsg->version = PTLRPC_MSG_VERSION;
315 request->rq_reqmsg->opc = HTON__u32(opcode);
316 request->rq_reqmsg->flags = 0;
318 ptlrpc_hdl2req(request, &imp->imp_handle);
/*
 * Tear down a request and release its memory.
 * A NULL request is special-cased at the top.  If the request still has
 * an import, it is first unlinked from its list under imp_lock.  An error
 * is logged if the reference count is not zero.  The reply and request
 * message buffers are then freed (when present) and their pointers
 * cleared, the connection reference is dropped, and the request itself is
 * freed.
 * NOTE(review): how the locked argument affects the imp_lock handling is
 * not visible in this listing; presumably it says whether the caller
 * already holds imp_lock -- confirm against the complete source.
 */
322 static void __ptlrpc_free_req(struct ptlrpc_request *request, int locked)
325 if (request == NULL) {
330 /* We must take it off the imp_replay_list first. Otherwise, we'll set
331 * request->rq_reqmsg to NULL while osc_close is dereferencing it. */
332 if (request->rq_import) {
333 unsigned long flags = 0;
335 spin_lock_irqsave(&request->rq_import->imp_lock, flags);
336 list_del_init(&request->rq_list);
338 spin_unlock_irqrestore(&request->rq_import->imp_lock,
342 if (atomic_read(&request->rq_refcount) != 0) {
343 CERROR("freeing request %p (%d->%s:%d) with refcount %d\n",
344 request, request->rq_reqmsg->opc,
345 request->rq_connection->c_remote_uuid.uuid,
346 request->rq_import->imp_client->cli_request_portal,
347 atomic_read (&request->rq_refcount));
351 if (request->rq_repmsg != NULL) {
352 OBD_FREE(request->rq_repmsg, request->rq_replen);
353 request->rq_repmsg = NULL;
354 request->rq_reply_md.start = NULL;
356 if (request->rq_reqmsg != NULL) {
357 OBD_FREE(request->rq_reqmsg, request->rq_reqlen);
358 request->rq_reqmsg = NULL;
361 ptlrpc_put_connection(request->rq_connection);
362 OBD_FREE(request, sizeof(*request));
/*
 * Public entry point for freeing a request: forwards to
 * __ptlrpc_free_req() with locked == 0.
 */
366 void ptlrpc_free_req(struct ptlrpc_request *request)
368 __ptlrpc_free_req(request, 0);
/*
 * Drop one reference on request and free it through __ptlrpc_free_req()
 * (passing locked along) when the count reaches zero.
 * A pointer equal to the 0x5a5a... pattern is reported as a use of an
 * already freed request.  The debug line prints the count the request
 * will have after the decrement.
 * NOTE(review): the return values are not visible in this listing.
 */
371 static int __ptlrpc_req_finished(struct ptlrpc_request *request, int locked)
377 if (request == (void *)(long)(0x5a5a5a5a5a5a5a5a)) {
378 CERROR("dereferencing freed request (bug 575)\n");
383 DEBUG_REQ(D_INFO, request, "refcount now %u",
384 atomic_read(&request->rq_refcount) - 1);
386 if (atomic_dec_and_test(&request->rq_refcount)) {
387 __ptlrpc_free_req(request, locked);
/*
 * Public entry point for releasing a reference on a request: forwards to
 * __ptlrpc_req_finished() with locked == 0 and discards its result.
 */
394 void ptlrpc_req_finished(struct ptlrpc_request *request)
396 __ptlrpc_req_finished(request, 0);
/*
 * Wake-up condition used while waiting on rq_wait_for_rep.
 * If a reply message is attached, rq_transno is set from the reply's
 * transno converted with NTOH__u64, the unconverted reply transno is
 * copied into the request message (kept for replay) and
 * PTL_RPC_FL_REPLIED is set.  The RESEND, ERR and RESTART flags are also
 * tested, each with its own debug message, and the result is logged at
 * D_NET.
 * NOTE(review): the value assigned to rc in each branch is not visible in
 * this listing.
 */
399 static int ptlrpc_check_reply(struct ptlrpc_request *req)
404 if (req->rq_repmsg != NULL) {
405 req->rq_transno = NTOH__u64(req->rq_repmsg->transno);
406 /* Store transno in reqmsg for replay. */
407 req->rq_reqmsg->transno = req->rq_repmsg->transno;
408 req->rq_flags |= PTL_RPC_FL_REPLIED;
412 if (req->rq_flags & PTL_RPC_FL_RESEND) {
413 DEBUG_REQ(D_ERROR, req, "RESEND:");
417 if (req->rq_flags & PTL_RPC_FL_ERR) {
419 DEBUG_REQ(D_ERROR, req, "ABORTED:");
423 if (req->rq_flags & PTL_RPC_FL_RESTART) {
424 DEBUG_REQ(D_ERROR, req, "RESTART:");
429 DEBUG_REQ(D_NET, req, "rc = %d for", rc);
/*
 * Inspect the status carried by the reply message of req.
 * If the reply type equals NTOH__u32(PTL_RPC_MSG_ERR) the function returns
 * the reply status, or -EINVAL when that status is zero.  Otherwise the
 * status is logged; positive values are handled in a separate branch that
 * carries an XXX note about translating the error from network to host
 * form.
 * NOTE(review): the first branch condition and the final return are not
 * visible in this listing.
 */
433 static int ptlrpc_check_status(struct ptlrpc_request *req)
438 err = req->rq_repmsg->status;
439 if (req->rq_repmsg->type == NTOH__u32(PTL_RPC_MSG_ERR)) {
440 DEBUG_REQ(D_ERROR, req, "type == PTL_RPC_MSG_ERR (%d)", err);
441 RETURN(err ? err : -EINVAL);
445 DEBUG_REQ(D_INFO, req, "status is %d", err);
446 } else if (err > 0) {
447 /* XXX: translate this error from net to host */
448 DEBUG_REQ(D_INFO, req, "status is %d", err);
/*
 * Free the request message buffer of request and reset both rq_reqmsg and
 * rq_reqlen so the request no longer refers to it.
 */
454 static void ptlrpc_cleanup_request_buf(struct ptlrpc_request *request)
456 OBD_FREE(request->rq_reqmsg, request->rq_reqlen);
457 request->rq_reqmsg = NULL;
458 request->rq_reqlen = 0;
/*
 * Steps visible here: unlink the reply match entry with PtlMEUnlink, free
 * the reply buffer (rq_reply_md.start, rq_replen bytes), zero the ME
 * handle and clear both rq_reply_md.start and rq_repmsg.
 * NOTE(review): the action taken when PtlMEUnlink does not return PTL_OK,
 * and the return value, are not visible in this listing.
 */
461 /* Abort this request and cleanup any resources associated with it. */
462 int ptlrpc_abort(struct ptlrpc_request *request)
464 /* First remove the ME for the reply; in theory, this means
465 * that we can tear down the buffer safely. */
466 if (PtlMEUnlink(request->rq_reply_me_h) != PTL_OK)
468 OBD_FREE(request->rq_reply_md.start, request->rq_replen);
470 memset(&request->rq_reply_me_h, 0, sizeof(request->rq_reply_me_h));
471 request->rq_reply_md.start = NULL;
472 request->rq_repmsg = NULL;
/*
 * Release requests on imp->imp_replay_list that the peer has committed.
 * Walks the list in order; requests flagged PTL_RPC_FL_REPLAY are logged
 * as kept, and the walk logs "stopping search" at the first request whose
 * transno is greater than imp_peer_committed_transno.  Every other
 * request is unlinked and has one reference dropped with
 * __ptlrpc_req_finished(req, 1).
 * The lock requirement below is asserted with spin_is_locked().
 */
476 /* caller must hold imp->imp_lock */
477 void ptlrpc_free_committed(struct obd_import *imp)
479 struct list_head *tmp, *saved;
480 struct ptlrpc_request *req;
483 LASSERT(imp != NULL);
486 LASSERT(spin_is_locked(&imp->imp_lock));
489 CDEBUG(D_HA, "%s: committing for last_committed "LPU64"\n",
490 imp->imp_obd->obd_name, imp->imp_peer_committed_transno);
492 list_for_each_safe(tmp, saved, &imp->imp_replay_list) {
493 req = list_entry(tmp, struct ptlrpc_request, rq_list);
495 if (req->rq_flags & PTL_RPC_FL_REPLAY) {
496 DEBUG_REQ(D_HA, req, "keeping (FL_REPLAY)");
500 /* not yet committed */
501 if (req->rq_transno > imp->imp_peer_committed_transno) {
502 DEBUG_REQ(D_HA, req, "stopping search");
506 DEBUG_REQ(D_HA, req, "committing (last_committed "LPU64")",
507 imp->imp_peer_committed_transno);
508 list_del_init(&req->rq_list);
509 __ptlrpc_req_finished(req, 1);
/*
 * Drop every request still queued on imp->imp_replay_list.
 * Under imp_lock each request is unlinked, detached from the import
 * (rq_import = NULL) and has one reference released through
 * __ptlrpc_req_finished(req, 0).
 * NOTE(review): the debug text says "sending list" although the loop
 * walks imp_replay_list -- confirm which is intended.
 * NOTE(review): clearing rq_import before the release presumably keeps
 * __ptlrpc_free_req from taking imp_lock again while it is held here --
 * confirm.
 */
516 void ptlrpc_cleanup_client(struct obd_import *imp)
518 struct list_head *tmp, *saved;
519 struct ptlrpc_request *req;
520 struct ptlrpc_connection *conn = imp->imp_connection;
526 spin_lock_irqsave(&imp->imp_lock, flags);
527 list_for_each_safe(tmp, saved, &imp->imp_replay_list) {
528 req = list_entry(tmp, struct ptlrpc_request, rq_list);
530 /* XXX we should make sure that nobody's sleeping on these! */
531 DEBUG_REQ(D_HA, req, "cleaning up from sending list");
532 list_del_init(&req->rq_list);
533 req->rq_import = NULL;
534 __ptlrpc_req_finished(req, 0);
536 spin_unlock_irqrestore(&imp->imp_lock, flags);
/*
 * Let a delayed request proceed: refresh the handle (addr and cookie) in
 * the request message from the import's current handle and wake whoever
 * sleeps on rq_wait_for_rep.
 */
542 void ptlrpc_continue_req(struct ptlrpc_request *req)
544 DEBUG_REQ(D_HA, req, "continuing delayed request");
545 req->rq_reqmsg->addr = req->rq_import->imp_handle.addr;
546 req->rq_reqmsg->cookie = req->rq_import->imp_handle.cookie;
547 wake_up(&req->rq_wait_for_rep);
/*
 * Mark a request for resending: refresh the handle in the request message
 * from the import, set rq_status to -EAGAIN, lower rq_level to
 * LUSTRE_CONN_RECOVD, set PTL_RPC_FL_RESEND, clear PTL_RPC_FL_TIMEOUT and
 * wake the sleeper on rq_wait_for_rep.
 */
550 void ptlrpc_resend_req(struct ptlrpc_request *req)
552 DEBUG_REQ(D_HA, req, "resending");
553 req->rq_reqmsg->addr = req->rq_import->imp_handle.addr;
554 req->rq_reqmsg->cookie = req->rq_import->imp_handle.cookie;
555 req->rq_status = -EAGAIN;
556 req->rq_level = LUSTRE_CONN_RECOVD;
557 req->rq_flags |= PTL_RPC_FL_RESEND;
558 req->rq_flags &= ~PTL_RPC_FL_TIMEOUT;
559 wake_up(&req->rq_wait_for_rep);
/*
 * Mark a request for restart: set rq_status to -ERESTARTSYS, set
 * PTL_RPC_FL_RESTART, clear PTL_RPC_FL_TIMEOUT and wake the sleeper on
 * rq_wait_for_rep.
 */
562 void ptlrpc_restart_req(struct ptlrpc_request *req)
564 DEBUG_REQ(D_HA, req, "restarting (possibly-)completed request");
565 req->rq_status = -ERESTARTSYS;
566 req->rq_flags |= PTL_RPC_FL_RESTART;
567 req->rq_flags &= ~PTL_RPC_FL_TIMEOUT;
568 wake_up(&req->rq_wait_for_rep);
/*
 * Timeout callback for a request wait (data is the ptlrpc_request); it is
 * passed to LWI_TIMEOUT_INTR by ptlrpc_queue_wait.
 * Logs the timeout and sets PTL_RPC_FL_TIMEOUT.  A missing import or a
 * missing import connection is logged as a special case.  The
 * connection's c_recovd_data.rd_recovd pointer is tested before
 * recovd_conn_fail() is invoked on the connection.  Requests whose level
 * is below LUSTRE_CONN_FULL are singled out at the end; the existing
 * comment says such requests must not go back to sleep.
 * NOTE(review): the return values of the individual branches are not
 * visible in this listing.
 */
571 static int expired_request(void *data)
573 struct ptlrpc_request *req = data;
582 DEBUG_REQ(D_ERROR, req, "timeout");
584 req->rq_flags |= PTL_RPC_FL_TIMEOUT;
586 if (!req->rq_import) {
587 DEBUG_REQ(D_HA, req, "NULL import; already cleaned up?");
591 if (!req->rq_import->imp_connection) {
592 DEBUG_REQ(D_ERROR, req, "NULL connection");
597 if (!req->rq_import->imp_connection->c_recovd_data.rd_recovd)
600 recovd_conn_fail(req->rq_import->imp_connection);
602 /* If this request is for recovery or other primordial tasks,
603 * don't go back to sleep.
605 if (req->rq_level < LUSTRE_CONN_FULL)
/*
 * Interrupt callback for a request wait (data is the ptlrpc_request):
 * sets PTL_RPC_FL_INTR in rq_flags and returns 1.  Per the inline remark
 * the return value is currently ignored by the caller.
 */
610 static int interrupted_request(void *data)
612 struct ptlrpc_request *req = data;
614 req->rq_flags |= PTL_RPC_FL_INTR;
615 RETURN(1); /* ignored, as of this writing */
/*
 * Take an additional reference on req by incrementing rq_refcount.
 * NOTE(review): the return statement is not visible in this listing;
 * presumably req itself is returned -- confirm.
 */
618 struct ptlrpc_request *ptlrpc_request_addref(struct ptlrpc_request *req)
621 atomic_inc(&req->rq_refcount);
/*
 * Keep req on imp->imp_replay_list for a later replay.
 * Preconditions asserted here: imp_lock is held and the import has
 * IMP_REPLAYABLE set.  An extra reference is taken on req (the existing
 * remark says it is usually balanced in ptlrpc_free_committed).
 * The list is scanned from its tail for the insertion point: entries with
 * a larger rq_transno are tested first, and for equal transnos rq_xid is
 * compared (two entries may never share both).  req is linked after the
 * entry found; the list_add_tail() at the end handles the case where no
 * such entry exists.
 * NOTE(review): the continue/return statements of the scan are not
 * visible in this listing.
 */
625 void ptlrpc_retain_replayable_request(struct ptlrpc_request *req,
626 struct obd_import *imp)
628 struct list_head *tmp;
631 LASSERT(spin_is_locked(&imp->imp_lock));
634 LASSERT(imp->imp_flags & IMP_REPLAYABLE);
635 /* Balanced in ptlrpc_free_committed, usually. */
636 ptlrpc_request_addref(req);
637 list_for_each_prev(tmp, &imp->imp_replay_list) {
638 struct ptlrpc_request *iter =
639 list_entry(tmp, struct ptlrpc_request, rq_list);
641 /* We may have duplicate transnos if we create and then
642 * open a file, or for closes retained if to match creating
643 * opens, so use req->rq_xid as a secondary key.
644 * (See bugs 684, 685, and 428.)
646 if (iter->rq_transno > req->rq_transno)
649 if (iter->rq_transno == req->rq_transno) {
650 LASSERT(iter->rq_xid != req->rq_xid);
651 if (iter->rq_xid > req->rq_xid)
655 list_add(&req->rq_list, &iter->rq_list);
659 list_add_tail(&req->rq_list, &imp->imp_replay_list);
/*
 * Send req on its import and wait for the outcome.
 *
 * Steps visible in this listing:
 *  - initialise rq_wait_for_rep, assign a new xid and store the current
 *    pid in the request message status field for distributed debugging;
 *  - under imp_lock, test IMP_INVALID on the import (the in-body note says
 *    the request must then fail with -EIO);
 *  - if rq_level is above the import level, park the request on
 *    imp_delayed_list and sleep until the level permits it or
 *    PTL_RPC_FL_ERR is set, then unlink it again;
 *  - queue the request on imp_sending_list, call ptl_send_rpc() and wait
 *    for ptlrpc_check_reply() with a timeout of rq_timeout * HZ, or one
 *    jiffy after a send error; expired_request and interrupted_request are
 *    the timeout and interrupt callbacks;
 *  - after waking, unlink the request and map flags to results:
 *    PTL_RPC_FL_ERR gives -EIO, PTL_RPC_FL_INTR gives -EINTR,
 *    PTL_RPC_FL_TIMEOUT gives -ETIMEDOUT, no reply gives rq_status; a
 *    pending RESEND clears the flag and marks the message MSG_RESENT,
 *    unless PTL_RPC_FL_NO_RESEND is set, which aborts with -ETIMEDOUT;
 *  - unpack the reply and validate its type; a reply status of -ENOTCONN
 *    either returns -EIO (recovery already under way or handle changed) or
 *    moves the import to LUSTRE_CONN_RECOVD and calls imp_recover;
 *  - run ptlrpc_check_status(); for IMP_REPLAYABLE imports, under
 *    imp_lock, retain the request for replay where applicable, advance
 *    imp_max_transno, record the peer's last_committed value and call
 *    ptlrpc_free_committed().
 *
 * NOTE(review): several conditions, labels and the final return are not
 * visible in this listing; treat the ordering above as a summary only.
 */
662 int ptlrpc_queue_wait(struct ptlrpc_request *req)
665 struct l_wait_info lwi;
666 struct obd_import *imp = req->rq_import;
667 struct ptlrpc_connection *conn = imp->imp_connection;
671 init_waitqueue_head(&req->rq_wait_for_rep);
673 req->rq_xid = HTON__u32(ptlrpc_next_xid());
675 /* for distributed debugging */
676 req->rq_reqmsg->status = HTON__u32(current->pid);
677 CDEBUG(D_RPCTRACE, "Sending RPC pid:xid:nid:opc %d:"LPU64":%s:"LPX64
678 ":%d\n", NTOH__u32(req->rq_reqmsg->status), req->rq_xid,
679 conn->c_peer.peer_ni->pni_name, conn->c_peer.peer_nid,
680 NTOH__u32(req->rq_reqmsg->opc));
682 spin_lock_irqsave(&imp->imp_lock, flags);
685 * If the import has been invalidated (such as by an OST failure), the
686 * request must fail with -EIO.
688 if (req->rq_import->imp_flags & IMP_INVALID) {
689 DEBUG_REQ(D_ERROR, req, "IMP_INVALID:");
690 spin_unlock_irqrestore(&imp->imp_lock, flags);
694 if (req->rq_level > imp->imp_level) {
695 list_del(&req->rq_list);
696 list_add_tail(&req->rq_list, &imp->imp_delayed_list);
697 spin_unlock_irqrestore(&imp->imp_lock, flags);
699 DEBUG_REQ(D_HA, req, "\"%s\" waiting for recovery: (%d < %d)",
700 current->comm, req->rq_level, imp->imp_level);
701 lwi = LWI_INTR(NULL, NULL);
702 rc = l_wait_event(req->rq_wait_for_rep,
703 (req->rq_level <= imp->imp_level) ||
704 (req->rq_flags & PTL_RPC_FL_ERR), &lwi);
706 if (req->rq_flags & PTL_RPC_FL_ERR)
712 spin_lock_irqsave(&imp->imp_lock, flags);
713 list_del_init(&req->rq_list);
716 spin_unlock_irqrestore(&imp->imp_lock, flags);
720 CERROR("process %d resumed\n", current->pid);
724 LASSERT(list_empty(&req->rq_list));
725 list_add_tail(&req->rq_list, &imp->imp_sending_list);
726 spin_unlock_irqrestore(&imp->imp_lock, flags);
727 rc = ptl_send_rpc(req);
729 CDEBUG(D_HA, "error %d, opcode %d, need recovery\n", rc,
730 req->rq_reqmsg->opc);
731 /* sleep for a jiffy, then trigger recovery */
732 lwi = LWI_TIMEOUT_INTR(1, expired_request,
733 interrupted_request, req);
735 DEBUG_REQ(D_NET, req, "-- sleeping");
736 lwi = LWI_TIMEOUT_INTR(req->rq_timeout * HZ, expired_request,
737 interrupted_request, req);
740 l_wait_event(req->rq_wait_for_rep, ptlrpc_check_reply(req), &lwi);
743 extern int reply_in_callback(ptl_event_t *ev);
744 ptl_event_t reply_ev;
745 PtlEQWait(req->rq_connection->c_peer.peer_ni->pni_reply_in_eq_h, &reply_ev);
746 reply_in_callback(&reply_ev);
750 DEBUG_REQ(D_NET, req, "-- done sleeping");
752 spin_lock_irqsave(&imp->imp_lock, flags);
753 list_del_init(&req->rq_list);
754 spin_unlock_irqrestore(&imp->imp_lock, flags);
756 if (req->rq_flags & PTL_RPC_FL_ERR) {
758 GOTO(out, rc = -EIO);
761 /* Don't resend if we were interrupted. */
762 if ((req->rq_flags & (PTL_RPC_FL_RESEND | PTL_RPC_FL_INTR)) ==
764 if (req->rq_flags & PTL_RPC_FL_NO_RESEND) {
765 ptlrpc_abort(req); /* clean up reply buffers */
766 req->rq_flags &= ~PTL_RPC_FL_NO_RESEND;
767 GOTO(out, rc = -ETIMEDOUT);
769 req->rq_flags &= ~PTL_RPC_FL_RESEND;
770 lustre_msg_add_flags(req->rq_reqmsg, MSG_RESENT);
771 DEBUG_REQ(D_HA, req, "resending: ");
772 spin_lock_irqsave(&imp->imp_lock, flags);
776 if (req->rq_flags & PTL_RPC_FL_INTR) {
777 if (!(req->rq_flags & PTL_RPC_FL_TIMEOUT))
778 LBUG(); /* should only be interrupted if we timed out */
779 /* Clean up the dangling reply buffers */
781 GOTO(out, rc = -EINTR);
784 if (req->rq_flags & PTL_RPC_FL_TIMEOUT)
785 GOTO(out, rc = -ETIMEDOUT);
787 if (!(req->rq_flags & PTL_RPC_FL_REPLIED))
788 GOTO(out, rc = req->rq_status);
790 rc = lustre_unpack_msg(req->rq_repmsg, req->rq_replen);
792 CERROR("unpack_rep failed: %d\n", rc);
796 /* FIXME: Enable when BlueArc makes new release */
797 if (req->rq_repmsg->type != PTL_RPC_MSG_REPLY &&
798 req->rq_repmsg->type != PTL_RPC_MSG_ERR) {
799 CERROR("invalid packet type received (type=%u)\n",
800 req->rq_repmsg->type);
802 GOTO(out, rc = -EINVAL);
805 DEBUG_REQ(D_NET, req, "status %d", req->rq_repmsg->status);
807 /* We're a rejected connection, need to invalidate and rebuild. */
808 if (req->rq_repmsg->status == -ENOTCONN) {
809 spin_lock_irqsave(&imp->imp_lock, flags);
810 /* If someone else is reconnecting us (CONN_RECOVD) or has
811 * already completed it (handle mismatch), then we just need
814 if (imp->imp_level == LUSTRE_CONN_RECOVD ||
815 imp->imp_handle.addr != req->rq_reqmsg->addr ||
816 imp->imp_handle.cookie != req->rq_reqmsg->cookie) {
817 spin_unlock_irqrestore(&imp->imp_lock, flags);
818 GOTO(out, rc = -EIO);
820 imp->imp_level = LUSTRE_CONN_RECOVD;
821 spin_unlock_irqrestore(&imp->imp_lock, flags);
822 if (imp->imp_recover != NULL) {
823 rc = imp->imp_recover(imp, PTLRPC_RECOVD_PHASE_NOTCONN);
827 GOTO(out, rc = -EIO);
830 rc = ptlrpc_check_status(req);
832 if (req->rq_import->imp_flags & IMP_REPLAYABLE) {
833 spin_lock_irqsave(&imp->imp_lock, flags);
834 if ((req->rq_flags & PTL_RPC_FL_REPLAY || req->rq_transno != 0)
836 ptlrpc_retain_replayable_request(req, imp);
839 if (req->rq_transno > imp->imp_max_transno) {
840 imp->imp_max_transno = req->rq_transno;
843 /* Replay-enabled imports return commit-status information. */
844 if (req->rq_repmsg->last_committed) {
845 imp->imp_peer_committed_transno =
846 req->rq_repmsg->last_committed;
848 ptlrpc_free_committed(imp);
849 spin_unlock_irqrestore(&imp->imp_lock, flags);
/*
 * Replay a previously sent request during recovery.
 * The request message handle is refreshed from the import, the request
 * level is temporarily lowered to LUSTRE_CONN_RECOVD (the saved level is
 * restored at the end), and the status of any earlier reply is remembered.
 * After ptl_send_rpc() the function sleeps without a timeout until
 * ptlrpc_check_reply() is satisfied.  A wake-up without
 * PTL_RPC_FL_REPLIED yields -EINTR.  Otherwise the reply is unpacked, the
 * optional rq_replay_cb callback runs, and a change of reply status
 * relative to the remembered one is logged.
 * On a send error the request buffer is released with
 * ptlrpc_cleanup_request_buf().
 * NOTE(review): error-path returns and the final return are not visible
 * in this listing.
 */
857 int ptlrpc_replay_req(struct ptlrpc_request *req)
859 int rc = 0, old_level, old_status = 0;
860 // struct ptlrpc_client *cli = req->rq_import->imp_client;
861 struct l_wait_info lwi;
864 init_waitqueue_head(&req->rq_wait_for_rep);
865 DEBUG_REQ(D_NET, req, "");
867 req->rq_reqmsg->addr = req->rq_import->imp_handle.addr;
868 req->rq_reqmsg->cookie = req->rq_import->imp_handle.cookie;
870 /* temporarily set request to RECOVD level (reset at out:) */
871 old_level = req->rq_level;
872 if (req->rq_flags & PTL_RPC_FL_REPLIED)
873 old_status = req->rq_repmsg->status;
874 req->rq_level = LUSTRE_CONN_RECOVD;
875 rc = ptl_send_rpc(req);
877 CERROR("error %d, opcode %d\n", rc, req->rq_reqmsg->opc);
878 ptlrpc_cleanup_request_buf(req);
879 // up(&cli->cli_rpc_sem);
883 CDEBUG(D_OTHER, "-- sleeping\n");
884 lwi = LWI_INTR(NULL, NULL); /* XXX needs timeout, nested recovery */
885 l_wait_event(req->rq_wait_for_rep, ptlrpc_check_reply(req), &lwi);
886 CDEBUG(D_OTHER, "-- done\n");
888 // up(&cli->cli_rpc_sem);
890 if (!(req->rq_flags & PTL_RPC_FL_REPLIED)) {
891 CERROR("Unknown reason for wakeup\n");
892 /* XXX Phil - I end up here when I kill obdctl */
894 GOTO(out, rc = -EINTR);
897 rc = lustre_unpack_msg(req->rq_repmsg, req->rq_replen);
899 CERROR("unpack_rep failed: %d\n", rc);
903 CDEBUG(D_NET, "got rep "LPD64"\n", req->rq_xid);
905 /* let the callback do fixups, possibly including in the request */
906 if (req->rq_replay_cb)
907 req->rq_replay_cb(req);
909 if ((req->rq_flags & PTL_RPC_FL_REPLIED) &&
910 req->rq_repmsg->status != old_status) {
911 DEBUG_REQ(D_HA, req, "status %d, old was %d",
912 req->rq_repmsg->status, old_status);
916 req->rq_level = old_level;
/*
 * Fail every request currently queued on an import.
 * For imports without IMP_REPLAYABLE, IMP_INVALID is first set under
 * imp_lock so that no new requests are accepted.  Then each request on
 * imp_sending_list and on imp_delayed_list is flagged PTL_RPC_FL_ERR,
 * detached from the import (rq_import = NULL) and its waiter woken.
 * NOTE(review): the use of dying_import and any locking around the two
 * list walks are not visible in this listing.
 */
920 /* XXX looks a lot like super.c:invalidate_request_list, don't it? */
921 void ptlrpc_abort_inflight(struct obd_import *imp, int dying_import)
924 struct list_head *tmp, *n;
927 /* Make sure that no new requests get processed for this import.
928 * ptlrpc_queue_wait must (and does) hold imp_lock while testing this
929 * flag and then putting requests on sending_list or delayed_list.
931 if ((imp->imp_flags & IMP_REPLAYABLE) == 0) {
932 spin_lock_irqsave(&imp->imp_lock, flags);
933 imp->imp_flags |= IMP_INVALID;
934 spin_unlock_irqrestore(&imp->imp_lock, flags);
937 list_for_each_safe(tmp, n, &imp->imp_sending_list) {
938 struct ptlrpc_request *req =
939 list_entry(tmp, struct ptlrpc_request, rq_list);
941 DEBUG_REQ(D_HA, req, "inflight");
942 req->rq_flags |= PTL_RPC_FL_ERR;
944 req->rq_import = NULL;
945 wake_up(&req->rq_wait_for_rep);
948 list_for_each_safe(tmp, n, &imp->imp_delayed_list) {
949 struct ptlrpc_request *req =
950 list_entry(tmp, struct ptlrpc_request, rq_list);
952 DEBUG_REQ(D_HA, req, "aborting waiting req");
953 req->rq_flags |= PTL_RPC_FL_ERR;
955 req->rq_import = NULL;
956 wake_up(&req->rq_wait_for_rep);