1 /* -*- mode: c; c-basic-offset: 8; indent-tabs-mode: nil; -*-
2 * vim:expandtab:shiftwidth=8:tabstop=8:
4 * Copyright (c) 2002, 2003 Cluster File Systems, Inc.
6 * This file is part of Lustre, http://www.lustre.org.
8 * Lustre is free software; you can redistribute it and/or
9 * modify it under the terms of version 2 of the GNU General Public
10 * License as published by the Free Software Foundation.
12 * Lustre is distributed in the hope that it will be useful,
13 * but WITHOUT ANY WARRANTY; without even the implied warranty of
14 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
15 * GNU General Public License for more details.
17 * You should have received a copy of the GNU General Public License
18 * along with Lustre; if not, write to the Free Software
19 * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
23 #define DEBUG_SUBSYSTEM S_RPC
27 #include <liblustre.h>
30 #include <linux/obd_support.h>
31 #include <linux/obd_class.h>
32 #include <linux/lustre_lib.h>
33 #include <linux/lustre_ha.h>
34 #include <linux/lustre_import.h>
/* Set up a client descriptor: record the request and reply portal numbers
 * in @cl.
 * NOTE(review): @name is not referenced by any line visible in this
 * listing -- confirm where (or whether) it is stored. */
36 void ptlrpc_init_client(int req_portal, int rep_portal, char *name,
37 struct ptlrpc_client *cl)
39 cl->cli_request_portal = req_portal;
40 cl->cli_reply_portal = rep_portal;
/* Return the address of the remote UUID stored in the connection that
 * @req is attached to (req->rq_connection->c_remote_uuid).  rq_connection
 * is dereferenced without a NULL check. */
44 struct obd_uuid *ptlrpc_req_to_uuid(struct ptlrpc_request *req)
46 return &req->rq_connection->c_remote_uuid;
/* Obtain the connection that corresponds to @uuid.
 *
 * @uuid is resolved to a peer with ptlrpc_uuid_to_peer(); the failure path
 * logs "cannot find peer".  A connection for that peer is then requested
 * from ptlrpc_get_connection() and @uuid is copied into the connection's
 * c_remote_uuid.
 * NOTE(review): the conditions guarding the error path and the return
 * statements are not visible in this listing -- confirm what is returned
 * when the peer lookup or ptlrpc_get_connection() fails. */
49 struct ptlrpc_connection *ptlrpc_uuid_to_connection(struct obd_uuid *uuid)
51 struct ptlrpc_connection *c;
52 struct ptlrpc_peer peer;
55 err = ptlrpc_uuid_to_peer(uuid, &peer);
57 CERROR("cannot find peer %s!\n", uuid->uuid);
61 c = ptlrpc_get_connection(&peer, uuid);
63 memcpy(c->c_remote_uuid.uuid,
64 uuid->uuid, sizeof(c->c_remote_uuid.uuid));
68 CDEBUG(D_INFO, "%s -> %p\n", uuid->uuid, c);
/* Point an existing connection at the peer that @uuid resolves to.
 *
 * @uuid is resolved with ptlrpc_uuid_to_peer() and the resulting peer
 * structure is copied over conn->c_peer.  The failure path logs
 * "cannot find peer".
 * NOTE(review): the check on err is not visible here; presumably c_peer is
 * left untouched when the lookup fails -- confirm. */
73 void ptlrpc_readdress_connection(struct ptlrpc_connection *conn,struct obd_uuid *uuid)
75 struct ptlrpc_peer peer;
78 err = ptlrpc_uuid_to_peer (uuid, &peer);
80 CERROR("cannot find peer %s!\n", uuid->uuid);
84 memcpy (&conn->c_peer, &peer, sizeof (peer));
/* Allocate and initialize a bulk descriptor bound to @conn.
 *
 * The descriptor stores the result of ptlrpc_connection_addref(conn),
 * starts with bd_refcount set to 1, has its wait queue, page list and set
 * chain initialized as empty, and has both Portals handles (bd_md_h,
 * bd_me_h) reset with ptl_set_inv_handle().
 * NOTE(review): the allocation-failure check and the return statement are
 * not visible in this listing. */
88 struct ptlrpc_bulk_desc *ptlrpc_prep_bulk(struct ptlrpc_connection *conn)
90 struct ptlrpc_bulk_desc *desc;
92 OBD_ALLOC(desc, sizeof(*desc));
94 desc->bd_connection = ptlrpc_connection_addref(conn);
95 atomic_set(&desc->bd_refcount, 1);
96 init_waitqueue_head(&desc->bd_waitq);
97 INIT_LIST_HEAD(&desc->bd_page_list);
98 INIT_LIST_HEAD(&desc->bd_set_chain);
99 ptl_set_inv_handle(&desc->bd_md_h);
100 ptl_set_inv_handle(&desc->bd_me_h);
/* Derive an error code from the flags of a bulk descriptor.
 *
 * When PTL_RPC_FL_TIMEOUT is set in bd_flags, rc is -ERESTARTSYS if
 * PTL_RPC_FL_INTR is set as well.
 * NOTE(review): the other arm of the conditional expression, the
 * non-timeout case and the return are not visible in this listing. */
106 int ptlrpc_bulk_error(struct ptlrpc_bulk_desc *desc)
109 if (desc->bd_flags & PTL_RPC_FL_TIMEOUT) {
110 rc = (desc->bd_flags & PTL_RPC_FL_INTR ? -ERESTARTSYS :
/* Allocate a bulk page entry and attach it to @desc.
 *
 * The new entry records @desc as its owner, is appended to the tail of
 * desc->bd_page_list, and desc->bd_page_count is incremented.
 * NOTE(review): the allocation-failure check and the return statement are
 * not visible in this listing. */
116 struct ptlrpc_bulk_page *ptlrpc_prep_bulk_page(struct ptlrpc_bulk_desc *desc)
118 struct ptlrpc_bulk_page *bulk;
120 OBD_ALLOC(bulk, sizeof(*bulk));
122 bulk->bp_desc = desc;
123 list_add_tail(&bulk->bp_link, &desc->bd_page_list);
124 desc->bd_page_count++;
/* Destroy a bulk descriptor together with all of its page entries.
 *
 * The descriptor must no longer be chained on a set (asserted).  A
 * non-zero bd_refcount is reported with CERROR.  Every entry on
 * bd_page_list is released through ptlrpc_free_bulk_page(); the _safe list
 * iterator is used because that function unlinks the entry it frees.
 * Finally the connection reference is dropped and the descriptor freed. */
129 void ptlrpc_free_bulk(struct ptlrpc_bulk_desc *desc)
131 struct list_head *tmp, *next;
138 LASSERT(list_empty(&desc->bd_set_chain));
140 if (atomic_read(&desc->bd_refcount) != 0)
141 CERROR("freeing desc %p with refcount %d!\n", desc,
142 atomic_read(&desc->bd_refcount));
144 list_for_each_safe(tmp, next, &desc->bd_page_list) {
145 struct ptlrpc_bulk_page *bulk;
146 bulk = list_entry(tmp, struct ptlrpc_bulk_page, bp_link);
147 ptlrpc_free_bulk_page(bulk);
150 ptlrpc_put_connection(desc->bd_connection);
152 OBD_FREE(desc, sizeof(*desc));
/* Release a single bulk page entry: unlink it from its descriptor's page
 * list, decrement the descriptor's bd_page_count and free the entry.
 * bulk->bp_desc is dereferenced, so the entry must still be attached to a
 * live descriptor. */
156 void ptlrpc_free_bulk_page(struct ptlrpc_bulk_page *bulk)
164 list_del(&bulk->bp_link);
165 bulk->bp_desc->bd_page_count--;
166 OBD_FREE(bulk, sizeof(*bulk));
/* Timeout callback that ll_brw_sync_wait() installs via LWI_TIMEOUT_INTR.
 *
 * @data is the struct obd_brw_set being waited on.  The set is flagged
 * PTL_RPC_FL_TIMEOUT and every bulk descriptor chained on brw_desc_head is
 * visited.  Descriptors already flagged PTL_BULK_FL_RCVD or
 * PTL_BULK_FL_SENT count as completed.  For the others the visible code
 * attempts PtlMDUnlink() on the descriptor's MD, logs the timeout, drops
 * one reference on brw_refcount (waking brw_waitq when it reaches zero)
 * and, when the class_signal_connection_failure hook is non-NULL, passes
 * the descriptor's connection to it.
 * NOTE(review): loop control statements and the return are not visible in
 * this listing; the trailing comments document 0 as "go back to sleep" and
 * 1 as "abort the syscall with -ETIMEDOUT". */
170 static int ll_sync_brw_timeout(void *data)
172 struct obd_brw_set *set = data;
173 struct list_head *tmp;
179 set->brw_flags |= PTL_RPC_FL_TIMEOUT;
181 list_for_each(tmp, &set->brw_desc_head) {
182 struct ptlrpc_bulk_desc *desc =
183 list_entry(tmp, struct ptlrpc_bulk_desc, bd_set_chain);
185 /* Skip descriptors that were completed successfully. */
186 if (desc->bd_flags & (PTL_BULK_FL_RCVD | PTL_BULK_FL_SENT))
189 LASSERT(desc->bd_connection);
191 /* If PtlMDUnlink succeeds, then bulk I/O on the MD hasn't
192 * even started yet. XXX where do we kunmup the thing?
194 * If it fail with PTL_MD_BUSY, then the network is still
195 * reading/writing the buffers and we must wait for it to
196 * complete (which it will within finite time, most
197 * probably with failure; we really need portals error
198 * events to detect that).
200 * Otherwise (PTL_INV_MD) it completed after the bd_flags
203 if (PtlMDUnlink(desc->bd_md_h) != PTL_OK) {
204 CERROR("Near-miss on OST %s -- need to adjust "
206 desc->bd_connection->c_remote_uuid.uuid);
210 CERROR("IO of %d pages to/from %s:%d (conn %p) timed out\n",
212 desc->bd_connection->c_remote_uuid.uuid,
213 desc->bd_portal, desc->bd_connection);
215 /* This one will "never" arrive, don't wait for it. */
216 if (atomic_dec_and_test(&set->brw_refcount))
217 wake_up(&set->brw_waitq);
219 if (class_signal_connection_failure)
220 class_signal_connection_failure(desc->bd_connection);
225 /* 0 = We go back to sleep, until we're resumed or interrupted */
226 /* 1 = We can't be recovered, just abort the syscall with -ETIMEDOUT */
/* Interrupt callback that ll_brw_sync_wait() installs via LWI_TIMEOUT_INTR.
 *
 * @data is the struct obd_brw_set being waited on; it is flagged
 * PTL_RPC_FL_INTR.  Returns 1 through RETURN(); per the inline comment the
 * value is currently ignored by the caller. */
230 static int ll_sync_brw_intr(void *data)
232 struct obd_brw_set *set = data;
235 set->brw_flags |= PTL_RPC_FL_INTR;
236 RETURN(1); /* ignored, as of this writing */
/* Wait for, or signal completion of, the bulk I/O grouped in @set.
 *
 * The visible wait path sleeps on brw_waitq until brw_refcount reaches
 * zero, using LWI_TIMEOUT_INTR(obd_timeout * HZ, ...) with
 * ll_sync_brw_timeout() as timeout handler and ll_sync_brw_intr() as
 * interrupt handler.  Afterwards every descriptor is unlinked from
 * brw_desc_head and released with ptlrpc_bulk_decref().  In the
 * CB_PHASE_FINISH case one reference on brw_refcount is dropped and
 * waiters on brw_waitq are woken when the count reaches zero.
 * NOTE(review): the switch on @phase, the remaining case labels and the
 * return value are not visible in this listing -- confirm against the
 * full file. */
239 int ll_brw_sync_wait(struct obd_brw_set *set, int phase)
241 struct l_wait_info lwi;
242 struct list_head *tmp, *next;
248 lwi = LWI_TIMEOUT_INTR(obd_timeout * HZ, ll_sync_brw_timeout,
249 ll_sync_brw_intr, set);
250 rc = l_wait_event(set->brw_waitq,
251 atomic_read(&set->brw_refcount) == 0, &lwi);
253 list_for_each_safe(tmp, next, &set->brw_desc_head) {
254 struct ptlrpc_bulk_desc *desc =
255 list_entry(tmp, struct ptlrpc_bulk_desc,
257 list_del_init(&desc->bd_set_chain);
258 ptlrpc_bulk_decref(desc);
261 case CB_PHASE_FINISH:
262 if (atomic_dec_and_test(&set->brw_refcount))
263 wake_up(&set->brw_waitq);
/* Allocate and initialize a new request addressed through @imp.
 *
 * @opcode is stored in the request message header via HTON__u32().
 * @count, @lengths and @bufs are handed to lustre_pack_msg(), which fills
 * in rq_reqlen and rq_reqmsg.  The new request starts at level
 * LUSTRE_CONN_FULL with a refcount of 1, takes its request/reply portals
 * from imp->imp_client, stores the result of ptlrpc_connection_addref() on
 * the import's connection, and gets its handle fields from
 * imp->imp_handle through ptlrpc_hdl2req().  If packing fails, the request
 * structure is freed again.
 * NOTE(review): the failure conditions and the return statements are not
 * visible in this listing; presumably NULL is returned on allocation or
 * packing failure -- confirm. */
272 struct ptlrpc_request *ptlrpc_prep_req(struct obd_import *imp, int opcode,
273 int count, int *lengths, char **bufs)
275 struct ptlrpc_connection *conn;
276 struct ptlrpc_request *request;
/* Sanity check: @imp must not be NULL or a small bogus pointer value. */
280 LASSERT((unsigned long)imp > 0x1000);
281 conn = imp->imp_connection;
283 OBD_ALLOC(request, sizeof(*request));
285 CERROR("request allocation out of memory\n");
289 rc = lustre_pack_msg(count, lengths, bufs,
290 &request->rq_reqlen, &request->rq_reqmsg);
292 CERROR("cannot pack request %d\n", rc);
293 OBD_FREE(request, sizeof(*request));
297 request->rq_level = LUSTRE_CONN_FULL;
298 request->rq_type = PTL_RPC_MSG_REQUEST;
299 request->rq_import = imp;
301 /* XXX FIXME bug 625069, now 249 */
302 request->rq_request_portal = imp->imp_client->cli_request_portal;
303 request->rq_reply_portal = imp->imp_client->cli_reply_portal;
305 request->rq_connection = ptlrpc_connection_addref(conn);
307 INIT_LIST_HEAD(&request->rq_list);
308 atomic_set(&request->rq_refcount, 1);
310 request->rq_reqmsg->magic = PTLRPC_MSG_MAGIC;
311 request->rq_reqmsg->version = PTLRPC_MSG_VERSION;
312 request->rq_reqmsg->opc = HTON__u32(opcode);
313 request->rq_reqmsg->flags = 0;
315 ptlrpc_hdl2req(request, &imp->imp_handle);
/* Release everything owned by @request and free the request itself.
 *
 * A NULL @request is tolerated.  When the request still has an import it
 * is first unlinked from whatever list rq_list is on, under the import's
 * imp_lock.  A non-zero refcount is reported with CERROR.  The reply and
 * request message buffers are freed when present, the connection reference
 * is dropped, and finally the request structure is freed.
 * NOTE(review): @locked is not referenced by any line visible in this
 * listing -- confirm how it gates taking imp_lock (ptlrpc_free_committed()
 * reaches this function with the lock already held and locked == 1).
 * NOTE(review): the CERROR for a non-zero refcount dereferences
 * request->rq_import and request->rq_reqmsg without a NULL check, even
 * though rq_import is tested for NULL just above and
 * ptlrpc_cleanup_client() clears rq_import before releasing requests. */
319 static void __ptlrpc_free_req(struct ptlrpc_request *request, int locked)
322 if (request == NULL) {
327 /* We must take it off the imp_replay_list first. Otherwise, we'll set
328 * request->rq_reqmsg to NULL while osc_close is dereferencing it. */
329 if (request->rq_import) {
330 unsigned long flags = 0;
332 spin_lock_irqsave(&request->rq_import->imp_lock, flags);
333 list_del_init(&request->rq_list);
335 spin_unlock_irqrestore(&request->rq_import->imp_lock,
339 if (atomic_read(&request->rq_refcount) != 0) {
340 CERROR("freeing request %p (%d->%s:%d) with refcount %d\n",
341 request, request->rq_reqmsg->opc,
342 request->rq_connection->c_remote_uuid.uuid,
343 request->rq_import->imp_client->cli_request_portal,
344 atomic_read (&request->rq_refcount));
348 if (request->rq_repmsg != NULL) {
349 OBD_FREE(request->rq_repmsg, request->rq_replen);
350 request->rq_repmsg = NULL;
351 request->rq_reply_md.start = NULL;
353 if (request->rq_reqmsg != NULL) {
354 OBD_FREE(request->rq_reqmsg, request->rq_reqlen);
355 request->rq_reqmsg = NULL;
358 ptlrpc_put_connection(request->rq_connection);
359 OBD_FREE(request, sizeof(*request));
/* Public entry point for freeing a request: forwards to
 * __ptlrpc_free_req() with locked == 0. */
363 void ptlrpc_free_req(struct ptlrpc_request *request)
365 __ptlrpc_free_req(request, 0);
/* Drop one reference on @request and free it when the count reaches zero.
 *
 * A pointer equal to 0x5a5a5a5a5a5a5a5a is reported as "dereferencing
 * freed request (bug 575)".  @locked is passed through unchanged to
 * __ptlrpc_free_req().
 * NOTE(review): what happens after the poison-pattern report, any NULL
 * check on @request, and the return value are not visible in this
 * listing. */
368 static int __ptlrpc_req_finished(struct ptlrpc_request *request, int locked)
374 if (request == (void *)(long)(0x5a5a5a5a5a5a5a5a)) {
375 CERROR("dereferencing freed request (bug 575)\n");
380 DEBUG_REQ(D_INFO, request, "refcount now %u",
381 atomic_read(&request->rq_refcount) - 1);
383 if (atomic_dec_and_test(&request->rq_refcount)) {
384 __ptlrpc_free_req(request, locked);
/* Public entry point for dropping a request reference: forwards to
 * __ptlrpc_req_finished() with locked == 0 and discards its result. */
391 void ptlrpc_req_finished(struct ptlrpc_request *request)
393 __ptlrpc_req_finished(request, 0);
/* Wait condition for a request sleeping on rq_wait_for_rep; it is the
 * condition expression given to l_wait_event() in ptlrpc_queue_wait() and
 * ptlrpc_replay_req().
 *
 * When a reply message is present, its transno is saved in rq_transno
 * (converted with NTOH__u64), copied unconverted into the request message
 * for replay, and the request is flagged PTL_RPC_FL_REPLIED.  The
 * PTL_RPC_FL_RESEND, PTL_RPC_FL_ERR and PTL_RPC_FL_RESTART flags are each
 * checked and logged.
 * NOTE(review): the assignments to rc and the return are not visible in
 * this listing; presumably a non-zero result ends the wait -- confirm. */
396 static int ptlrpc_check_reply(struct ptlrpc_request *req)
401 if (req->rq_repmsg != NULL) {
402 req->rq_transno = NTOH__u64(req->rq_repmsg->transno);
403 /* Store transno in reqmsg for replay. */
404 req->rq_reqmsg->transno = req->rq_repmsg->transno;
405 req->rq_flags |= PTL_RPC_FL_REPLIED;
409 if (req->rq_flags & PTL_RPC_FL_RESEND) {
410 DEBUG_REQ(D_ERROR, req, "RESEND:");
414 if (req->rq_flags & PTL_RPC_FL_ERR) {
416 DEBUG_REQ(D_ERROR, req, "ABORTED:");
420 if (req->rq_flags & PTL_RPC_FL_RESTART) {
421 DEBUG_REQ(D_ERROR, req, "RESTART:");
426 DEBUG_REQ(D_NET, req, "rc = %d for", rc);
/* Inspect the status carried in the reply message of @req.
 *
 * A reply whose type equals NTOH__u32(PTL_RPC_MSG_ERR) is logged at
 * D_ERROR and yields the reply status, or -EINVAL when that status is 0.
 * Other statuses are logged at D_INFO; the err > 0 branch carries an XXX
 * note that the value still needs translating from net to host.
 * NOTE(review): req->rq_repmsg is dereferenced without a NULL check, and
 * the first branch condition and the final return are not visible in this
 * listing. */
430 static int ptlrpc_check_status(struct ptlrpc_request *req)
435 err = req->rq_repmsg->status;
436 if (req->rq_repmsg->type == NTOH__u32(PTL_RPC_MSG_ERR)) {
437 DEBUG_REQ(D_ERROR, req, "type == PTL_RPC_MSG_ERR (%d)", err);
438 RETURN(err ? err : -EINVAL);
442 DEBUG_REQ(D_INFO, req, "status is %d", err);
443 } else if (err > 0) {
444 /* XXX: translate this error from net to host */
445 DEBUG_REQ(D_INFO, req, "status is %d", err);
/* Free the request message buffer of @request and reset rq_reqmsg and
 * rq_reqlen so the request no longer refers to it.  Called from
 * ptlrpc_replay_req() on its send-error path. */
451 static void ptlrpc_cleanup_request_buf(struct ptlrpc_request *request)
453 OBD_FREE(request->rq_reqmsg, request->rq_reqlen);
454 request->rq_reqmsg = NULL;
455 request->rq_reqlen = 0;
458 /* Abort this request and cleanup any resources associated with it. */
/* The reply ME is unlinked with PtlMEUnlink(), the reply buffer
 * (rq_reply_md.start, rq_replen bytes) is freed, the ME handle is zeroed
 * and both rq_reply_md.start and rq_repmsg are cleared.
 * NOTE(review): the action taken when PtlMEUnlink() does not return PTL_OK
 * and the return value are not visible in this listing. */
459 int ptlrpc_abort(struct ptlrpc_request *request)
461 /* First remove the ME for the reply; in theory, this means
462 * that we can tear down the buffer safely. */
463 if (PtlMEUnlink(request->rq_reply_me_h) != PTL_OK)
465 OBD_FREE(request->rq_reply_md.start, request->rq_replen);
467 memset(&request->rq_reply_me_h, 0, sizeof(request->rq_reply_me_h));
468 request->rq_reply_md.start = NULL;
469 request->rq_repmsg = NULL;
473 /* caller must hold imp->imp_lock */
/* Walk imp->imp_replay_list and release requests the peer has committed.
 *
 * Requests flagged PTL_RPC_FL_REPLAY are kept.  A request whose transno is
 * greater than imp_peer_committed_transno is not yet committed; its debug
 * message says the search stops there.  Every other request is unlinked
 * from the list and released with __ptlrpc_req_finished(req, 1), where 1
 * is the "locked" argument.
 * NOTE(review): the continue/break statements are not visible in this
 * listing -- confirm the loop control against the full file. */
474 void ptlrpc_free_committed(struct obd_import *imp)
476 struct list_head *tmp, *saved;
477 struct ptlrpc_request *req;
480 LASSERT(imp != NULL);
483 LASSERT(spin_is_locked(&imp->imp_lock));
486 CDEBUG(D_HA, "%s: committing for last_committed "LPU64"\n",
487 imp->imp_obd->obd_name, imp->imp_peer_committed_transno);
489 list_for_each_safe(tmp, saved, &imp->imp_replay_list) {
490 req = list_entry(tmp, struct ptlrpc_request, rq_list);
492 if (req->rq_flags & PTL_RPC_FL_REPLAY) {
493 DEBUG_REQ(D_HA, req, "keeping (FL_REPLAY)");
497 /* not yet committed */
498 if (req->rq_transno > imp->imp_peer_committed_transno) {
499 DEBUG_REQ(D_HA, req, "stopping search");
503 DEBUG_REQ(D_HA, req, "committing (last_committed "LPU64")",
504 imp->imp_peer_committed_transno);
505 list_del_init(&req->rq_list);
506 __ptlrpc_req_finished(req, 1);
/* Release every request still held on imp->imp_replay_list.
 *
 * Under imp_lock, each request is unlinked, detached from the import
 * (rq_import = NULL) and released with __ptlrpc_req_finished(req, 0).
 * __ptlrpc_free_req() only touches imp_lock when rq_import is set, so
 * clearing rq_import first presumably keeps it from re-taking the lock
 * held here -- confirm.
 * NOTE(review): conn is initialized but not used by any line visible in
 * this listing. */
513 void ptlrpc_cleanup_client(struct obd_import *imp)
515 struct list_head *tmp, *saved;
516 struct ptlrpc_request *req;
517 struct ptlrpc_connection *conn = imp->imp_connection;
523 spin_lock_irqsave(&imp->imp_lock, flags);
524 list_for_each_safe(tmp, saved, &imp->imp_replay_list) {
525 req = list_entry(tmp, struct ptlrpc_request, rq_list);
527 /* XXX we should make sure that nobody's sleeping on these! */
528 DEBUG_REQ(D_HA, req, "cleaning up from sending list");
529 list_del_init(&req->rq_list);
530 req->rq_import = NULL;
531 __ptlrpc_req_finished(req, 0);
533 spin_unlock_irqrestore(&imp->imp_lock, flags);
/* Let a delayed request proceed: refresh the handle (addr and cookie) in
 * its request message from the import's current imp_handle, then wake the
 * thread sleeping on rq_wait_for_rep. */
539 void ptlrpc_continue_req(struct ptlrpc_request *req)
541 DEBUG_REQ(D_HA, req, "continuing delayed request");
542 req->rq_reqmsg->addr = req->rq_import->imp_handle.addr;
543 req->rq_reqmsg->cookie = req->rq_import->imp_handle.cookie;
544 wake_up(&req->rq_wait_for_rep);
/* Mark @req for resending and wake its waiter.
 *
 * The handle in the request message is refreshed from the import's
 * imp_handle, rq_status becomes -EAGAIN, the level is set to
 * LUSTRE_CONN_RECOVD, PTL_RPC_FL_RESEND is set and PTL_RPC_FL_TIMEOUT is
 * cleared before the thread sleeping on rq_wait_for_rep is woken. */
547 void ptlrpc_resend_req(struct ptlrpc_request *req)
549 DEBUG_REQ(D_HA, req, "resending");
550 req->rq_reqmsg->addr = req->rq_import->imp_handle.addr;
551 req->rq_reqmsg->cookie = req->rq_import->imp_handle.cookie;
552 req->rq_status = -EAGAIN;
553 req->rq_level = LUSTRE_CONN_RECOVD;
554 req->rq_flags |= PTL_RPC_FL_RESEND;
555 req->rq_flags &= ~PTL_RPC_FL_TIMEOUT;
556 wake_up(&req->rq_wait_for_rep);
/* Mark @req as needing a restart and wake its waiter: rq_status becomes
 * -ERESTARTSYS, PTL_RPC_FL_RESTART is set and PTL_RPC_FL_TIMEOUT is
 * cleared before the thread sleeping on rq_wait_for_rep is woken. */
559 void ptlrpc_restart_req(struct ptlrpc_request *req)
561 DEBUG_REQ(D_HA, req, "restarting (possibly-)completed request");
562 req->rq_status = -ERESTARTSYS;
563 req->rq_flags |= PTL_RPC_FL_RESTART;
564 req->rq_flags &= ~PTL_RPC_FL_TIMEOUT;
565 wake_up(&req->rq_wait_for_rep);
/* Timeout callback that ptlrpc_queue_wait() installs via LWI_TIMEOUT_INTR.
 *
 * @data is the struct ptlrpc_request that timed out.  The request is
 * flagged PTL_RPC_FL_TIMEOUT.  The visible code then checks for a missing
 * import, a missing connection and a missing c_recovd_data.rd_recovd
 * before calling recovd_conn_fail() on the import's connection.  Requests
 * whose level is below LUSTRE_CONN_FULL get separate handling at the end
 * (per the inline comment, they must not go back to sleep).
 * NOTE(review): none of the return statements are visible in this listing
 * -- confirm which paths return 0 and which return non-zero. */
568 static int expired_request(void *data)
570 struct ptlrpc_request *req = data;
579 DEBUG_REQ(D_ERROR, req, "timeout");
581 req->rq_flags |= PTL_RPC_FL_TIMEOUT;
583 if (!req->rq_import) {
584 DEBUG_REQ(D_HA, req, "NULL import; already cleaned up?");
588 if (!req->rq_import->imp_connection) {
589 DEBUG_REQ(D_ERROR, req, "NULL connection");
594 if (!req->rq_import->imp_connection->c_recovd_data.rd_recovd)
597 recovd_conn_fail(req->rq_import->imp_connection);
599 /* If this request is for recovery or other primordial tasks,
600 * don't go back to sleep.
602 if (req->rq_level < LUSTRE_CONN_FULL)
/* Interrupt callback that ptlrpc_queue_wait() installs via
 * LWI_TIMEOUT_INTR.
 *
 * @data is the struct ptlrpc_request being waited on; it is flagged
 * PTL_RPC_FL_INTR.  Returns 1 through RETURN(); per the inline comment the
 * value is currently ignored by the caller. */
607 static int interrupted_request(void *data)
609 struct ptlrpc_request *req = data;
611 req->rq_flags |= PTL_RPC_FL_INTR;
612 RETURN(1); /* ignored, as of this writing */
/* Take an additional reference on @req by incrementing rq_refcount.
 * NOTE(review): the return statement is not visible in this listing;
 * presumably @req itself is returned -- confirm. */
615 struct ptlrpc_request *ptlrpc_request_addref(struct ptlrpc_request *req)
618 atomic_inc(&req->rq_refcount);
/* Keep @req on imp->imp_replay_list so that it can be replayed later.
 *
 * The caller must hold imp->imp_lock and the import must have
 * IMP_REPLAYABLE set (both asserted).  An extra reference is taken on
 * @req.  The list is scanned from the tail towards the head, comparing
 * rq_transno first and rq_xid second; @req is linked in directly after the
 * entry chosen by that scan.
 * NOTE(review): the continue/return statements are not visible in this
 * listing.  If the final list_add_tail() is the fallback for "no entry
 * sorts before @req", it places @req at the END of the list, whereas a
 * list kept in ascending order would need it at the head -- confirm
 * whether list_add() on the list head was intended. */
622 void ptlrpc_retain_replayable_request(struct ptlrpc_request *req,
623 struct obd_import *imp)
625 struct list_head *tmp;
628 LASSERT(spin_is_locked(&imp->imp_lock));
631 LASSERT(imp->imp_flags & IMP_REPLAYABLE);
632 /* Balanced in ptlrpc_free_committed, usually. */
633 ptlrpc_request_addref(req);
634 list_for_each_prev(tmp, &imp->imp_replay_list) {
635 struct ptlrpc_request *iter =
636 list_entry(tmp, struct ptlrpc_request, rq_list);
638 /* We may have duplicate transnos if we create and then
639 * open a file, or for closes retained if to match creating
640 * opens, so use req->rq_xid as a secondary key.
641 * (See bugs 684, 685, and 428.)
643 if (iter->rq_transno > req->rq_transno)
646 if (iter->rq_transno == req->rq_transno) {
647 LASSERT(iter->rq_xid != req->rq_xid);
648 if (iter->rq_xid > req->rq_xid)
652 list_add(&req->rq_list, &iter->rq_list);
656 list_add_tail(&req->rq_list, &imp->imp_replay_list);
/* Send @req through its import and wait for the reply.
 *
 * Flow as far as it is visible in this listing:
 *  - a new xid is assigned and the caller's pid is stamped into the status
 *    field of the request message for tracing;
 *  - under imp_lock the import is checked for IMP_INVALID (the request
 *    must then fail with -EIO, per the inline comment);
 *  - if the request's level is above the import's level, it is parked on
 *    imp_delayed_list and the caller sleeps until the import level is
 *    sufficient or PTL_RPC_FL_ERR is set;
 *  - the request is put on imp_sending_list and sent with ptl_send_rpc();
 *    the caller then sleeps on rq_wait_for_rep until ptlrpc_check_reply()
 *    is satisfied, using expired_request() and interrupted_request() as
 *    timeout and interrupt handlers (a 1-tick timeout is used after a
 *    send error so that recovery is triggered);
 *  - after waking, the ERR, RESEND, INTR and TIMEOUT flags are handled,
 *    the reply is unpacked and its type validated, a -ENOTCONN status
 *    starts recovery through imp->imp_recover(), and for replayable
 *    imports the request may be retained for replay and commit
 *    information from the reply is processed.
 *
 * Result codes visible in GOTO(out, ...) statements: -EIO, -ETIMEDOUT,
 * -EINTR, -EINVAL and req->rq_status; otherwise rc comes from
 * ptlrpc_check_status().
 * NOTE(review): labels, several conditions, the non-kernel/kernel
 * preprocessor split around the wait, and the final return are not visible
 * in this listing -- confirm against the full file. */
659 int ptlrpc_queue_wait(struct ptlrpc_request *req)
662 struct l_wait_info lwi;
663 struct obd_import *imp = req->rq_import;
664 struct ptlrpc_connection *conn = imp->imp_connection;
668 init_waitqueue_head(&req->rq_wait_for_rep);
670 req->rq_xid = HTON__u32(ptlrpc_next_xid());
672 /* for distributed debugging */
673 req->rq_reqmsg->status = HTON__u32(current->pid);
674 CDEBUG(D_RPCTRACE, "Sending RPC pid:xid:nid:opc %d:"LPU64":%s:"LPX64
675 ":%d\n", NTOH__u32(req->rq_reqmsg->status), req->rq_xid,
676 conn->c_peer.peer_ni->pni_name, conn->c_peer.peer_nid,
677 NTOH__u32(req->rq_reqmsg->opc));
679 spin_lock_irqsave(&imp->imp_lock, flags);
682 * If the import has been invalidated (such as by an OST failure), the
683 * request must fail with -EIO.
685 if (req->rq_import->imp_flags & IMP_INVALID) {
686 DEBUG_REQ(D_ERROR, req, "IMP_INVALID:");
687 spin_unlock_irqrestore(&imp->imp_lock, flags);
/* The request needs a higher level than the import currently has: park it
 * on the delayed list and sleep without holding imp_lock. */
691 if (req->rq_level > imp->imp_level) {
692 list_del(&req->rq_list);
693 list_add_tail(&req->rq_list, &imp->imp_delayed_list);
694 spin_unlock_irqrestore(&imp->imp_lock, flags);
696 DEBUG_REQ(D_HA, req, "\"%s\" waiting for recovery: (%d < %d)",
697 current->comm, req->rq_level, imp->imp_level);
698 lwi = LWI_INTR(NULL, NULL);
699 rc = l_wait_event(req->rq_wait_for_rep,
700 (req->rq_level <= imp->imp_level) ||
701 (req->rq_flags & PTL_RPC_FL_ERR), &lwi);
703 if (req->rq_flags & PTL_RPC_FL_ERR)
709 spin_lock_irqsave(&imp->imp_lock, flags);
710 list_del_init(&req->rq_list);
713 spin_unlock_irqrestore(&imp->imp_lock, flags);
717 CERROR("process %d resumed\n", current->pid);
/* Queue on the sending list, release imp_lock, then hand the request to
 * the network layer. */
721 LASSERT(list_empty(&req->rq_list));
722 list_add_tail(&req->rq_list, &imp->imp_sending_list);
723 spin_unlock_irqrestore(&imp->imp_lock, flags);
724 rc = ptl_send_rpc(req);
726 CDEBUG(D_HA, "error %d, opcode %d, need recovery\n", rc,
727 req->rq_reqmsg->opc);
728 /* sleep for a jiffy, then trigger recovery */
729 lwi = LWI_TIMEOUT_INTR(1, expired_request,
730 interrupted_request, req);
732 DEBUG_REQ(D_NET, req, "-- sleeping");
733 lwi = LWI_TIMEOUT_INTR(obd_timeout * HZ, expired_request,
734 interrupted_request, req);
737 l_wait_event(req->rq_wait_for_rep, ptlrpc_check_reply(req), &lwi);
740 extern int reply_in_callback(ptl_event_t *ev);
741 ptl_event_t reply_ev;
742 PtlEQWait(req->rq_connection->c_peer.peer_ni->pni_reply_in_eq_h, &reply_ev);
743 reply_in_callback(&reply_ev);
747 DEBUG_REQ(D_NET, req, "-- done sleeping");
/* Awake again: take the request off whichever import list it is on. */
749 spin_lock_irqsave(&imp->imp_lock, flags);
750 list_del_init(&req->rq_list);
751 spin_unlock_irqrestore(&imp->imp_lock, flags);
753 if (req->rq_flags & PTL_RPC_FL_ERR) {
755 GOTO(out, rc = -EIO);
758 /* Don't resend if we were interrupted. */
759 if ((req->rq_flags & (PTL_RPC_FL_RESEND | PTL_RPC_FL_INTR)) ==
761 if (req->rq_flags & PTL_RPC_FL_NO_RESEND) {
762 ptlrpc_abort(req); /* clean up reply buffers */
763 req->rq_flags &= ~PTL_RPC_FL_NO_RESEND;
764 GOTO(out, rc = -ETIMEDOUT);
766 req->rq_flags &= ~PTL_RPC_FL_RESEND;
767 lustre_msg_add_flags(req->rq_reqmsg, MSG_RESENT);
768 DEBUG_REQ(D_HA, req, "resending: ");
769 spin_lock_irqsave(&imp->imp_lock, flags);
773 if (req->rq_flags & PTL_RPC_FL_INTR) {
774 if (!(req->rq_flags & PTL_RPC_FL_TIMEOUT))
775 LBUG(); /* should only be interrupted if we timed out */
776 /* Clean up the dangling reply buffers */
778 GOTO(out, rc = -EINTR);
781 if (req->rq_flags & PTL_RPC_FL_TIMEOUT)
782 GOTO(out, rc = -ETIMEDOUT);
784 if (!(req->rq_flags & PTL_RPC_FL_REPLIED))
785 GOTO(out, rc = req->rq_status);
787 rc = lustre_unpack_msg(req->rq_repmsg, req->rq_replen);
789 CERROR("unpack_rep failed: %d\n", rc);
793 /* FIXME: Enable when BlueArc makes new release */
794 if (req->rq_repmsg->type != PTL_RPC_MSG_REPLY &&
795 req->rq_repmsg->type != PTL_RPC_MSG_ERR) {
796 CERROR("invalid packet type received (type=%u)\n",
797 req->rq_repmsg->type);
799 GOTO(out, rc = -EINVAL);
802 DEBUG_REQ(D_NET, req, "status %d", req->rq_repmsg->status);
804 /* We're a rejected connection, need to invalidate and rebuild. */
805 if (req->rq_repmsg->status == -ENOTCONN) {
806 spin_lock_irqsave(&imp->imp_lock, flags);
807 /* If someone else is reconnecting us (CONN_RECOVD) or has
808 * already completed it (handle mismatch), then we just need
811 if (imp->imp_level == LUSTRE_CONN_RECOVD ||
812 imp->imp_handle.addr != req->rq_reqmsg->addr ||
813 imp->imp_handle.cookie != req->rq_reqmsg->cookie) {
814 spin_unlock_irqrestore(&imp->imp_lock, flags);
815 GOTO(out, rc = -EIO);
817 imp->imp_level = LUSTRE_CONN_RECOVD;
818 spin_unlock_irqrestore(&imp->imp_lock, flags);
819 rc = imp->imp_recover(imp, PTLRPC_RECOVD_PHASE_NOTCONN);
822 GOTO(out, rc = -EIO);
825 rc = ptlrpc_check_status(req);
827 if (req->rq_import->imp_flags & IMP_REPLAYABLE) {
828 spin_lock_irqsave(&imp->imp_lock, flags);
829 if ((req->rq_flags & PTL_RPC_FL_REPLAY || req->rq_transno != 0)
831 ptlrpc_retain_replayable_request(req, imp);
834 if (req->rq_transno > imp->imp_max_transno) {
835 imp->imp_max_transno = req->rq_transno;
838 /* Replay-enabled imports return commit-status information. */
839 if (req->rq_repmsg->last_committed) {
840 imp->imp_peer_committed_transno =
841 req->rq_repmsg->last_committed;
843 ptlrpc_free_committed(imp);
844 spin_unlock_irqrestore(&imp->imp_lock, flags);
/* Re-send a previously issued request during recovery and wait for the
 * reply.
 *
 * The handle in the request message is refreshed from the import's
 * imp_handle.  The request level is temporarily set to LUSTRE_CONN_RECOVD
 * and restored to its previous value near the end.  If the request had
 * already been replied to, the old reply status is remembered so that a
 * different status after the replay can be logged.  After ptl_send_rpc()
 * the caller sleeps (interruptible, no timeout) until ptlrpc_check_reply()
 * is satisfied, unpacks the reply and, when rq_replay_cb is set, invokes
 * it on the request.
 * Visible result codes: -EINTR when the wait ends without a reply;
 * otherwise rc comes from ptl_send_rpc() / lustre_unpack_msg().
 * NOTE(review): the error conditions, the "out" label and the return are
 * not visible in this listing -- confirm against the full file. */
852 int ptlrpc_replay_req(struct ptlrpc_request *req)
854 int rc = 0, old_level, old_status = 0;
855 // struct ptlrpc_client *cli = req->rq_import->imp_client;
856 struct l_wait_info lwi;
859 init_waitqueue_head(&req->rq_wait_for_rep);
860 DEBUG_REQ(D_NET, req, "");
862 req->rq_reqmsg->addr = req->rq_import->imp_handle.addr;
863 req->rq_reqmsg->cookie = req->rq_import->imp_handle.cookie;
865 /* temporarily set request to RECOVD level (reset at out:) */
866 old_level = req->rq_level;
867 if (req->rq_flags & PTL_RPC_FL_REPLIED)
868 old_status = req->rq_repmsg->status;
869 req->rq_level = LUSTRE_CONN_RECOVD;
870 rc = ptl_send_rpc(req);
872 CERROR("error %d, opcode %d\n", rc, req->rq_reqmsg->opc);
873 ptlrpc_cleanup_request_buf(req);
874 // up(&cli->cli_rpc_sem);
878 CDEBUG(D_OTHER, "-- sleeping\n");
879 lwi = LWI_INTR(NULL, NULL); /* XXX needs timeout, nested recovery */
880 l_wait_event(req->rq_wait_for_rep, ptlrpc_check_reply(req), &lwi);
881 CDEBUG(D_OTHER, "-- done\n");
883 // up(&cli->cli_rpc_sem);
885 if (!(req->rq_flags & PTL_RPC_FL_REPLIED)) {
886 CERROR("Unknown reason for wakeup\n");
887 /* XXX Phil - I end up here when I kill obdctl */
889 GOTO(out, rc = -EINTR);
892 rc = lustre_unpack_msg(req->rq_repmsg, req->rq_replen);
894 CERROR("unpack_rep failed: %d\n", rc);
898 CDEBUG(D_NET, "got rep "LPD64"\n", req->rq_xid);
900 /* let the callback do fixups, possibly including in the request */
901 if (req->rq_replay_cb)
902 req->rq_replay_cb(req);
904 if ((req->rq_flags & PTL_RPC_FL_REPLIED) &&
905 req->rq_repmsg->status != old_status) {
906 DEBUG_REQ(D_HA, req, "status %d, old was %d",
907 req->rq_repmsg->status, old_status);
911 req->rq_level = old_level;
915 /* XXX looks a lot like super.c:invalidate_request_list, don't it? */
/* Fail every request currently queued on @imp.
 *
 * For an import without IMP_REPLAYABLE, IMP_INVALID is first set under
 * imp_lock so that no new requests are accepted.  Each request on
 * imp_sending_list and on imp_delayed_list is then flagged
 * PTL_RPC_FL_ERR, detached from the import (rq_import = NULL) and its
 * waiter on rq_wait_for_rep is woken.
 * NOTE(review): @dying_import is not referenced by any line visible in
 * this listing, and the end of the function (including any locking around
 * the list walks) may lie outside the visible range. */
916 void ptlrpc_abort_inflight(struct obd_import *imp, int dying_import)
919 struct list_head *tmp, *n;
921 /* Make sure that no new requests get processed for this import.
922 * ptlrpc_queue_wait must (and does) hold imp_lock while testing this
923 * flag and then putting requests on sending_list or delayed_list.
925 if ((imp->imp_flags & IMP_REPLAYABLE) == 0) {
926 spin_lock_irqsave(&imp->imp_lock, flags);
927 imp->imp_flags |= IMP_INVALID;
928 spin_unlock_irqrestore(&imp->imp_lock, flags);
931 list_for_each_safe(tmp, n, &imp->imp_sending_list) {
932 struct ptlrpc_request *req =
933 list_entry(tmp, struct ptlrpc_request, rq_list);
935 DEBUG_REQ(D_HA, req, "inflight");
936 req->rq_flags |= PTL_RPC_FL_ERR;
938 req->rq_import = NULL;
939 wake_up(&req->rq_wait_for_rep);
942 list_for_each_safe(tmp, n, &imp->imp_delayed_list) {
943 struct ptlrpc_request *req =
944 list_entry(tmp, struct ptlrpc_request, rq_list);
946 DEBUG_REQ(D_HA, req, "aborting waiting req");
947 req->rq_flags |= PTL_RPC_FL_ERR;
949 req->rq_import = NULL;
950 wake_up(&req->rq_wait_for_rep);