/* -*- mode: c; c-basic-offset: 8; indent-tabs-mode: nil; -*-
 * vim:expandtab:shiftwidth=8:tabstop=8:
 *
 * Copyright (C) 2002 Cluster File Systems, Inc.
 *
 * This file is part of Lustre, http://www.lustre.org.
 *
 * Lustre is free software; you can redistribute it and/or
 * modify it under the terms of version 2 of the GNU General Public
 * License as published by the Free Software Foundation.
 *
 * Lustre is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 * GNU General Public License for more details.
 *
 * You should have received a copy of the GNU General Public License
 * along with Lustre; if not, write to the Free Software
 * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
 */

#define DEBUG_SUBSYSTEM S_RPC

#include <linux/obd_support.h>
#include <linux/obd_class.h>
#include <linux/lustre_lib.h>
#include <linux/lustre_ha.h>
#include <linux/lustre_import.h>

void ptlrpc_init_client(int req_portal, int rep_portal, char *name,
                        struct ptlrpc_client *cl)
{
        cl->cli_request_portal = req_portal;
        cl->cli_reply_portal = rep_portal;
        cl->cli_name = name;
}

__u8 *ptlrpc_req_to_uuid(struct ptlrpc_request *req)
{
        return req->rq_connection->c_remote_uuid;
}

struct ptlrpc_connection *ptlrpc_uuid_to_connection(obd_uuid_t uuid)
{
        struct ptlrpc_connection *c;
        struct lustre_peer peer;
        int err;

        err = kportal_uuid_to_peer(uuid, &peer);
        if (err != 0) {
                CERROR("cannot find peer %s!\n", uuid);
                return NULL;
        }

        c = ptlrpc_get_connection(&peer, uuid);
        if (c != NULL)
                memcpy(c->c_remote_uuid, uuid, sizeof(c->c_remote_uuid));

        CDEBUG(D_INFO, "%s -> %p\n", uuid, c);
        return c;
}

void ptlrpc_readdress_connection(struct ptlrpc_connection *conn, obd_uuid_t uuid)
{
        struct lustre_peer peer;
        int err;

        err = kportal_uuid_to_peer(uuid, &peer);
        if (err != 0) {
                CERROR("cannot find peer %s!\n", uuid);
                return;
        }

        memcpy(&conn->c_peer, &peer, sizeof(peer));
}

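/*
 * Bulk descriptor handling.  A descriptor starts out with a single
 * reference and a reference on its connection; pages are attached with
 * ptlrpc_prep_bulk_page() and everything is torn down again by
 * ptlrpc_free_bulk(), which expects the descriptor to have been removed
 * from any brw set first (bd_set_chain must be empty).
 */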
struct ptlrpc_bulk_desc *ptlrpc_prep_bulk(struct ptlrpc_connection *conn)
{
        struct ptlrpc_bulk_desc *desc;

        OBD_ALLOC(desc, sizeof(*desc));
        if (desc == NULL)
                return NULL;

        desc->bd_connection = ptlrpc_connection_addref(conn);
        atomic_set(&desc->bd_refcount, 1);
        init_waitqueue_head(&desc->bd_waitq);
        INIT_LIST_HEAD(&desc->bd_page_list);
        INIT_LIST_HEAD(&desc->bd_set_chain);
        ptl_set_inv_handle(&desc->bd_md_h);
        ptl_set_inv_handle(&desc->bd_me_h);

        return desc;
}

int ptlrpc_bulk_error(struct ptlrpc_bulk_desc *desc)
{
        int rc = 0;
        if (desc->bd_flags & PTL_RPC_FL_TIMEOUT) {
                rc = (desc->bd_flags & PTL_RPC_FL_INTR ? -ERESTARTSYS :
                      -ETIMEDOUT);
        }
        return rc;
}

struct ptlrpc_bulk_page *ptlrpc_prep_bulk_page(struct ptlrpc_bulk_desc *desc)
{
        struct ptlrpc_bulk_page *bulk;

        OBD_ALLOC(bulk, sizeof(*bulk));
        if (bulk == NULL)
                return NULL;

        bulk->bp_desc = desc;
        list_add_tail(&bulk->bp_link, &desc->bd_page_list);
        desc->bd_page_count++;
        return bulk;
}

void ptlrpc_free_bulk(struct ptlrpc_bulk_desc *desc)
{
        struct list_head *tmp, *next;

        if (desc == NULL)
                return;

        LASSERT(list_empty(&desc->bd_set_chain));

        list_for_each_safe(tmp, next, &desc->bd_page_list) {
                struct ptlrpc_bulk_page *bulk;
                bulk = list_entry(tmp, struct ptlrpc_bulk_page, bp_link);
                ptlrpc_free_bulk_page(bulk);
        }

        ptlrpc_put_connection(desc->bd_connection);
        OBD_FREE(desc, sizeof(*desc));
}

void ptlrpc_free_bulk_page(struct ptlrpc_bulk_page *bulk)
{
        if (bulk == NULL)
                return;

        list_del(&bulk->bp_link);
        bulk->bp_desc->bd_page_count--;
        OBD_FREE(bulk, sizeof(*bulk));
}

static int ll_sync_brw_timeout(void *data)
{
        struct obd_brw_set *set = data;
        struct list_head *tmp;
        int failed = 0;
        ENTRY;

        LASSERT(set);

        set->brw_flags |= PTL_RPC_FL_TIMEOUT;

        list_for_each(tmp, &set->brw_desc_head) {
                struct ptlrpc_bulk_desc *desc =
                        list_entry(tmp, struct ptlrpc_bulk_desc, bd_set_chain);

                /* Skip descriptors that were completed successfully. */
                if (desc->bd_flags & (PTL_BULK_FL_RCVD | PTL_BULK_FL_SENT))
                        continue;

                LASSERT(desc->bd_connection);

                /* If PtlMDUnlink succeeds, then it hasn't completed yet.  If it
                 * fails, the bulk finished _just_ in time (after the timeout
                 * fired but before we got this far) and we'll let it live. */
                if (PtlMDUnlink(desc->bd_md_h) != 0) {
                        CERROR("Near-miss on OST %s -- need to adjust "
                               "obd_timeout?\n",
                               desc->bd_connection->c_remote_uuid);
                        continue;
                }

                CERROR("IO of %d pages to/from %s:%d (conn %p) timed out\n",
                       desc->bd_page_count, desc->bd_connection->c_remote_uuid,
                       desc->bd_portal, desc->bd_connection);
                desc->bd_connection->c_level = LUSTRE_CONN_RECOVD;

                /* This one will "never" arrive, don't wait for it. */
                if (atomic_dec_and_test(&set->brw_refcount))
                        wake_up(&set->brw_waitq);

                if (class_signal_connection_failure)
                        class_signal_connection_failure(desc->bd_connection);
                else
                        failed = 1;
        }

        /* 0 = We go back to sleep, until we're resumed or interrupted */
        /* 1 = We can't be recovered, just abort the syscall with -ETIMEDOUT */
        RETURN(failed);
}

static int ll_sync_brw_intr(void *data)
{
        struct obd_brw_set *set = data;

        ENTRY;
        set->brw_flags |= PTL_RPC_FL_INTR;
        RETURN(1); /* ignored, as of this writing */
}

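/*
 * Two-phase helper for synchronous bulk I/O.  CB_PHASE_START waits (with
 * timeout and interrupt handling via ll_sync_brw_timeout/ll_sync_brw_intr)
 * until the set's refcount drains to zero, then detaches and releases the
 * bulk descriptors; CB_PHASE_FINISH is called from the completion side to
 * drop one reference and wake the waiter.
 */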
int ll_brw_sync_wait(struct obd_brw_set *set, int phase)
{
        struct l_wait_info lwi;
        struct list_head *tmp, *next;
        int rc = 0;
        ENTRY;

        switch (phase) {
        case CB_PHASE_START:
                lwi = LWI_TIMEOUT_INTR(obd_timeout * HZ, ll_sync_brw_timeout,
                                       ll_sync_brw_intr, set);
                rc = l_wait_event(set->brw_waitq,
                                  atomic_read(&set->brw_refcount) == 0, &lwi);

                list_for_each_safe(tmp, next, &set->brw_desc_head) {
                        struct ptlrpc_bulk_desc *desc =
                                list_entry(tmp, struct ptlrpc_bulk_desc,
                                           bd_set_chain);
                        list_del_init(&desc->bd_set_chain);
                        ptlrpc_bulk_decref(desc);
                }
                break;
        case CB_PHASE_FINISH:
                if (atomic_dec_and_test(&set->brw_refcount))
                        wake_up(&set->brw_waitq);
                break;
        default:
                LBUG();
        }

        RETURN(rc);
}

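/*
 * Allocate and initialize a request on the given import.  The new request
 * starts with a refcount of 2 (see the comment below); the sender drops one
 * reference when it is done with the request, and the other is dropped when
 * the request is committed or otherwise freed.
 *
 * Illustrative caller sketch only -- the opcode, body layout, and reply
 * size below are hypothetical, not part of this file:
 *
 *      int size = sizeof(struct some_body);
 *      struct ptlrpc_request *req;
 *
 *      req = ptlrpc_prep_req(imp, SOME_OPCODE, 1, &size, NULL);
 *      if (req == NULL)
 *              return -ENOMEM;
 *      req->rq_replen = lustre_msg_size(1, &size);
 *      rc = ptlrpc_queue_wait(req);
 *      ptlrpc_req_finished(req);
 */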
struct ptlrpc_request *ptlrpc_prep_req(struct obd_import *imp, int opcode,
                                       int count, int *lengths, char **bufs)
{
        struct ptlrpc_connection *conn = imp->imp_connection;
        struct ptlrpc_request *request;
        int rc;
        ENTRY;

        OBD_ALLOC(request, sizeof(*request));
        if (!request) {
                CERROR("request allocation out of memory\n");
                RETURN(NULL);
        }

        rc = lustre_pack_msg(count, lengths, bufs,
                             &request->rq_reqlen, &request->rq_reqmsg);
        if (rc) {
                CERROR("cannot pack request %d\n", rc);
                OBD_FREE(request, sizeof(*request));
                RETURN(NULL);
        }

        request->rq_level = LUSTRE_CONN_FULL;
        request->rq_type = PTL_RPC_MSG_REQUEST;
        request->rq_import = imp;

        /* XXX FIXME bug 625069 */
        request->rq_request_portal = imp->imp_client->cli_request_portal;
        request->rq_reply_portal = imp->imp_client->cli_reply_portal;

        request->rq_connection = ptlrpc_connection_addref(conn);

        INIT_LIST_HEAD(&request->rq_list);

        /* This will be reduced once when the sender is finished (waiting for
         * reply, f.e.), and once when the request has been committed and is
         * removed from the to-be-committed list.
         *
         * Also, the refcount will be increased in ptl_send_rpc immediately
         * before we hand it off to portals, and there will be a corresponding
         * decrease in request_out_cb (which is called to indicate that portals
         * is finished with the request, and it can be safely freed).
         *
         * (Except in the DLM server case, where it will be dropped twice
         * by the sender, and then the last time by request_out_callback.) */
        atomic_set(&request->rq_refcount, 2);

        spin_lock(&imp->imp_lock);
        request->rq_xid = HTON__u32(++imp->imp_last_xid);
        spin_unlock(&imp->imp_lock);

        request->rq_reqmsg->magic = PTLRPC_MSG_MAGIC;
        request->rq_reqmsg->version = PTLRPC_MSG_VERSION;
        request->rq_reqmsg->opc = HTON__u32(opcode);
        request->rq_reqmsg->flags = 0;

        ptlrpc_hdl2req(request, &imp->imp_handle);
        RETURN(request);
}

static void __ptlrpc_free_req(struct ptlrpc_request *request, int locked)
{
        ENTRY;
        if (request == NULL) {
                EXIT;
                return;
        }

        if (atomic_read(&request->rq_refcount) != 0) {
                CERROR("freeing request %p (%d->%s:%d) with refcount %d\n",
                       request, request->rq_reqmsg->opc,
                       request->rq_connection->c_remote_uuid,
                       request->rq_import->imp_client->cli_request_portal,
                       atomic_read(&request->rq_refcount));
        }

        if (request->rq_repmsg != NULL) {
                OBD_FREE(request->rq_repmsg, request->rq_replen);
                request->rq_repmsg = NULL;
                request->rq_reply_md.start = NULL;
        }
        if (request->rq_reqmsg != NULL) {
                OBD_FREE(request->rq_reqmsg, request->rq_reqlen);
                request->rq_reqmsg = NULL;
        }

        if (request->rq_import) {
                if (!locked)
                        spin_lock(&request->rq_import->imp_lock);
                list_del_init(&request->rq_list);
                if (!locked)
                        spin_unlock(&request->rq_import->imp_lock);
        }

        ptlrpc_put_connection(request->rq_connection);
        OBD_FREE(request, sizeof(*request));
        EXIT;
}

void ptlrpc_free_req(struct ptlrpc_request *request)
{
        __ptlrpc_free_req(request, 0);
}

static int __ptlrpc_req_finished(struct ptlrpc_request *request, int locked)
{
        if (atomic_dec_and_test(&request->rq_refcount)) {
                __ptlrpc_free_req(request, locked);
                RETURN(1);
        }

        DEBUG_REQ(D_INFO, request, "refcount now %u",
                  atomic_read(&request->rq_refcount));
        RETURN(0);
}

void ptlrpc_req_finished(struct ptlrpc_request *request)
{
        __ptlrpc_req_finished(request, 0);
}

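/*
 * Wake-up condition handed to l_wait_event() by ptlrpc_queue_wait() and
 * ptlrpc_replay_req().  Returns non-zero once a reply has arrived (and the
 * connection is not in the middle of recovery), or when the request has
 * been flagged for resend, restart, or error.
 */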
static int ptlrpc_check_reply(struct ptlrpc_request *req)
{
        int rc = 0;
        ENTRY;

        if (req->rq_repmsg != NULL) {
                struct obd_import *imp = req->rq_import;
                struct ptlrpc_connection *conn = imp->imp_connection;

                if (req->rq_level > conn->c_level) {
                        DEBUG_REQ(D_HA, req,
                                  "recovery started, ignoring (%d > %d)",
                                  req->rq_level, conn->c_level);
                        req->rq_repmsg = NULL;
                        GOTO(out, rc = 0);
                }

                req->rq_transno = NTOH__u64(req->rq_repmsg->transno);
                spin_lock(&imp->imp_lock);
                if (req->rq_transno > imp->imp_max_transno) {
                        imp->imp_max_transno = req->rq_transno;
                } else if (req->rq_transno != 0) {
                        if (conn->c_level == LUSTRE_CONN_FULL) {
                                CERROR("got transno "LPD64" after "
                                       LPD64": recovery may not work\n",
                                       req->rq_transno, imp->imp_max_transno);
                        }
                }
                spin_unlock(&imp->imp_lock);
                req->rq_flags |= PTL_RPC_FL_REPLIED;
                GOTO(out, rc = 1);
        }

        if (req->rq_flags & PTL_RPC_FL_RESEND) {
                DEBUG_REQ(D_ERROR, req, "RESEND:");
                GOTO(out, rc = 1);
        }

        if (req->rq_flags & PTL_RPC_FL_ERR) {
                DEBUG_REQ(D_ERROR, req, "ABORTED:");
                GOTO(out, rc = 1);
        }

        if (req->rq_flags & PTL_RPC_FL_RESTART) {
                DEBUG_REQ(D_ERROR, req, "RESTART:");
                GOTO(out, rc = 1);
        }
 out:
        DEBUG_REQ(D_NET, req, "rc = %d for", rc);
        RETURN(rc);
}

static int ptlrpc_check_status(struct ptlrpc_request *req)
{
        int err = req->rq_repmsg->status;
        ENTRY;

        if (req->rq_repmsg->type == NTOH__u32(PTL_RPC_MSG_ERR)) {
                CERROR("req->rq_repmsg->type == PTL_RPC_MSG_ERR\n");
                RETURN(err ? err : -EINVAL);
        }

        if (err < 0) {
                CDEBUG(D_INFO, "req->rq_repmsg->status is %d\n", err);
        } else if (err > 0) {
                CDEBUG(D_INFO, "req->rq_repmsg->status is %d\n", err);
        }

        RETURN(err);
}

static void ptlrpc_cleanup_request_buf(struct ptlrpc_request *request)
{
        OBD_FREE(request->rq_reqmsg, request->rq_reqlen);
        request->rq_reqmsg = NULL;
        request->rq_reqlen = 0;
}

/* Abort this request and cleanup any resources associated with it. */
static int ptlrpc_abort(struct ptlrpc_request *request)
{
        /* First remove the ME for the reply; in theory, this means
         * that we can tear down the buffer safely. */
        PtlMEUnlink(request->rq_reply_me_h);
        OBD_FREE(request->rq_reply_md.start, request->rq_replen);
        request->rq_repmsg = NULL;
        request->rq_replen = 0;
        return 0;
}

/* caller must hold imp->imp_lock */
void ptlrpc_free_committed(struct obd_import *imp)
{
        struct list_head *tmp, *saved;
        struct ptlrpc_request *req;
        ENTRY;

        list_for_each_safe(tmp, saved, &imp->imp_request_list) {
                req = list_entry(tmp, struct ptlrpc_request, rq_list);

                if (req->rq_flags & PTL_RPC_FL_REPLAY) {
                        DEBUG_REQ(D_HA, req, "keeping (FL_REPLAY)");
                        continue;
                }

                /* If neither replied-to nor restarted, keep it. */
                if (!(req->rq_flags &
                      (PTL_RPC_FL_REPLIED | PTL_RPC_FL_RESTART))) {
                        DEBUG_REQ(D_HA, req, "keeping (in-flight)");
                        continue;
                }

                /* This needs to match the commit test in ptlrpc_queue_wait() */
                if (!(req->rq_import->imp_flags & IMP_REPLAYABLE) ||
                    req->rq_transno == 0) {
                        DEBUG_REQ(D_HA, req, "keeping (queue_wait will free)");
                        continue;
                }

                /* not yet committed */
                if (req->rq_transno > imp->imp_peer_committed_transno)
                        break;

                DEBUG_REQ(D_HA, req, "committing (last_committed %Lu)",
                          imp->imp_peer_committed_transno);
                __ptlrpc_req_finished(req, 1);
        }

        EXIT;
}

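/*
 * Drop every request still hanging off the import's sending list; used when
 * the client is torn down.  Nothing should still be waiting on these
 * requests by the time this runs (see the XXX below).
 */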
void ptlrpc_cleanup_client(struct obd_import *imp)
{
        struct list_head *tmp, *saved;
        struct ptlrpc_request *req;
        struct ptlrpc_connection *conn = imp->imp_connection;
        ENTRY;

        LASSERT(conn);

        spin_lock(&imp->imp_lock);
        list_for_each_safe(tmp, saved, &imp->imp_request_list) {
                req = list_entry(tmp, struct ptlrpc_request, rq_list);

                /* XXX we should make sure that nobody's sleeping on these! */
                DEBUG_REQ(D_HA, req, "cleaning up from sending list");
                list_del_init(&req->rq_list);
                req->rq_import = NULL;
                __ptlrpc_req_finished(req, 0);
        }
        spin_unlock(&imp->imp_lock);

        EXIT;
}

void ptlrpc_continue_req(struct ptlrpc_request *req)
{
        ENTRY;
        DEBUG_REQ(D_HA, req, "continuing delayed request");
        req->rq_reqmsg->addr = req->rq_import->imp_handle.addr;
        req->rq_reqmsg->cookie = req->rq_import->imp_handle.cookie;
        wake_up(&req->rq_wait_for_rep);
        EXIT;
}

void ptlrpc_resend_req(struct ptlrpc_request *req)
{
        ENTRY;
        DEBUG_REQ(D_HA, req, "resending");
        req->rq_reqmsg->addr = req->rq_import->imp_handle.addr;
        req->rq_reqmsg->cookie = req->rq_import->imp_handle.cookie;
        req->rq_status = -EAGAIN;
        req->rq_level = LUSTRE_CONN_RECOVD;
        req->rq_flags |= PTL_RPC_FL_RESEND;
        req->rq_flags &= ~PTL_RPC_FL_TIMEOUT;
        wake_up(&req->rq_wait_for_rep);
        EXIT;
}

void ptlrpc_restart_req(struct ptlrpc_request *req)
{
        ENTRY;
        DEBUG_REQ(D_HA, req, "restarting (possibly-)completed request");
        req->rq_status = -ERESTARTSYS;
        req->rq_flags |= PTL_RPC_FL_RESTART;
        req->rq_flags &= ~PTL_RPC_FL_TIMEOUT;
        wake_up(&req->rq_wait_for_rep);
        EXIT;
}

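/*
 * Timeout callback handed to LWI_TIMEOUT_INTR() by ptlrpc_queue_wait().
 * Returning 1 aborts the wait (the caller fails with -ETIMEDOUT); returning
 * 0 puts the waiter back to sleep until recovery resumes the request or the
 * process is interrupted.
 */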
static int expired_request(void *data)
{
        struct ptlrpc_request *req = data;
        ENTRY;

        DEBUG_REQ(D_ERROR, req, "timeout");
        req->rq_flags |= PTL_RPC_FL_TIMEOUT;

        if (!req->rq_import) {
                DEBUG_REQ(D_ERROR, req, "NULL import");
                LBUG();
                RETURN(0);
        }
        if (!req->rq_import->imp_connection) {
                DEBUG_REQ(D_ERROR, req, "NULL connection");
                LBUG();
                RETURN(0);
        }
        if (!req->rq_import->imp_connection->c_recovd_data.rd_recovd)
                RETURN(1);

        req->rq_connection->c_level = LUSTRE_CONN_RECOVD;
        recovd_conn_fail(req->rq_import->imp_connection);

        /* If this request is for recovery or other primordial tasks,
         * don't go back to sleep. */
        if (req->rq_level < LUSTRE_CONN_FULL)
                RETURN(1);
        RETURN(0);
}

static int interrupted_request(void *data)
{
        struct ptlrpc_request *req = data;
        ENTRY;
        req->rq_flags |= PTL_RPC_FL_INTR;
        RETURN(1); /* ignored, as of this writing */
}

/* If we're being torn down by umount -f, or the import has been
 * invalidated (such as by an OST failure), the request must fail with
 * -EIO.
 *
 * Must be called with conn->c_lock held, will drop it if it returns -EIO.
 *
 * XXX this should just be testing the import, and umount_begin shouldn't touch
 * XXX the connection.
 */
#define EIO_IF_INVALID(conn, req)                                             \
if ((conn->c_flags & CONN_INVALID) ||                                        \
    (req->rq_import->imp_flags & IMP_INVALID)) {                             \
        DEBUG_REQ(D_ERROR, req, "%s_INVALID:",                                \
                  (conn->c_flags & CONN_INVALID) ? "CONN" : "IMP");           \
        spin_unlock(&conn->c_lock);                                           \
        RETURN(-EIO);                                                         \
}

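/*
 * Send a request and wait synchronously for its reply.  If the import is
 * not yet at the required connection level, the request is parked on the
 * connection's delayed list until recovery catches up.  Once sent, we sleep
 * in l_wait_event() with ptlrpc_check_reply() as the wake-up condition and
 * expired_request()/interrupted_request() as the timeout and interrupt
 * handlers; after the reply is unpacked, commit information from replayable
 * imports is fed to ptlrpc_free_committed().
 */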
int ptlrpc_queue_wait(struct ptlrpc_request *req)
{
        int rc = 0;
        struct l_wait_info lwi;
        struct obd_import *imp = req->rq_import;
        struct ptlrpc_connection *conn = imp->imp_connection;
        ENTRY;

        init_waitqueue_head(&req->rq_wait_for_rep);

        req->rq_reqmsg->status = HTON__u32(current->pid); /* for distributed debugging */
        CDEBUG(D_RPCTRACE, "Sending RPC pid:xid:nid:opc %d:"LPU64":%x:%d\n",
               NTOH__u32(req->rq_reqmsg->status), req->rq_xid,
               conn->c_peer.peer_nid, NTOH__u32(req->rq_reqmsg->opc));

        /* XXX probably both an import and connection level are needed */
        if (req->rq_level > conn->c_level) {
                spin_lock(&conn->c_lock);
                EIO_IF_INVALID(conn, req);
                list_del(&req->rq_list);
                list_add_tail(&req->rq_list, &conn->c_delayed_head);
                spin_unlock(&conn->c_lock);

                DEBUG_REQ(D_HA, req, "waiting for recovery: (%d < %d)",
                          req->rq_level, conn->c_level);
                lwi = LWI_INTR(NULL, NULL);
                rc = l_wait_event(req->rq_wait_for_rep,
                                  (req->rq_level <= conn->c_level) ||
                                  (req->rq_flags & PTL_RPC_FL_ERR), &lwi);

                spin_lock(&conn->c_lock);
                list_del_init(&req->rq_list);
                spin_unlock(&conn->c_lock);

                if (req->rq_flags & PTL_RPC_FL_ERR)
                        RETURN(-EIO);

                if (rc)
                        RETURN(rc);

                CERROR("process %d resumed\n", current->pid);
        }
 resend:
        req->rq_timeout = obd_timeout;
        spin_lock(&conn->c_lock);
        EIO_IF_INVALID(conn, req);

        list_del(&req->rq_list);
        list_add_tail(&req->rq_list, &imp->imp_request_list);
        spin_unlock(&conn->c_lock);
        rc = ptl_send_rpc(req);
        if (rc) {
                CDEBUG(D_HA, "error %d, opcode %d, need recovery\n", rc,
                       req->rq_reqmsg->opc);
                /* sleep for a jiffy, then trigger recovery */
                lwi = LWI_TIMEOUT_INTR(1, expired_request,
                                       interrupted_request, req);
        } else {
                DEBUG_REQ(D_NET, req, "-- sleeping");
                lwi = LWI_TIMEOUT_INTR(req->rq_timeout * HZ, expired_request,
                                       interrupted_request, req);
        }
        l_wait_event(req->rq_wait_for_rep, ptlrpc_check_reply(req), &lwi);
        DEBUG_REQ(D_NET, req, "-- done sleeping");

        if (req->rq_flags & PTL_RPC_FL_ERR) {
                ptlrpc_abort(req);
                GOTO(out, rc = -EIO);
        }

        /* Don't resend if we were interrupted. */
        if ((req->rq_flags & (PTL_RPC_FL_RESEND | PTL_RPC_FL_INTR)) ==
            PTL_RPC_FL_RESEND) {
                req->rq_flags &= ~PTL_RPC_FL_RESEND;
                DEBUG_REQ(D_HA, req, "resending: ");
                goto resend;
        }

        // up(&cli->cli_rpc_sem);
        if (req->rq_flags & PTL_RPC_FL_INTR) {
                if (!(req->rq_flags & PTL_RPC_FL_TIMEOUT))
                        LBUG(); /* should only be interrupted if we timed out */
                /* Clean up the dangling reply buffers */
                ptlrpc_abort(req);
                GOTO(out, rc = -EINTR);
        }

        if (req->rq_flags & PTL_RPC_FL_TIMEOUT)
                GOTO(out, rc = -ETIMEDOUT);

        if (!(req->rq_flags & PTL_RPC_FL_REPLIED))
                GOTO(out, rc = req->rq_status);

        rc = lustre_unpack_msg(req->rq_repmsg, req->rq_replen);
        if (rc) {
                CERROR("unpack_rep failed: %d\n", rc);
                GOTO(out, rc);
        }
#if 0
        /* FIXME: Enable when BlueArc makes new release */
        if (req->rq_repmsg->type != PTL_RPC_MSG_REPLY &&
            req->rq_repmsg->type != PTL_RPC_MSG_ERR) {
                CERROR("invalid packet type received (type=%u)\n",
                       req->rq_repmsg->type);
                GOTO(out, rc = -EINVAL);
        }
#endif
        CDEBUG(D_NET, "got rep "LPU64"\n", req->rq_xid);
        if (req->rq_repmsg->status == 0)
                CDEBUG(D_NET, "--> buf %p len %d status %d\n", req->rq_repmsg,
                       req->rq_replen, req->rq_repmsg->status);

        spin_lock(&conn->c_lock);

        /* Requests that aren't from replayable imports, or which don't have
         * transno information, can be "committed" early.
         *
         * But don't commit anything that's kept indefinitely for replay (has
         * the PTL_RPC_FL_REPLAY flag set), such as open requests.
         *
         * This needs to match the commit test in ptlrpc_free_committed(). */
        if (!(req->rq_import->imp_flags & IMP_REPLAYABLE) ||
            (req->rq_repmsg->transno == 0 &&
             (req->rq_flags & PTL_RPC_FL_REPLAY) == 0)) {
                /* This import doesn't support replay, so we can just "commit"
                 * this request now. */
                DEBUG_REQ(D_HA, req, "not replayable, committing:");
                list_del_init(&req->rq_list);
                __ptlrpc_req_finished(req, 1);
        }
        if (req->rq_import->imp_flags & IMP_REPLAYABLE) {
                /* Replay-enabled imports return commit-status information. */
                imp->imp_peer_last_xid = req->rq_repmsg->last_xid;
                imp->imp_peer_committed_transno =
                        req->rq_repmsg->last_committed;
                ptlrpc_free_committed(imp);
        }

        rc = ptlrpc_check_status(req);
        spin_unlock(&conn->c_lock);

        EXIT;
 out:
        return rc;
}
#undef EIO_IF_INVALID

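/*
 * Resend a request during recovery.  The request is temporarily raised to
 * LUSTRE_CONN_RECOVD level so that it can pass the level check while the
 * connection is still recovering; once the reply arrives, the per-request
 * replay callback (rq_replay_cb) gets a chance to fix up state, and the
 * original level is restored before returning.
 */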
int ptlrpc_replay_req(struct ptlrpc_request *req)
{
        int rc = 0, old_level, old_status = 0;
        // struct ptlrpc_client *cli = req->rq_import->imp_client;
        struct l_wait_info lwi;
        ENTRY;

        init_waitqueue_head(&req->rq_wait_for_rep);
        DEBUG_REQ(D_NET, req, "");

        req->rq_timeout = obd_timeout;
        req->rq_reqmsg->addr = req->rq_import->imp_handle.addr;
        req->rq_reqmsg->cookie = req->rq_import->imp_handle.cookie;

        /* temporarily set request to RECOVD level (reset at out:) */
        old_level = req->rq_level;
        if (req->rq_flags & PTL_RPC_FL_REPLIED)
                old_status = req->rq_repmsg->status;
        req->rq_level = LUSTRE_CONN_RECOVD;
        rc = ptl_send_rpc(req);
        if (rc) {
                CERROR("error %d, opcode %d\n", rc, req->rq_reqmsg->opc);
                ptlrpc_cleanup_request_buf(req);
                // up(&cli->cli_rpc_sem);
                GOTO(out, rc);
        }

        CDEBUG(D_OTHER, "-- sleeping\n");
        lwi = LWI_INTR(NULL, NULL); /* XXX needs timeout, nested recovery */
        l_wait_event(req->rq_wait_for_rep, ptlrpc_check_reply(req), &lwi);
        CDEBUG(D_OTHER, "-- done\n");

        // up(&cli->cli_rpc_sem);

        if (!(req->rq_flags & PTL_RPC_FL_REPLIED)) {
                CERROR("Unknown reason for wakeup\n");
                /* XXX Phil - I end up here when I kill obdctl */
                GOTO(out, rc = -EINTR);
        }

        rc = lustre_unpack_msg(req->rq_repmsg, req->rq_replen);
        if (rc) {
                CERROR("unpack_rep failed: %d\n", rc);
                GOTO(out, rc);
        }

        CDEBUG(D_NET, "got rep "LPD64"\n", req->rq_xid);

        /* let the callback do fixups, possibly including in the request */
        if (req->rq_replay_cb)
                req->rq_replay_cb(req);

        if ((req->rq_flags & PTL_RPC_FL_REPLIED) &&
            req->rq_repmsg->status != old_status) {
                DEBUG_REQ(D_HA, req, "status %d, old was %d",
                          req->rq_repmsg->status, old_status);
        }

 out:
        req->rq_level = old_level;
        RETURN(rc);
}