1 /* -*- mode: c; c-basic-offset: 8; indent-tabs-mode: nil; -*-
2 * vim:expandtab:shiftwidth=8:tabstop=8:
4 * Copyright (C) 2002 Cluster File Systems, Inc.
6 * This file is part of Lustre, http://www.lustre.org.
8 * Lustre is free software; you can redistribute it and/or
9 * modify it under the terms of version 2 of the GNU General Public
10 * License as published by the Free Software Foundation.
12 * Lustre is distributed in the hope that it will be useful,
13 * but WITHOUT ANY WARRANTY; without even the implied warranty of
14 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
15 * GNU General Public License for more details.
17 * You should have received a copy of the GNU General Public License
18 * along with Lustre; if not, write to the Free Software
19 * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
23 #define DEBUG_SUBSYSTEM S_RPC
25 #include <linux/obd_support.h>
26 #include <linux/obd_class.h>
27 #include <linux/lustre_lib.h>
28 #include <linux/lustre_ha.h>
/*
 * Initialise a ptlrpc client structure: zero it, attach the recovery
 * daemon and its recovery callback, register the client with recovd,
 * record the request/reply portals, and set up the request lists,
 * spinlock and RPC-concurrency semaphore.
 * NOTE(review): source appears truncated here — the opening brace and
 * the "int req_portal," parameter line (used below) are not visible.
 */
30 void ptlrpc_init_client(struct recovd_obd *recovd,
31 int (*recover)(struct ptlrpc_client *recover),
33 int rep_portal, struct ptlrpc_client *cl)
35 memset(cl, 0, sizeof(*cl));
36 cl->cli_recovd = recovd;
37 cl->cli_recover = recover;
39 recovd_cli_manage(recovd, cl);
41 cl->cli_request_portal = req_portal;
42 cl->cli_reply_portal = rep_portal;
/* lists for requests in the delayed / in-flight / dying states */
43 INIT_LIST_HEAD(&cl->cli_delayed_head);
44 INIT_LIST_HEAD(&cl->cli_sending_head);
45 INIT_LIST_HEAD(&cl->cli_dying_head);
46 spin_lock_init(&cl->cli_lock);
/* allow at most 32 concurrent RPCs through this client */
47 sema_init(&cl->cli_rpc_sem, 32);
/* Return the remote peer UUID of the connection this request rides on. */
50 __u8 *ptlrpc_req_to_uuid(struct ptlrpc_request *req)
52 return req->rq_connection->c_remote_uuid;
/*
 * Resolve a UUID string to a peer and return the (refcounted)
 * connection for it, stamping the connection with the UUID.
 * NOTE(review): truncated — the error check after
 * kportal_uuid_to_peer() and the return statement are not visible.
 */
55 struct ptlrpc_connection *ptlrpc_uuid_to_connection(char *uuid)
57 struct ptlrpc_connection *c;
58 struct lustre_peer peer;
61 err = kportal_uuid_to_peer(uuid, &peer);
63 CERROR("cannot find peer %s!\n", uuid);
67 c = ptlrpc_get_connection(&peer);
/* copy is bounded by the destination field, not strlen(uuid) */
69 memcpy(c->c_remote_uuid, uuid, sizeof(c->c_remote_uuid));
/*
 * Re-resolve a UUID to its (possibly new) peer address and overwrite
 * the connection's peer with the result — used when a peer moves.
 * NOTE(review): truncated — the error branch after the lookup is
 * not fully visible.
 */
76 void ptlrpc_readdress_connection(struct ptlrpc_connection *conn, char *uuid)
78 struct lustre_peer peer;
81 err = kportal_uuid_to_peer(uuid, &peer);
83 CERROR("cannot find peer %s!\n", uuid);
87 memcpy(&conn->c_peer, &peer, sizeof(peer));
/*
 * Allocate and initialise a bulk descriptor bound to @conn: takes a
 * reference on the connection, starts the descriptor refcount at 1,
 * and invalidates the Portals MD/ME handles until they are attached.
 * NOTE(review): truncated — the OBD_ALLOC failure check and the
 * return of @desc are not visible.
 */
91 struct ptlrpc_bulk_desc *ptlrpc_prep_bulk(struct ptlrpc_connection *conn)
93 struct ptlrpc_bulk_desc *desc;
95 OBD_ALLOC(desc, sizeof(*desc));
97 desc->b_connection = ptlrpc_connection_addref(conn);
98 atomic_set(&desc->b_refcount, 1);
99 init_waitqueue_head(&desc->b_waitq);
100 INIT_LIST_HEAD(&desc->b_page_list);
/* mark Portals handles invalid until a real MD/ME is attached */
101 ptl_set_inv_handle(&desc->b_md_h);
102 ptl_set_inv_handle(&desc->b_me_h);
/*
 * Allocate one bulk page, append it to the descriptor's page list and
 * bump the page count.
 * NOTE(review): truncated — allocation-failure handling and the
 * return of @bulk are not visible.
 */
108 struct ptlrpc_bulk_page *ptlrpc_prep_bulk_page(struct ptlrpc_bulk_desc *desc)
110 struct ptlrpc_bulk_page *bulk;
112 OBD_ALLOC(bulk, sizeof(*bulk));
115 list_add_tail(&bulk->b_link, &desc->b_page_list);
116 desc->b_page_count++;
/*
 * Tear down a bulk descriptor: free every page still on its list
 * (safe iteration, as each page is unlinked by ptlrpc_free_bulk_page),
 * drop the connection reference taken in ptlrpc_prep_bulk, then free
 * the descriptor itself.
 */
123 struct list_head *tmp, *next;
130 list_for_each_safe(tmp, next, &desc->b_page_list) {
131 struct ptlrpc_bulk_page *bulk;
132 bulk = list_entry(tmp, struct ptlrpc_bulk_page, b_link);
133 ptlrpc_free_bulk_page(bulk);
136 ptlrpc_put_connection(desc->b_connection);
138 OBD_FREE(desc, sizeof(*desc));
/*
 * Unlink one page from its descriptor, decrement the descriptor's
 * page count and free the page structure.
 */
142 void ptlrpc_free_bulk_page(struct ptlrpc_bulk_page *bulk)
150 list_del(&bulk->b_link);
151 bulk->b_desc->b_page_count--;
152 OBD_FREE(bulk, sizeof(*bulk));
/*
 * Allocate and initialise an outgoing request on client @cl over
 * @conn: packs the request message from (@count, @lengths, bufs),
 * takes a connection reference, assigns the next XID under the
 * connection lock, and fills in the wire-format message header.
 * NOTE(review): truncated — the "char **bufs" parameter line, the
 * failure returns and the final return of @request are not visible.
 */
156 struct ptlrpc_request *ptlrpc_prep_req(struct ptlrpc_client *cl,
157 struct ptlrpc_connection *conn,
158 int opcode, int count, int *lengths,
161 struct ptlrpc_request *request;
165 OBD_ALLOC(request, sizeof(*request));
167 CERROR("request allocation out of memory\n");
171 rc = lustre_pack_msg(count, lengths, bufs,
172 &request->rq_reqlen, &request->rq_reqmsg);
/* on pack failure, undo the allocation before bailing out */
174 CERROR("cannot pack request %d\n", rc);
175 OBD_FREE(request, sizeof(*request));
179 request->rq_level = LUSTRE_CONN_FULL;
180 request->rq_type = PTL_RPC_TYPE_REQUEST;
181 request->rq_client = cl;
182 request->rq_connection = ptlrpc_connection_addref(conn);
184 INIT_LIST_HEAD(&request->rq_list);
185 INIT_LIST_HEAD(&request->rq_multi);
186 /* this will be dec()d once in req_finished, once in free_committed */
187 atomic_set(&request->rq_refcount, 2);
/* XID allocation must be serialised per connection */
189 spin_lock(&conn->c_lock);
190 request->rq_xid = HTON__u32(++conn->c_xid_out);
191 spin_unlock(&conn->c_lock);
/* wire header: magic/version are host-order, opc/type go network-order */
193 request->rq_reqmsg->magic = PTLRPC_MSG_MAGIC;
194 request->rq_reqmsg->version = PTLRPC_MSG_VERSION;
195 request->rq_reqmsg->opc = HTON__u32(opcode);
196 request->rq_reqmsg->type = HTON__u32(PTL_RPC_MSG_REQUEST);
/*
 * Convenience wrapper: translate a lustre_handle into its export /
 * client obd, build the request via ptlrpc_prep_req(), and bind the
 * export handle into the request.
 * NOTE(review): truncated — the "char **bufs" parameter, the
 * NULL-export check and the return of @req are not visible.
 */
200 struct ptlrpc_request *ptlrpc_prep_req2(struct lustre_handle *conn,
201 int opcode, int count, int *lengths,
204 struct client_obd *clobd;
205 struct ptlrpc_request *req;
206 struct obd_export *export;
208 export = class_conn2export(conn);
211 CERROR("NOT connected\n");
215 clobd = &export->exp_obd->u.cli;
216 req = ptlrpc_prep_req(clobd->cl_client, clobd->cl_conn,
217 opcode, count, lengths, bufs);
218 ptlrpc_hdl2req(req, &clobd->cl_exporth);
/*
 * Release the caller's reference on a request: free the reply buffer
 * eagerly, then drop one refcount and free the request entirely when
 * the count reaches zero (the other ref is held until commit — see
 * ptlrpc_prep_req / ptlrpc_free_committed).
 */
222 void ptlrpc_req_finished(struct ptlrpc_request *request)
227 if (request->rq_repmsg != NULL) {
228 OBD_FREE(request->rq_repmsg, request->rq_replen);
229 request->rq_repmsg = NULL;
230 request->rq_reply_md.start = NULL;
233 if (atomic_dec_and_test(&request->rq_refcount))
234 ptlrpc_free_req(request);
/*
 * Free a request unconditionally: release reply and request buffers,
 * unlink it from its client's list under the client lock, drop the
 * connection reference and free the structure.
 */
237 void ptlrpc_free_req(struct ptlrpc_request *request)
240 if (request == NULL) {
245 if (request->rq_repmsg != NULL)
246 OBD_FREE(request->rq_repmsg, request->rq_replen);
247 if (request->rq_reqmsg != NULL)
248 OBD_FREE(request->rq_reqmsg, request->rq_reqlen);
/* rq_client may already be NULL if the caller detached it first
 * (see ptlrpc_free_committed) */
250 if (request->rq_client) {
251 spin_lock(&request->rq_client->cli_lock);
252 list_del_init(&request->rq_list);
253 spin_unlock(&request->rq_client->cli_lock);
256 ptlrpc_put_connection(request->rq_connection);
257 list_del(&request->rq_multi);
258 OBD_FREE(request, sizeof(*request));
/*
 * Wait-condition predicate used by l_wait_event() in the queue/replay
 * paths. Returns nonzero when the waiter should wake: a reply has
 * arrived, a resend/restart was requested, the caller was interrupted,
 * or the request timed out (in which case recovery is kicked off).
 * NOTE(review): heavily truncated — the GOTO/return statements between
 * the branches are not visible, so exact control flow can't be
 * confirmed from this view.
 */
262 static int ptlrpc_check_reply(struct ptlrpc_request *req)
/* reply landed: latch the transno and mark the request replied */
266 if (req->rq_repmsg != NULL) {
267 req->rq_transno = NTOH__u64(req->rq_repmsg->transno);
268 req->rq_flags |= PTL_RPC_FL_REPLIED;
/* resend requested by recovery; honour a pending signal first */
273 if (req->rq_flags & PTL_RPC_FL_RESEND) {
274 if (l_killable_pending(current)) {
275 CERROR("-- INTR --\n");
276 req->rq_flags |= PTL_RPC_FL_INTR;
279 CERROR("-- RESEND --\n");
284 if (req->rq_flags & PTL_RPC_FL_RECOVERY) {
285 CERROR("-- RESTART --\n");
289 if (req->rq_flags & PTL_RPC_FL_TIMEOUT && l_killable_pending(current)) {
290 req->rq_flags |= PTL_RPC_FL_INTR;
/* deadline passed: mark the connection for recovery and notify recovd */
294 if (req->rq_timeout &&
295 (CURRENT_TIME - req->rq_time >= req->rq_timeout)) {
296 CERROR("-- REQ TIMEOUT ON CONNID %d XID %Ld --\n",
297 req->rq_connid, (unsigned long long)req->rq_xid);
298 /* clear the timeout */
300 req->rq_connection->c_level = LUSTRE_CONN_RECOVD;
301 req->rq_flags |= PTL_RPC_FL_TIMEOUT;
302 if (req->rq_client && req->rq_client->cli_recovd)
303 recovd_cli_fail(req->rq_client);
304 if (req->rq_level < LUSTRE_CONN_FULL) {
306 } else if (l_killable_pending(current)) {
307 req->rq_flags |= PTL_RPC_FL_INTR;
316 CDEBUG(D_NET, "req = %p, rc = %d\n", req, rc);
/*
 * Validate a completed request and extract the server-side status
 * from the reply message. An explicit error-type reply returns the
 * embedded status (or -EINVAL if it is zero).
 * NOTE(review): truncated — the early-return branches for @err and a
 * NULL @req, and the final return, are not visible.
 */
320 int ptlrpc_check_status(struct ptlrpc_request *req, int err)
325 CERROR("err is %d\n", err);
330 CERROR("req == NULL\n");
334 if (req->rq_repmsg == NULL) {
335 CERROR("req->rq_repmsg == NULL\n");
339 err = req->rq_repmsg->status;
340 if (req->rq_repmsg->type == NTOH__u32(PTL_RPC_MSG_ERR)) {
341 CERROR("req->rq_repmsg->type == PTL_RPC_MSG_ERR\n");
342 RETURN(err ? err : -EINVAL);
347 CERROR("req->rq_repmsg->status is %d\n", err);
349 CDEBUG(D_INFO, "req->rq_repmsg->status is %d\n", err);
350 /* XXX: translate this error from net to host */
/* Free the packed request buffer and reset the message pointer/length. */
357 static void ptlrpc_cleanup_request_buf(struct ptlrpc_request *request)
359 OBD_FREE(request->rq_reqmsg, request->rq_reqlen);
360 request->rq_reqmsg = NULL;
361 request->rq_reqlen = 0;
364 /* Abort this request and cleanup any resources associated with it. */
365 static int ptlrpc_abort(struct ptlrpc_request *request)
367 /* First remove the ME for the reply; in theory, this means
368 * that we can tear down the buffer safely. */
369 PtlMEUnlink(request->rq_reply_me_h);
370 OBD_FREE(request->rq_reply_md.start, request->rq_replen);
/* drop stale pointers so later cleanup paths don't double-free */
371 request->rq_repmsg = NULL;
372 request->rq_replen = 0;
376 /* caller must lock cli */
/*
 * Walk the sending list and drop the "until committed" reference on
 * every request whose transno the server has committed. Requests
 * flagged for replay are retained; uncommitted ones are skipped.
 * If the refcount does not yet reach zero, the request is parked on
 * the dying list until its other holder releases it.
 * NOTE(review): truncated — the continue statements and closing braces
 * between branches are not visible.
 */
377 void ptlrpc_free_committed(struct ptlrpc_client *cli)
379 struct list_head *tmp, *saved;
380 struct ptlrpc_request *req;
382 list_for_each_safe(tmp, saved, &cli->cli_sending_head) {
383 req = list_entry(tmp, struct ptlrpc_request, rq_list);
385 if ( (req->rq_flags & PTL_RPC_FL_REPLAY) ) {
386 CDEBUG(D_INFO, "Retaining request %Ld for replay\n",
391 /* not yet committed */
392 if (req->rq_transno > cli->cli_last_committed)
395 CDEBUG(D_INFO, "Marking request xid %Ld as committed ("
396 "transno=%Lu, last_committed=%Lu\n",
397 (long long)req->rq_xid, (long long)req->rq_transno,
398 (long long)cli->cli_last_committed);
399 if (atomic_dec_and_test(&req->rq_refcount)) {
400 /* we do this to prevent free_req deadlock */
/* detach from cli first: caller holds cli_lock, and free_req
 * would otherwise try to take it again */
401 list_del_init(&req->rq_list);
402 req->rq_client = NULL;
403 ptlrpc_free_req(req);
405 list_del_init(&req->rq_list);
406 list_add(&req->rq_list, &cli->cli_dying_head);
/*
 * Force-free every request still attached to the client, from both
 * the sending and dying lists, under the client lock. Requests still
 * on the dying list at this point indicate a leak, hence the CERROR.
 * NOTE(review): rq_client is cleared before ptlrpc_free_req() so the
 * free path does not re-take cli_lock (same pattern as
 * ptlrpc_free_committed).
 */
414 void ptlrpc_cleanup_client(struct ptlrpc_client *cli)
416 struct list_head *tmp, *saved;
417 struct ptlrpc_request *req;
420 spin_lock(&cli->cli_lock);
421 list_for_each_safe(tmp, saved, &cli->cli_sending_head) {
422 req = list_entry(tmp, struct ptlrpc_request, rq_list);
423 CDEBUG(D_INFO, "Cleaning req %p from sending list.\n", req);
424 list_del_init(&req->rq_list);
425 req->rq_client = NULL;
426 ptlrpc_free_req(req);
428 list_for_each_safe(tmp, saved, &cli->cli_dying_head) {
429 req = list_entry(tmp, struct ptlrpc_request, rq_list);
430 CERROR("Request %p is on the dying list at cleanup!\n", req);
431 list_del_init(&req->rq_list);
432 req->rq_client = NULL;
433 ptlrpc_free_req(req);
435 spin_unlock(&cli->cli_lock);
/* Wake a delayed request so its waiter re-evaluates the wait condition. */
441 void ptlrpc_continue_req(struct ptlrpc_request *req)
444 CDEBUG(D_INODE, "continue delayed request %Ld opc %d\n",
445 req->rq_xid, req->rq_reqmsg->opc);
446 wake_up(&req->rq_wait_for_rep);
/*
 * Mark a request for resend during recovery: set EAGAIN status,
 * drop it to recovery level, raise RESEND and clear TIMEOUT, then
 * wake the waiter (ptlrpc_check_reply acts on these flags).
 */
450 void ptlrpc_resend_req(struct ptlrpc_request *req)
453 CDEBUG(D_INODE, "resend request %Ld, opc %d\n",
454 req->rq_xid, req->rq_reqmsg->opc);
455 req->rq_status = -EAGAIN;
456 req->rq_level = LUSTRE_CONN_RECOVD;
457 req->rq_flags |= PTL_RPC_FL_RESEND;
458 req->rq_flags &= ~PTL_RPC_FL_TIMEOUT;
459 wake_up(&req->rq_wait_for_rep);
/*
 * Restart a completed request from scratch: set ERESTARTSYS, raise
 * the RECOVERY flag, clear TIMEOUT and wake the waiter.
 */
463 void ptlrpc_restart_req(struct ptlrpc_request *req)
466 CDEBUG(D_INODE, "restart completed request %Ld, opc %d\n",
467 req->rq_xid, req->rq_reqmsg->opc);
468 req->rq_status = -ERESTARTSYS;
469 req->rq_flags |= PTL_RPC_FL_RECOVERY;
470 req->rq_flags &= ~PTL_RPC_FL_TIMEOUT;
471 wake_up(&req->rq_wait_for_rep);
/*
 * l_wait_event timeout callback: flag the request as timed out, push
 * the connection into recovery and notify recovd if present. The
 * return value decides whether the waiter goes back to sleep;
 * recovery-level requests do not.
 * NOTE(review): truncated — the actual RETURN statements are not
 * visible in this view.
 */
475 static int expired_request(void *data)
477 struct ptlrpc_request *req = data;
481 req->rq_connection->c_level = LUSTRE_CONN_RECOVD;
482 req->rq_flags |= PTL_RPC_FL_TIMEOUT;
483 /* Activate the recovd for this client, if there is one. */
484 if (req->rq_client && req->rq_client->cli_recovd)
485 recovd_cli_fail(req->rq_client);
487 /* If this request is for recovery or other primordial tasks,
488 * don't go back to sleep.
490 if (req->rq_level < LUSTRE_CONN_FULL)
/* l_wait_event interrupt callback: just record the INTR flag. */
495 static int interrupted_request(void *data)
497 struct ptlrpc_request *req = data;
499 req->rq_flags |= PTL_RPC_FL_INTR;
500 RETURN(1); /* ignored, as of this writing */
/*
 * Send a request and block until it completes (or fails).
 *
 * Path: if the request's required connection level exceeds the
 * current one, park it on the delayed list and wait for recovery to
 * catch up; otherwise stamp the send time and timeout, fire
 * ptl_send_rpc(), move the request to the sending list, and sleep in
 * l_wait_event() with ptlrpc_check_reply() as the condition and the
 * expired_request/interrupted_request callbacks armed. On wakeup,
 * classify the outcome (interrupt / timeout / no reply / reply),
 * unpack the reply, and fold the server's last_xid/last_committed
 * into the client state, freeing committed requests.
 *
 * NOTE(review): heavily truncated — error returns, the resend loop
 * target, several closing braces and the final RETURN are not
 * visible, so the exact retry control flow can't be confirmed here.
 */
503 int ptlrpc_queue_wait(struct ptlrpc_request *req)
506 struct l_wait_info lwi;
507 struct ptlrpc_client *cli = req->rq_client;
510 init_waitqueue_head(&req->rq_wait_for_rep);
511 CDEBUG(D_NET, "subsys: %s req %Ld opc %d level %d, conn level %d\n",
512 cli->cli_name, req->rq_xid, req->rq_reqmsg->opc, req->rq_level,
513 req->rq_connection->c_level);
515 /* XXX probably both an import and connection level are needed */
/* connection not yet recovered far enough for this request: queue it
 * on the delayed list and wait for the level to rise */
516 if (req->rq_level > req->rq_connection->c_level) {
517 CERROR("process %d waiting for recovery (%d > %d)\n",
518 current->pid, req->rq_level, req->rq_connection->c_level);
520 spin_lock(&cli->cli_lock);
521 list_del_init(&req->rq_list);
522 list_add_tail(&req->rq_list, &cli->cli_delayed_head);
523 spin_unlock(&cli->cli_lock);
525 #warning shaver: what happens when we get interrupted during this wait?
526 lwi = LWI_INTR(SIGTERM | SIGKILL | SIGINT, NULL, NULL);
527 l_wait_event(req->rq_wait_for_rep,
528 req->rq_level <= req->rq_connection->c_level,
531 spin_lock(&cli->cli_lock);
532 list_del_init(&req->rq_list);
533 spin_unlock(&cli->cli_lock);
535 CERROR("process %d resumed\n", current->pid);
/* stamp the send time; 100 (seconds, presumably — confirm units) is
 * the per-request timeout used by ptlrpc_check_reply */
538 req->rq_time = CURRENT_TIME;
539 req->rq_timeout = 100;
540 rc = ptl_send_rpc(req);
542 CERROR("error %d, opcode %d\n", rc, req->rq_reqmsg->opc);
545 ptlrpc_cleanup_request_buf(req);
/* release the RPC-concurrency slot taken for this send */
546 up(&cli->cli_rpc_sem);
550 spin_lock(&cli->cli_lock);
551 list_del_init(&req->rq_list);
552 list_add_tail(&req->rq_list, &cli->cli_sending_head);
553 spin_unlock(&cli->cli_lock);
555 CDEBUG(D_OTHER, "-- sleeping\n");
556 lwi = LWI_TIMEOUT_INTR(req->rq_timeout * HZ, expired_request,
557 SIGKILL | SIGTERM | SIGINT, interrupted_request,
559 l_wait_event(req->rq_wait_for_rep, ptlrpc_check_reply(req), &lwi);
560 CDEBUG(D_OTHER, "-- done\n");
562 /* Don't resend if we were interrupted. */
563 if ((req->rq_flags & (PTL_RPC_FL_RESEND | PTL_RPC_FL_INTR)) ==
565 req->rq_flags &= ~PTL_RPC_FL_RESEND;
569 up(&cli->cli_rpc_sem);
570 if (req->rq_flags & PTL_RPC_FL_INTR) {
571 if (!(req->rq_flags & PTL_RPC_FL_TIMEOUT))
572 LBUG(); /* should only be interrupted if we timed out. */
573 /* Clean up the dangling reply buffers */
575 GOTO(out, rc = -EINTR);
578 if (req->rq_flags & PTL_RPC_FL_TIMEOUT)
579 GOTO(out, rc = -ETIMEDOUT);
581 if (!(req->rq_flags & PTL_RPC_FL_REPLIED))
582 GOTO(out, rc = req->rq_status);
584 rc = lustre_unpack_msg(req->rq_repmsg, req->rq_replen);
586 CERROR("unpack_rep failed: %d\n", rc);
589 CDEBUG(D_NET, "got rep %Ld\n", req->rq_xid);
590 if (req->rq_repmsg->status == 0)
591 CDEBUG(D_NET, "--> buf %p len %d status %d\n", req->rq_repmsg,
592 req->rq_replen, req->rq_repmsg->status);
/* absorb the server's commit info and retire committed requests */
594 spin_lock(&cli->cli_lock);
595 cli->cli_last_xid = req->rq_repmsg->last_xid;
596 cli->cli_last_committed = req->rq_repmsg->last_committed;
597 ptlrpc_free_committed(cli);
598 spin_unlock(&cli->cli_lock);
605 int ptlrpc_replay_req(struct ptlrpc_request *req)
608 struct ptlrpc_client *cli = req->rq_client;
609 struct l_wait_info lwi = LWI_INTR(SIGKILL|SIGTERM|SIGINT, NULL, NULL);
612 init_waitqueue_head(&req->rq_wait_for_rep);
613 CDEBUG(D_NET, "req %Ld opc %d level %d, conn level %d\n",
614 req->rq_xid, req->rq_reqmsg->opc, req->rq_level,
615 req->rq_connection->c_level);
617 req->rq_time = CURRENT_TIME;
618 req->rq_timeout = 100;
619 rc = ptl_send_rpc(req);
621 CERROR("error %d, opcode %d\n", rc, req->rq_reqmsg->opc);
622 ptlrpc_cleanup_request_buf(req);
623 up(&cli->cli_rpc_sem);
627 CDEBUG(D_OTHER, "-- sleeping\n");
628 l_wait_event(req->rq_wait_for_rep, ptlrpc_check_reply(req), &lwi);
629 CDEBUG(D_OTHER, "-- done\n");
631 up(&cli->cli_rpc_sem);
633 if (!(req->rq_flags & PTL_RPC_FL_REPLIED)) {
634 CERROR("Unknown reason for wakeup\n");
635 /* XXX Phil - I end up here when I kill obdctl */
637 GOTO(out, rc = -EINTR);
640 rc = lustre_unpack_msg(req->rq_repmsg, req->rq_replen);
642 CERROR("unpack_rep failed: %d\n", rc);
646 CDEBUG(D_NET, "got rep %Ld\n", req->rq_xid);
647 if (req->rq_repmsg->status == 0)
648 CDEBUG(D_NET, "--> buf %p len %d status %d\n", req->rq_repmsg,
649 req->rq_replen, req->rq_repmsg->status);
651 CERROR("recovery failed: ");
652 CERROR("req %Ld opc %d level %d, conn level %d\n",
653 req->rq_xid, req->rq_reqmsg->opc, req->rq_level,
654 req->rq_connection->c_level);