1 /* -*- mode: c; c-basic-offset: 8; indent-tabs-mode: nil; -*-
2 * vim:expandtab:shiftwidth=8:tabstop=8:
4 * Copyright (C) 2002 Cluster File Systems, Inc.
6 * This file is part of Lustre, http://www.lustre.org.
8 * Lustre is free software; you can redistribute it and/or
9 * modify it under the terms of version 2 of the GNU General Public
10 * License as published by the Free Software Foundation.
12 * Lustre is distributed in the hope that it will be useful,
13 * but WITHOUT ANY WARRANTY; without even the implied warranty of
14 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
15 * GNU General Public License for more details.
17 * You should have received a copy of the GNU General Public License
18 * along with Lustre; if not, write to the Free Software
19 * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
 */
23 #define DEBUG_SUBSYSTEM S_RPC
25 #include <linux/lustre_ha.h>
/*
 * ptlrpc_init_client(): zero-initialize an RPC client, register it with
 * the recovery daemon, record its request/reply portals, and set up the
 * request-tracking lists and concurrency limit.
 *
 * NOTE(review): lines are missing from this extraction (braces and the
 * req_portal parameter declaration); visible statements preserved verbatim.
 */
27 void ptlrpc_init_client(struct recovd_obd *recovd,
28                         int (*recover)(struct ptlrpc_client *recover),
30                         int rep_portal, struct ptlrpc_client *cl)
/* Start from a clean slate so every field has a known value. */
32         memset(cl, 0, sizeof(*cl));
33         cl->cli_recovd = recovd;
34         cl->cli_recover = recover;
/* Hand the client to the recovery daemon.  NOTE(review): presumably
 * guarded by "if (recovd)" in the original -- the check is not visible
 * here; confirm recovd may not be NULL before calling. */
36         recovd_cli_manage(recovd, cl);
38         cl->cli_request_portal = req_portal;
39         cl->cli_reply_portal = rep_portal;
/* Requests migrate between these lists as they are delayed, sent, and
 * retired (see ptlrpc_queue_wait / ptlrpc_free_committed below). */
40         INIT_LIST_HEAD(&cl->cli_delayed_head);
41         INIT_LIST_HEAD(&cl->cli_sending_head);
42         INIT_LIST_HEAD(&cl->cli_dying_head);
43         spin_lock_init(&cl->cli_lock);
/* Counting semaphore capping this client at 32 RPCs in flight. */
44         sema_init(&cl->cli_rpc_sem, 32);
/* Return the remote peer UUID recorded on the request's connection. */
47 __u8 *ptlrpc_req_to_uuid(struct ptlrpc_request *req)
49         return req->rq_connection->c_remote_uuid;
/*
 * Resolve a peer UUID to a ptlrpc connection: look up the portals peer
 * for the UUID, get the connection for that peer, and stamp the
 * connection with the remote UUID.
 *
 * NOTE(review): the "int err" declaration, the error-return after the
 * CERROR, and the final RETURN are missing from this extraction.
 */
52 struct ptlrpc_connection *ptlrpc_uuid_to_connection(char *uuid)
54         struct ptlrpc_connection *c;
55         struct lustre_peer peer;
58         err = kportal_uuid_to_peer(uuid, &peer);
/* Lookup failure path -- presumably returns NULL; confirm against the
 * full source. */
60         CERROR("cannot find peer %s!\n", uuid);
64         c = ptlrpc_get_connection(&peer);
/* Record the remote identity, truncated to the field's size. */
66         memcpy(c->c_remote_uuid, uuid, sizeof(c->c_remote_uuid));
/*
 * Re-resolve the portals peer for an existing connection and overwrite
 * its cached peer address (used when the remote end moves).
 *
 * NOTE(review): the "int err" declaration and the error-return path
 * after the CERROR are missing from this extraction.
 */
73 void ptlrpc_readdress_connection(struct ptlrpc_connection *conn, char *uuid)
75         struct lustre_peer peer;
78         err = kportal_uuid_to_peer(uuid, &peer);
80         CERROR("cannot find peer %s!\n", uuid);
84         memcpy(&conn->c_peer, &peer, sizeof(peer));
/*
 * Allocate and initialize a bulk I/O descriptor bound to 'conn'.
 * Takes a reference on the connection (dropped in ptlrpc_free_bulk).
 *
 * NOTE(review): the OBD_ALLOC failure check and the RETURN of 'bulk'
 * are missing from this extraction.
 */
88 struct ptlrpc_bulk_desc *ptlrpc_prep_bulk(struct ptlrpc_connection *conn)
90         struct ptlrpc_bulk_desc *bulk;
92         OBD_ALLOC(bulk, sizeof(*bulk));
94         bulk->b_connection = ptlrpc_connection_addref(conn);
/* No pages attached yet; pages are added via ptlrpc_prep_bulk_page. */
95         atomic_set(&bulk->b_pages_remaining, 0);
96         init_waitqueue_head(&bulk->b_waitq);
97         INIT_LIST_HEAD(&bulk->b_page_list);
/*
 * Allocate a bulk page, invalidate its portals MD/ME handles, and link
 * it onto the descriptor's page list, bumping both the page count and
 * the pages-remaining counter.
 *
 * NOTE(review): the allocation-failure check and the RETURN of 'bulk'
 * are missing from this extraction.
 */
103 struct ptlrpc_bulk_page *ptlrpc_prep_bulk_page(struct ptlrpc_bulk_desc *desc)
105         struct ptlrpc_bulk_page *bulk;
107         OBD_ALLOC(bulk, sizeof(*bulk));
/* Mark the MD/ME handles invalid until they are actually set up. */
110         ptl_set_inv_handle(&bulk->b_md_h);
111         ptl_set_inv_handle(&bulk->b_me_h);
112         list_add_tail(&bulk->b_link, &desc->b_page_list);
113         desc->b_page_count++;
114         atomic_inc(&desc->b_pages_remaining);
/*
 * Tear down a bulk descriptor: free every attached page, drop the
 * connection reference taken in ptlrpc_prep_bulk, and free the
 * descriptor itself.
 */
119 void ptlrpc_free_bulk(struct ptlrpc_bulk_desc *desc)
121         struct list_head *tmp, *next;
/* _safe iteration: ptlrpc_free_bulk_page unlinks the current entry. */
128         list_for_each_safe(tmp, next, &desc->b_page_list) {
129                 struct ptlrpc_bulk_page *bulk;
130                 bulk = list_entry(tmp, struct ptlrpc_bulk_page, b_link);
131                 ptlrpc_free_bulk_page(bulk);
134         ptlrpc_put_connection(desc->b_connection);
136         OBD_FREE(desc, sizeof(*desc));
/*
 * Unlink a bulk page from its descriptor, decrement the descriptor's
 * page count, and free the page structure.
 */
140 void ptlrpc_free_bulk_page(struct ptlrpc_bulk_page *bulk)
148         list_del(&bulk->b_link);
149         bulk->b_desc->b_page_count--;
150         OBD_FREE(bulk, sizeof(*bulk));
154 struct ptlrpc_request *ptlrpc_prep_req(struct ptlrpc_client *cl,
155 struct ptlrpc_connection *conn,
156 int opcode, int count, int *lengths,
159 struct ptlrpc_request *request;
163 OBD_ALLOC(request, sizeof(*request));
165 CERROR("request allocation out of memory\n");
169 rc = lustre_pack_msg(count, lengths, bufs,
170 &request->rq_reqlen, &request->rq_reqmsg);
172 CERROR("cannot pack request %d\n", rc);
173 OBD_FREE(request, sizeof(*request));
177 request->rq_type = PTL_RPC_TYPE_REQUEST;
178 request->rq_connection = ptlrpc_connection_addref(conn);
180 request->rq_reqmsg->conn = (__u64)(unsigned long)conn->c_remote_conn;
181 request->rq_reqmsg->token = conn->c_remote_token;
182 request->rq_reqmsg->opc = HTON__u32(opcode);
183 request->rq_reqmsg->type = HTON__u32(PTL_RPC_MSG_REQUEST);
184 request->rq_reqmsg->target_id = HTON__u32(cl->cli_target_devno);
186 INIT_LIST_HEAD(&request->rq_list);
187 INIT_LIST_HEAD(&request->rq_multi);
189 /* this will be dec()d once in req_finished, once in free_committed */
190 atomic_set(&request->rq_refcount, 2);
192 spin_lock(&conn->c_lock);
193 request->rq_xid = HTON__u32(++conn->c_xid_out);
194 request->rq_xid = conn->c_xid_out;
195 spin_unlock(&conn->c_lock);
197 request->rq_client = cl;
/*
 * Release the caller's reference on a request: free the reply buffer
 * (if any) and free the request itself once the refcount hits zero.
 *
 * NOTE(review): braces / a possible NULL check on 'request' are missing
 * from this extraction.
 */
202 void ptlrpc_req_finished(struct ptlrpc_request *request)
207         if (request->rq_repmsg != NULL) {
208                 OBD_FREE(request->rq_repmsg, request->rq_replen);
209                 request->rq_repmsg = NULL;
/* Clear the MD start pointer so nothing dereferences the freed buffer. */
210                 request->rq_reply_md.start = NULL;
213         if (atomic_dec_and_test(&request->rq_refcount))
214                 ptlrpc_free_req(request);
/*
 * Unconditionally free a request: release its reply and request message
 * buffers, unlink it from its client's list (under the client lock),
 * drop the connection reference, and free the structure.
 *
 * NOTE(review): the body of the NULL-guard (presumably an early return)
 * is missing from this extraction.
 */
217 void ptlrpc_free_req(struct ptlrpc_request *request)
220         if (request == NULL) {
225         if (request->rq_repmsg != NULL)
226                 OBD_FREE(request->rq_repmsg, request->rq_replen);
227         if (request->rq_reqmsg != NULL)
228                 OBD_FREE(request->rq_reqmsg, request->rq_reqlen);
/* rq_client may have been cleared by ptlrpc_free_committed to avoid a
 * lock-ordering deadlock -- only take the client lock if it is set. */
230         if (request->rq_client) {
231                 spin_lock(&request->rq_client->cli_lock);
232                 list_del_init(&request->rq_list);
233                 spin_unlock(&request->rq_client->cli_lock);
236         ptlrpc_put_connection(request->rq_connection);
237         list_del(&request->rq_multi);
238         OBD_FREE(request, sizeof(*request));
/*
 * Wakeup predicate used with wait_event_interruptible() in
 * ptlrpc_queue_wait/ptlrpc_replay_req.  Checks, in order: reply
 * arrived, resend requested, restart requested, timeout expired, and
 * pending fatal signals.
 *
 * NOTE(review): the RETURN statements for each branch are missing from
 * this extraction; presumably each condition returns nonzero to wake
 * the waiter -- confirm against the full source.
 */
242 static int ptlrpc_check_reply(struct ptlrpc_request *req)
/* Reply buffer filled in by the reply callback: record the transaction
 * number and mark the request replied. */
246         if (req->rq_repmsg != NULL) {
247                 req->rq_transno = NTOH__u64(req->rq_repmsg->transno);
248                 req->rq_flags |= PTL_RPC_FL_REPLIED;
252         if (req->rq_flags & PTL_RPC_FL_RESEND) {
253                 CERROR("-- RESEND --\n");
257         if (req->rq_flags & PTL_RPC_FL_RECOVERY) {
258                 CERROR("-- RESTART --\n");
/* Timeout: drop the connection to recovery level, flag the request,
 * and notify the recovery daemon if the client has one. */
263         if (CURRENT_TIME - req->rq_time >= req->rq_timeout) {
264                 CERROR("-- REQ TIMEOUT ON CONNID %d XID %Ld --\n",
265                        req->rq_connid, (unsigned long long)req->rq_xid);
266                 /* clear the timeout */
268                 req->rq_connection->c_level = LUSTRE_CONN_RECOVD;
269                 req->rq_flags |= PTL_RPC_FL_TIMEOUT;
270                 if (req->rq_client && req->rq_client->cli_recovd)
271                         recovd_cli_fail(req->rq_client);
272                 if (req->rq_level < LUSTRE_CONN_FULL)
/* Still within the timeout: go back to sleep for the remaining time.
 * NOTE(review): schedule_timeout() without setting the task state first
 * is visible here -- the state-setting line may be among the missing
 * lines; verify. */
279         if (req->rq_timeout) {
280                 schedule_timeout(req->rq_timeout * HZ);
/* Fatal signals interrupt the wait. */
283         if (sigismember(&(current->pending.signal), SIGKILL) ||
284             sigismember(&(current->pending.signal), SIGTERM) ||
285             sigismember(&(current->pending.signal), SIGINT)) {
286                 req->rq_flags |= PTL_RPC_FL_INTR;
/*
 * Validate a completed request: propagate 'err', sanity-check the
 * request and its reply message, reject PTL_RPC_MSG_ERR replies, and
 * return the server-reported status (negative = error, positive =
 * informational).
 *
 * NOTE(review): the early RETURN statements between the CERROR calls
 * are missing from this extraction.
 */
294 int ptlrpc_check_status(struct ptlrpc_request *req, int err)
299         CERROR("err is %d\n", err);
304         CERROR("req == NULL\n");
308         if (req->rq_repmsg == NULL) {
309                 CERROR("req->rq_repmsg == NULL\n");
313         if (req->rq_repmsg->type == NTOH__u32(PTL_RPC_MSG_ERR)) {
314                 CERROR("req->rq_repmsg->type == PTL_RPC_MSG_ERR\n");
318         if (req->rq_repmsg->status != 0) {
/* Negative status is a hard error; positive is merely informational. */
319                 if (req->rq_repmsg->status < 0)
320                         CERROR("req->rq_repmsg->status is %d\n",
321                                req->rq_repmsg->status);
323                         CDEBUG(D_INFO, "req->rq_repmsg->status is %d\n",
324                                req->rq_repmsg->status);
325                 /* XXX: translate this error from net to host */
326                 RETURN(req->rq_repmsg->status);
/*
 * Free the packed request message buffer and clear the pointer/length
 * so it cannot be freed twice or sent again.
 */
332 static void ptlrpc_cleanup_request_buf(struct ptlrpc_request *request)
334         OBD_FREE(request->rq_reqmsg, request->rq_reqlen);
335         request->rq_reqmsg = NULL;
336         request->rq_reqlen = 0;
339 /* Abort this request and cleanup any resources associated with it. */
340 static int ptlrpc_abort(struct ptlrpc_request *request)
342         /* First remove the ME for the reply; in theory, this means
343          * that we can tear down the buffer safely. */
344         PtlMEUnlink(request->rq_reply_me_h);
345         OBD_FREE(request->rq_reply_md.start, request->rq_replen);
/* Clear reply state so later cleanup does not double-free. */
346         request->rq_repmsg = NULL;
347         request->rq_replen = 0;
351 /* caller must lock cli */
/*
 * Retire requests on the sending list whose transaction number the
 * server has committed to disk.  Replay-flagged requests are retained;
 * uncommitted requests are left alone.
 */
352 void ptlrpc_free_committed(struct ptlrpc_client *cli)
354         struct list_head *tmp, *saved;
355         struct ptlrpc_request *req;
357         list_for_each_safe(tmp, saved, &cli->cli_sending_head) {
358                 req = list_entry(tmp, struct ptlrpc_request, rq_list);
/* Keep requests needed for recovery replay. */
360                 if ( (req->rq_flags & PTL_RPC_FL_REPLAY) ) {
361                         CDEBUG(D_INFO, "Retaining request %Ld for replay\n",
366                 /* not yet committed */
367                 if (req->rq_transno > cli->cli_last_committed)
370                 CDEBUG(D_INFO, "Marking request %Ld as committed ("
371                        "transno=%Lu, last_committed=%Lu\n",
372                        req->rq_xid, req->rq_transno,
373                        cli->cli_last_committed);
374                 if (atomic_dec_and_test(&req->rq_refcount)) {
375                         /* we do this to prevent free_req deadlock */
376                         list_del_init(&req->rq_list);
/* Clearing rq_client stops ptlrpc_free_req from re-taking cli_lock,
 * which this function's caller already holds. */
377                         req->rq_client = NULL;
378                         ptlrpc_free_req(req);
/* Still referenced elsewhere: park it on the dying list.
 * NOTE(review): presumably inside an "else" branch -- the brace lines
 * are missing from this extraction. */
380                         list_del_init(&req->rq_list);
381                         list_add(&req->rq_list, &cli->cli_dying_head);
/*
 * Forcibly free every request still tracked by the client (sending and
 * dying lists), under the client lock.  Anything on the dying list at
 * cleanup indicates a leaked reference, hence the CERROR.
 */
389 void ptlrpc_cleanup_client(struct ptlrpc_client *cli)
391         struct list_head *tmp, *saved;
392         struct ptlrpc_request *req;
395         spin_lock(&cli->cli_lock);
396         list_for_each_safe(tmp, saved, &cli->cli_sending_head) {
397                 req = list_entry(tmp, struct ptlrpc_request, rq_list);
398                 CDEBUG(D_INFO, "Cleaning req %p from sending list.\n", req);
399                 list_del_init(&req->rq_list);
/* Clear rq_client so ptlrpc_free_req does not re-take cli_lock,
 * which we hold here. */
400                 req->rq_client = NULL;
401                 ptlrpc_free_req(req);
403         list_for_each_safe(tmp, saved, &cli->cli_dying_head) {
404                 req = list_entry(tmp, struct ptlrpc_request, rq_list);
405                 CERROR("Request %p is on the dying list at cleanup!\n", req);
406                 list_del_init(&req->rq_list);
407                 req->rq_client = NULL;
408                 ptlrpc_free_req(req);
410         spin_unlock(&cli->cli_lock);
/*
 * Wake a request that was parked on the delayed list (waiting for the
 * connection to recover) so ptlrpc_queue_wait can proceed.
 */
416 void ptlrpc_continue_req(struct ptlrpc_request *req)
419         CDEBUG(D_INODE, "continue delayed request %Ld opc %d\n",
420                req->rq_xid, req->rq_reqmsg->opc);
421         wake_up_interruptible(&req->rq_wait_for_rep);
/*
 * Flag a request for resend: set -EAGAIN status, drop its level to
 * recovery, set RESEND (clearing any stale TIMEOUT), and wake the
 * waiter so ptlrpc_queue_wait retransmits it.
 */
425 void ptlrpc_resend_req(struct ptlrpc_request *req)
428         CDEBUG(D_INODE, "resend request %Ld, opc %d\n",
429                req->rq_xid, req->rq_reqmsg->opc);
430         req->rq_status = -EAGAIN;
431         req->rq_level = LUSTRE_CONN_RECOVD;
432         req->rq_flags |= PTL_RPC_FL_RESEND;
433         req->rq_flags &= ~PTL_RPC_FL_TIMEOUT;
434         wake_up_interruptible(&req->rq_wait_for_rep);
/*
 * Flag a completed request for restart from scratch: -ERESTARTSYS
 * status plus the RECOVERY flag (clearing any stale TIMEOUT), then wake
 * the waiter.
 */
438 void ptlrpc_restart_req(struct ptlrpc_request *req)
441         CDEBUG(D_INODE, "restart completed request %Ld, opc %d\n",
442                req->rq_xid, req->rq_reqmsg->opc);
443         req->rq_status = -ERESTARTSYS;
444         req->rq_flags |= PTL_RPC_FL_RECOVERY;
445         req->rq_flags &= ~PTL_RPC_FL_TIMEOUT;
446         wake_up_interruptible(&req->rq_wait_for_rep);
/*
 * Send an RPC and sleep until it is replied, resent, timed out, or
 * interrupted.  On success, records the server's last_rcvd /
 * last_committed values and retires committed requests.  Returns 0 or
 * a negative errno (-ETIMEDOUT, -EINTR, unpack errors, rq_status).
 *
 * NOTE(review): several lines (braces, GOTO/RETURN statements, resend
 * loop target) are missing from this extraction; visible statements
 * preserved verbatim.
 */
450 int ptlrpc_queue_wait(struct ptlrpc_request *req)
453         struct ptlrpc_client *cli = req->rq_client;
456         init_waitqueue_head(&req->rq_wait_for_rep);
457         CDEBUG(D_NET, "subsys: %s req %Ld opc %d level %d, conn level %d\n",
458                cli->cli_name, req->rq_xid, req->rq_reqmsg->opc, req->rq_level,
459                req->rq_connection->c_level);
461         /* XXX probably both an import and connection level are needed */
/* The request needs a higher connection level than currently available:
 * park it on the delayed list until recovery raises the connection
 * level and ptlrpc_continue_req wakes us. */
462         if (req->rq_level > req->rq_connection->c_level) {
463                 CERROR("process %d waiting for recovery\n", current->pid);
464                 spin_lock(&cli->cli_lock);
465                 list_del_init(&req->rq_list);
/* Adding after .prev == appending at the tail of the delayed list. */
466                 list_add(&req->rq_list, cli->cli_delayed_head.prev);
467                 spin_unlock(&cli->cli_lock);
468                 wait_event_interruptible
469                         (req->rq_wait_for_rep,
470                          req->rq_level <= req->rq_connection->c_level);
471                 spin_lock(&cli->cli_lock);
472                 list_del_init(&req->rq_list);
473                 spin_unlock(&cli->cli_lock);
474                 CERROR("process %d resumed\n", current->pid);
/* Stamp the send time and (hard-coded) 100-second timeout used by
 * ptlrpc_check_reply's timeout test. */
477         req->rq_time = CURRENT_TIME;
478         req->rq_timeout = 100;
479         rc = ptl_send_rpc(req);
/* Send failure: free the packed request and release the RPC slot
 * (ptl_send_rpc presumably acquired cli_rpc_sem -- confirm). */
481                 CERROR("error %d, opcode %d\n", rc, req->rq_reqmsg->opc);
484                 ptlrpc_cleanup_request_buf(req);
485                 up(&cli->cli_rpc_sem);
/* Track the in-flight request on the sending list. */
489         spin_lock(&cli->cli_lock);
490         list_del_init(&req->rq_list);
491         list_add_tail(&req->rq_list, &cli->cli_sending_head);
492         spin_unlock(&cli->cli_lock);
494         CDEBUG(D_OTHER, "-- sleeping\n");
495         wait_event_interruptible(req->rq_wait_for_rep,
496                                  ptlrpc_check_reply(req));
497         CDEBUG(D_OTHER, "-- done\n");
/* Woken for a resend: clear the flag and retransmit.
 * NOTE(review): the jump back to the send is among the missing lines. */
499         if (req->rq_flags & PTL_RPC_FL_RESEND) {
500                 req->rq_flags &= ~PTL_RPC_FL_RESEND;
/* Release the concurrent-RPC slot before inspecting the outcome. */
504         up(&cli->cli_rpc_sem);
505         if (req->rq_flags & PTL_RPC_FL_TIMEOUT)
506                 GOTO(out, rc = -ETIMEDOUT);
508         if (req->rq_flags & PTL_RPC_FL_INTR) {
509                 /* Clean up the dangling reply buffers */
511                 GOTO(out, rc = -EINTR);
/* Woke without a reply and without timeout/interrupt: propagate
 * whatever status the waker recorded (e.g. -EAGAIN, -ERESTARTSYS). */
514         if (!(req->rq_flags & PTL_RPC_FL_REPLIED))
515                 GOTO(out, rc = req->rq_status);
517         rc = lustre_unpack_msg(req->rq_repmsg, req->rq_replen);
519                 CERROR("unpack_rep failed: %d\n", rc);
522         CDEBUG(D_NET, "got rep %Ld\n", req->rq_xid);
523         if (req->rq_repmsg->status == 0)
524                 CDEBUG(D_NET, "--> buf %p len %d status %d\n", req->rq_repmsg,
525                        req->rq_replen, req->rq_repmsg->status);
/* Learn the server's commit progress and retire committed requests. */
527         spin_lock(&cli->cli_lock);
528         cli->cli_last_rcvd = req->rq_repmsg->last_rcvd;
529         cli->cli_last_committed = req->rq_repmsg->last_committed;
530         ptlrpc_free_committed(cli);
531         spin_unlock(&cli->cli_lock);
538 int ptlrpc_replay_req(struct ptlrpc_request *req)
541 struct ptlrpc_client *cli = req->rq_client;
544 init_waitqueue_head(&req->rq_wait_for_rep);
545 CDEBUG(D_NET, "req %Ld opc %d level %d, conn level %d\n",
546 req->rq_xid, req->rq_reqmsg->opc, req->rq_level,
547 req->rq_connection->c_level);
549 req->rq_time = CURRENT_TIME;
550 req->rq_timeout = 100;
551 rc = ptl_send_rpc(req);
553 CERROR("error %d, opcode %d\n", rc, req->rq_reqmsg->opc);
554 ptlrpc_cleanup_request_buf(req);
555 up(&cli->cli_rpc_sem);
559 CDEBUG(D_OTHER, "-- sleeping\n");
560 wait_event_interruptible(req->rq_wait_for_rep,
561 ptlrpc_check_reply(req));
562 CDEBUG(D_OTHER, "-- done\n");
564 up(&cli->cli_rpc_sem);
566 if (!(req->rq_flags & PTL_RPC_FL_REPLIED)) {
567 CERROR("Unknown reason for wakeup\n");
568 /* XXX Phil - I end up here when I kill obdctl */
570 GOTO(out, rc = -EINTR);
573 rc = lustre_unpack_msg(req->rq_repmsg, req->rq_replen);
575 CERROR("unpack_rep failed: %d\n", rc);
579 CDEBUG(D_NET, "got rep %Ld\n", req->rq_xid);
580 if (req->rq_repmsg->status == 0)
581 CDEBUG(D_NET, "--> buf %p len %d status %d\n", req->rq_repmsg,
582 req->rq_replen, req->rq_repmsg->status);
584 CERROR("recovery failed: ");
585 CERROR("req %Ld opc %d level %d, conn level %d\n",
586 req->rq_xid, req->rq_reqmsg->opc, req->rq_level,
587 req->rq_connection->c_level);