Whamcloud - gitweb
- More changes in the connection handle stuff. We are back to where
[fs/lustre-release.git] / lustre / ptlrpc / client.c
1 /* -*- mode: c; c-basic-offset: 8; indent-tabs-mode: nil; -*-
2  * vim:expandtab:shiftwidth=8:tabstop=8:
3  *
4  *  Copyright (C) 2002 Cluster File Systems, Inc.
5  *
6  *   This file is part of Lustre, http://www.lustre.org.
7  *
8  *   Lustre is free software; you can redistribute it and/or
9  *   modify it under the terms of version 2 of the GNU General Public
10  *   License as published by the Free Software Foundation.
11  *
12  *   Lustre is distributed in the hope that it will be useful,
13  *   but WITHOUT ANY WARRANTY; without even the implied warranty of
14  *   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
15  *   GNU General Public License for more details.
16  *
17  *   You should have received a copy of the GNU General Public License
18  *   along with Lustre; if not, write to the Free Software
19  *   Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
20  *
21  */
22
23 #define DEBUG_SUBSYSTEM S_RPC
24
25 #include <linux/lustre_ha.h>
26
27 void ptlrpc_init_client(struct recovd_obd *recovd, 
28                         int (*recover)(struct ptlrpc_client *recover),
29                         int req_portal,
30                         int rep_portal, struct ptlrpc_client *cl)
31 {
32         memset(cl, 0, sizeof(*cl));
33         cl->cli_recovd = recovd;
34         cl->cli_recover = recover;
35         if (recovd)
36                 recovd_cli_manage(recovd, cl);
37         cl->cli_obd = NULL;
38         cl->cli_request_portal = req_portal;
39         cl->cli_reply_portal = rep_portal;
40         INIT_LIST_HEAD(&cl->cli_delayed_head);
41         INIT_LIST_HEAD(&cl->cli_sending_head);
42         INIT_LIST_HEAD(&cl->cli_dying_head);
43         spin_lock_init(&cl->cli_lock);
44         sema_init(&cl->cli_rpc_sem, 32);
45 }
46
47 __u8 *ptlrpc_req_to_uuid(struct ptlrpc_request *req)
48 {
49         return req->rq_connection->c_remote_uuid;
50 }
51
52 struct ptlrpc_connection *ptlrpc_uuid_to_connection(char *uuid)
53 {
54         struct ptlrpc_connection *c;
55         struct lustre_peer peer;
56         int err;
57
58         err = kportal_uuid_to_peer(uuid, &peer);
59         if (err != 0) {
60                 CERROR("cannot find peer %s!\n", uuid);
61                 return NULL;
62         }
63
64         c = ptlrpc_get_connection(&peer);
65         if (c) { 
66                 memcpy(c->c_remote_uuid, uuid, sizeof(c->c_remote_uuid));
67                 c->c_epoch++;
68         }
69
70         return c;
71 }
72
73 void ptlrpc_readdress_connection(struct ptlrpc_connection *conn, char *uuid)
74 {
75         struct lustre_peer peer;
76         int err;
77
78         err = kportal_uuid_to_peer(uuid, &peer);
79         if (err != 0) {
80                 CERROR("cannot find peer %s!\n", uuid);
81                 return;
82         }
83         
84         memcpy(&conn->c_peer, &peer, sizeof(peer)); 
85         return;
86 }
87
88 struct ptlrpc_bulk_desc *ptlrpc_prep_bulk(struct ptlrpc_connection *conn)
89 {
90         struct ptlrpc_bulk_desc *bulk;
91
92         OBD_ALLOC(bulk, sizeof(*bulk));
93         if (bulk != NULL) {
94                 bulk->b_connection = ptlrpc_connection_addref(conn);
95                 atomic_set(&bulk->b_pages_remaining, 0);
96                 init_waitqueue_head(&bulk->b_waitq);
97                 INIT_LIST_HEAD(&bulk->b_page_list);
98         }
99
100         return bulk;
101 }
102
103 struct ptlrpc_bulk_page *ptlrpc_prep_bulk_page(struct ptlrpc_bulk_desc *desc)
104 {
105         struct ptlrpc_bulk_page *bulk;
106
107         OBD_ALLOC(bulk, sizeof(*bulk));
108         if (bulk != NULL) {
109                 bulk->b_desc = desc;
110                 ptl_set_inv_handle(&bulk->b_md_h);
111                 ptl_set_inv_handle(&bulk->b_me_h);
112                 list_add_tail(&bulk->b_link, &desc->b_page_list);
113                 desc->b_page_count++;
114                 atomic_inc(&desc->b_pages_remaining);
115         }
116         return bulk;
117 }
118
119 void ptlrpc_free_bulk(struct ptlrpc_bulk_desc *desc)
120 {
121         struct list_head *tmp, *next;
122         ENTRY;
123         if (desc == NULL) {
124                 EXIT;
125                 return;
126         }
127
128         list_for_each_safe(tmp, next, &desc->b_page_list) {
129                 struct ptlrpc_bulk_page *bulk;
130                 bulk = list_entry(tmp, struct ptlrpc_bulk_page, b_link);
131                 ptlrpc_free_bulk_page(bulk);
132         }
133
134         ptlrpc_put_connection(desc->b_connection);
135
136         OBD_FREE(desc, sizeof(*desc));
137         EXIT;
138 }
139
140 void ptlrpc_free_bulk_page(struct ptlrpc_bulk_page *bulk)
141 {
142         ENTRY;
143         if (bulk == NULL) {
144                 EXIT;
145                 return;
146         }
147
148         list_del(&bulk->b_link);
149         bulk->b_desc->b_page_count--;
150         OBD_FREE(bulk, sizeof(*bulk));
151         EXIT;
152 }
153
154 struct ptlrpc_request *ptlrpc_prep_req(struct ptlrpc_client *cl,
155                                        struct ptlrpc_connection *conn,
156                                        int opcode, int count, int *lengths,
157                                        char **bufs)
158 {
159         struct ptlrpc_request *request;
160         int rc;
161         ENTRY;
162
163         OBD_ALLOC(request, sizeof(*request));
164         if (!request) {
165                 CERROR("request allocation out of memory\n");
166                 RETURN(NULL);
167         }
168
169         rc = lustre_pack_msg(count, lengths, bufs,
170                              &request->rq_reqlen, &request->rq_reqmsg);
171         if (rc) {
172                 CERROR("cannot pack request %d\n", rc);
173                 OBD_FREE(request, sizeof(*request));
174                 RETURN(NULL);
175         }
176
177         request->rq_type = PTL_RPC_TYPE_REQUEST;
178         request->rq_connection = ptlrpc_connection_addref(conn);
179
180         request->rq_reqmsg->conn2 = (__u64)(unsigned long)conn->c_remote_conn;
181         //request->rq_reqmsg->token = conn->c_remote_token;
182         request->rq_reqmsg->opc = HTON__u32(opcode);
183         request->rq_reqmsg->type = HTON__u32(PTL_RPC_MSG_REQUEST);
184         request->rq_reqmsg->target_id = HTON__u32(cl->cli_target_devno);
185
186         INIT_LIST_HEAD(&request->rq_list);
187         INIT_LIST_HEAD(&request->rq_multi);
188
189         /* this will be dec()d once in req_finished, once in free_committed */
190         atomic_set(&request->rq_refcount, 2);
191
192         spin_lock(&conn->c_lock);
193         request->rq_xid = HTON__u32(++conn->c_xid_out);
194         request->rq_xid = conn->c_xid_out;
195         spin_unlock(&conn->c_lock);
196
197         request->rq_client = cl;
198
199         RETURN(request);
200 }
201 struct ptlrpc_request *ptlrpc_prep_req2(struct ptlrpc_client *cl,
202                                         struct ptlrpc_connection *conn,
203                                         struct lustre_handle *handle, 
204                                        int opcode, int count, int *lengths,
205                                        char **bufs)
206 {
207         struct ptlrpc_request *req;
208         req = ptlrpc_prep_req(cl, conn, opcode, count, lengths, bufs);
209         ptlrpc_hdl2req(req, handle);
210         return req;
211 }
212
213 void ptlrpc_req_finished(struct ptlrpc_request *request)
214 {
215         if (request == NULL)
216                 return;
217
218         if (request->rq_repmsg != NULL) { 
219                 OBD_FREE(request->rq_repmsg, request->rq_replen);
220                 request->rq_repmsg = NULL;
221                 request->rq_reply_md.start = NULL; 
222         }
223
224         if (atomic_dec_and_test(&request->rq_refcount))
225                 ptlrpc_free_req(request);
226 }
227
228 void ptlrpc_free_req(struct ptlrpc_request *request)
229 {
230         ENTRY;
231         if (request == NULL) {
232                 EXIT;
233                 return;
234         }
235
236         if (request->rq_repmsg != NULL)
237                 OBD_FREE(request->rq_repmsg, request->rq_replen);
238         if (request->rq_reqmsg != NULL)
239                 OBD_FREE(request->rq_reqmsg, request->rq_reqlen);
240
241         if (request->rq_client) {
242                 spin_lock(&request->rq_client->cli_lock);
243                 list_del_init(&request->rq_list);
244                 spin_unlock(&request->rq_client->cli_lock);
245         }
246
247         ptlrpc_put_connection(request->rq_connection);
248         list_del(&request->rq_multi);
249         OBD_FREE(request, sizeof(*request));
250         EXIT;
251 }
252
253 static int ptlrpc_check_reply(struct ptlrpc_request *req)
254 {
255         int rc = 0;
256
257         if (req->rq_repmsg != NULL) {
258                 req->rq_transno = NTOH__u64(req->rq_repmsg->transno);
259                 req->rq_flags |= PTL_RPC_FL_REPLIED;
260                 GOTO(out, rc = 1);
261         }
262
263         if (req->rq_flags & PTL_RPC_FL_RESEND) { 
264                 CERROR("-- RESEND --\n");
265                 GOTO(out, rc = 1);
266         }
267
268         if (req->rq_flags & PTL_RPC_FL_RECOVERY) { 
269                 CERROR("-- RESTART --\n");
270                 GOTO(out, rc = 1);
271         }
272
273
274         if (CURRENT_TIME - req->rq_time >= req->rq_timeout) {
275                 CERROR("-- REQ TIMEOUT ON CONNID %d XID %Ld --\n",
276                        req->rq_connid, (unsigned long long)req->rq_xid);
277                 /* clear the timeout */
278                 req->rq_timeout = 0;
279                 req->rq_connection->c_level = LUSTRE_CONN_RECOVD;
280                 req->rq_flags |= PTL_RPC_FL_TIMEOUT;
281                 if (req->rq_client && req->rq_client->cli_recovd)
282                         recovd_cli_fail(req->rq_client);
283                 if (req->rq_level < LUSTRE_CONN_FULL)
284                         rc = 1;
285                 else
286                         rc = 0;
287                 GOTO(out, rc);
288         }
289
290         if (req->rq_timeout) { 
291                 schedule_timeout(req->rq_timeout * HZ);
292         }
293
294         if (sigismember(&(current->pending.signal), SIGKILL) ||
295             sigismember(&(current->pending.signal), SIGTERM) ||
296             sigismember(&(current->pending.signal), SIGINT)) {
297                 req->rq_flags |= PTL_RPC_FL_INTR;
298                 GOTO(out, rc = 1);
299         }
300
301  out:
302         return rc;
303 }
304
305 int ptlrpc_check_status(struct ptlrpc_request *req, int err)
306 {
307         ENTRY;
308
309         if (err != 0) {
310                 CERROR("err is %d\n", err);
311                 RETURN(err);
312         }
313
314         if (req == NULL) {
315                 CERROR("req == NULL\n");
316                 RETURN(-ENOMEM);
317         }
318
319         if (req->rq_repmsg == NULL) {
320                 CERROR("req->rq_repmsg == NULL\n");
321                 RETURN(-ENOMEM);
322         }
323
324         if (req->rq_repmsg->type == NTOH__u32(PTL_RPC_MSG_ERR)) {
325                 CERROR("req->rq_repmsg->type == PTL_RPC_MSG_ERR\n");
326                 RETURN(-EINVAL);
327         }
328
329         if (req->rq_repmsg->status != 0) {
330                 if (req->rq_repmsg->status < 0)
331                         CERROR("req->rq_repmsg->status is %d\n",
332                                req->rq_repmsg->status);
333                 else
334                         CDEBUG(D_INFO, "req->rq_repmsg->status is %d\n",
335                                req->rq_repmsg->status);
336                 /* XXX: translate this error from net to host */
337                 RETURN(req->rq_repmsg->status);
338         }
339
340         RETURN(0);
341 }
342
343 static void ptlrpc_cleanup_request_buf(struct ptlrpc_request *request)
344 {
345         OBD_FREE(request->rq_reqmsg, request->rq_reqlen);
346         request->rq_reqmsg = NULL;
347         request->rq_reqlen = 0;
348 }
349
350 /* Abort this request and cleanup any resources associated with it. */
351 static int ptlrpc_abort(struct ptlrpc_request *request)
352 {
353         /* First remove the ME for the reply; in theory, this means
354          * that we can tear down the buffer safely. */
355         PtlMEUnlink(request->rq_reply_me_h);
356         OBD_FREE(request->rq_reply_md.start, request->rq_replen);
357         request->rq_repmsg = NULL;
358         request->rq_replen = 0;
359         return 0;
360 }
361
362 /* caller must lock cli */
363 void ptlrpc_free_committed(struct ptlrpc_client *cli)
364 {
365         struct list_head *tmp, *saved;
366         struct ptlrpc_request *req;
367
368         list_for_each_safe(tmp, saved, &cli->cli_sending_head) {
369                 req = list_entry(tmp, struct ptlrpc_request, rq_list);
370
371                 if ( (req->rq_flags & PTL_RPC_FL_REPLAY) ) { 
372                         CDEBUG(D_INFO, "Retaining request %Ld for replay\n",
373                                req->rq_xid);
374                         continue;
375                 }
376
377                 /* not yet committed */
378                 if (req->rq_transno > cli->cli_last_committed)
379                         break;
380
381                 CDEBUG(D_INFO, "Marking request %Ld as committed ("
382                        "transno=%Lu, last_committed=%Lu\n", 
383                        req->rq_xid, req->rq_transno, 
384                        cli->cli_last_committed);
385                 if (atomic_dec_and_test(&req->rq_refcount)) {
386                         /* we do this to prevent free_req deadlock */
387                         list_del_init(&req->rq_list); 
388                         req->rq_client = NULL;
389                         ptlrpc_free_req(req);
390                 } else {
391                         list_del_init(&req->rq_list);
392                         list_add(&req->rq_list, &cli->cli_dying_head);
393                 }
394         }
395
396         EXIT;
397         return;
398 }
399
400 void ptlrpc_cleanup_client(struct ptlrpc_client *cli)
401 {
402         struct list_head *tmp, *saved;
403         struct ptlrpc_request *req;
404         ENTRY;
405
406         spin_lock(&cli->cli_lock);
407         list_for_each_safe(tmp, saved, &cli->cli_sending_head) {
408                 req = list_entry(tmp, struct ptlrpc_request, rq_list);
409                 CDEBUG(D_INFO, "Cleaning req %p from sending list.\n", req);
410                 list_del_init(&req->rq_list);
411                 req->rq_client = NULL;
412                 ptlrpc_free_req(req); 
413         }
414         list_for_each_safe(tmp, saved, &cli->cli_dying_head) {
415                 req = list_entry(tmp, struct ptlrpc_request, rq_list);
416                 CERROR("Request %p is on the dying list at cleanup!\n", req);
417                 list_del_init(&req->rq_list);
418                 req->rq_client = NULL;
419                 ptlrpc_free_req(req); 
420         }
421         spin_unlock(&cli->cli_lock);
422
423         EXIT;
424         return;
425 }
426
427 void ptlrpc_continue_req(struct ptlrpc_request *req)
428 {
429         ENTRY;
430         CDEBUG(D_INODE, "continue delayed request %Ld opc %d\n", 
431                req->rq_xid, req->rq_reqmsg->opc); 
432         wake_up_interruptible(&req->rq_wait_for_rep); 
433         EXIT;
434 }
435
436 void ptlrpc_resend_req(struct ptlrpc_request *req)
437 {
438         ENTRY;
439         CDEBUG(D_INODE, "resend request %Ld, opc %d\n", 
440                req->rq_xid, req->rq_reqmsg->opc);
441         req->rq_status = -EAGAIN;
442         req->rq_level = LUSTRE_CONN_RECOVD;
443         req->rq_flags |= PTL_RPC_FL_RESEND;
444         req->rq_flags &= ~PTL_RPC_FL_TIMEOUT;
445         wake_up_interruptible(&req->rq_wait_for_rep);
446         EXIT;
447 }
448
449 void ptlrpc_restart_req(struct ptlrpc_request *req)
450 {
451         ENTRY;
452         CDEBUG(D_INODE, "restart completed request %Ld, opc %d\n", 
453                req->rq_xid, req->rq_reqmsg->opc);
454         req->rq_status = -ERESTARTSYS;
455         req->rq_flags |= PTL_RPC_FL_RECOVERY;
456         req->rq_flags &= ~PTL_RPC_FL_TIMEOUT;
457         wake_up_interruptible(&req->rq_wait_for_rep);
458         EXIT;
459 }
460
461 int ptlrpc_queue_wait(struct ptlrpc_request *req)
462 {
463         int rc = 0;
464         struct ptlrpc_client *cli = req->rq_client;
465         ENTRY;
466
467         init_waitqueue_head(&req->rq_wait_for_rep);
468         CDEBUG(D_NET, "subsys: %s req %Ld opc %d level %d, conn level %d\n",
469                cli->cli_name, req->rq_xid, req->rq_reqmsg->opc, req->rq_level,
470                req->rq_connection->c_level);
471
472         /* XXX probably both an import and connection level are needed */
473         if (req->rq_level > req->rq_connection->c_level) { 
474                 CERROR("process %d waiting for recovery\n", current->pid);
475                 spin_lock(&cli->cli_lock);
476                 list_del_init(&req->rq_list);
477                 list_add(&req->rq_list, cli->cli_delayed_head.prev); 
478                 spin_unlock(&cli->cli_lock);
479                 wait_event_interruptible
480                         (req->rq_wait_for_rep, 
481                          req->rq_level <= req->rq_connection->c_level);
482                 spin_lock(&cli->cli_lock);
483                 list_del_init(&req->rq_list);
484                 spin_unlock(&cli->cli_lock);
485                 CERROR("process %d resumed\n", current->pid);
486         }
487  resend:
488         req->rq_time = CURRENT_TIME;
489         req->rq_timeout = 100;
490         rc = ptl_send_rpc(req);
491         if (rc) {
492                 CERROR("error %d, opcode %d\n", rc, req->rq_reqmsg->opc);
493                 if ( rc > 0 ) 
494                         rc = -rc;
495                 ptlrpc_cleanup_request_buf(req);
496                 up(&cli->cli_rpc_sem);
497                 RETURN(-rc);
498         }
499
500         spin_lock(&cli->cli_lock);
501         list_del_init(&req->rq_list);
502         list_add_tail(&req->rq_list, &cli->cli_sending_head);
503         spin_unlock(&cli->cli_lock);
504
505         CDEBUG(D_OTHER, "-- sleeping\n");
506         wait_event_interruptible(req->rq_wait_for_rep, 
507                                  ptlrpc_check_reply(req));
508         CDEBUG(D_OTHER, "-- done\n");
509
510         if (req->rq_flags & PTL_RPC_FL_RESEND) {
511                 req->rq_flags &= ~PTL_RPC_FL_RESEND;
512                 goto resend;
513         }
514
515         up(&cli->cli_rpc_sem);
516         if (req->rq_flags & PTL_RPC_FL_TIMEOUT)
517                 GOTO(out, rc = -ETIMEDOUT);
518
519         if (req->rq_flags & PTL_RPC_FL_INTR) {
520                 /* Clean up the dangling reply buffers */
521                 ptlrpc_abort(req);
522                 GOTO(out, rc = -EINTR);
523         }
524
525         if (!(req->rq_flags & PTL_RPC_FL_REPLIED))
526                 GOTO(out, rc = req->rq_status);
527
528         rc = lustre_unpack_msg(req->rq_repmsg, req->rq_replen);
529         if (rc) {
530                 CERROR("unpack_rep failed: %d\n", rc);
531                 GOTO(out, rc);
532         }
533         CDEBUG(D_NET, "got rep %Ld\n", req->rq_xid);
534         if (req->rq_repmsg->status == 0)
535                 CDEBUG(D_NET, "--> buf %p len %d status %d\n", req->rq_repmsg,
536                        req->rq_replen, req->rq_repmsg->status);
537
538         spin_lock(&cli->cli_lock);
539         cli->cli_last_rcvd = req->rq_repmsg->last_rcvd;
540         cli->cli_last_committed = req->rq_repmsg->last_committed;
541         ptlrpc_free_committed(cli); 
542         spin_unlock(&cli->cli_lock);
543
544         EXIT;
545  out:
546         return rc;
547 }
548
549 int ptlrpc_replay_req(struct ptlrpc_request *req)
550 {
551         int rc = 0;
552         struct ptlrpc_client *cli = req->rq_client;
553         ENTRY;
554
555         init_waitqueue_head(&req->rq_wait_for_rep);
556         CDEBUG(D_NET, "req %Ld opc %d level %d, conn level %d\n",
557                req->rq_xid, req->rq_reqmsg->opc, req->rq_level,
558                req->rq_connection->c_level);
559
560         req->rq_time = CURRENT_TIME;
561         req->rq_timeout = 100;
562         rc = ptl_send_rpc(req);
563         if (rc) {
564                 CERROR("error %d, opcode %d\n", rc, req->rq_reqmsg->opc);
565                 ptlrpc_cleanup_request_buf(req);
566                 up(&cli->cli_rpc_sem);
567                 RETURN(-rc);
568         }
569
570         CDEBUG(D_OTHER, "-- sleeping\n");
571         wait_event_interruptible(req->rq_wait_for_rep, 
572                                  ptlrpc_check_reply(req));
573         CDEBUG(D_OTHER, "-- done\n");
574
575         up(&cli->cli_rpc_sem);
576
577         if (!(req->rq_flags & PTL_RPC_FL_REPLIED)) {
578                 CERROR("Unknown reason for wakeup\n");
579                 /* XXX Phil - I end up here when I kill obdctl */
580                 ptlrpc_abort(req);
581                 GOTO(out, rc = -EINTR);
582         }
583
584         rc = lustre_unpack_msg(req->rq_repmsg, req->rq_replen);
585         if (rc) {
586                 CERROR("unpack_rep failed: %d\n", rc);
587                 GOTO(out, rc);
588         }
589
590         CDEBUG(D_NET, "got rep %Ld\n", req->rq_xid);
591         if (req->rq_repmsg->status == 0)
592                 CDEBUG(D_NET, "--> buf %p len %d status %d\n", req->rq_repmsg,
593                        req->rq_replen, req->rq_repmsg->status);
594         else {
595                 CERROR("recovery failed: "); 
596                 CERROR("req %Ld opc %d level %d, conn level %d\n", 
597                        req->rq_xid, req->rq_reqmsg->opc, req->rq_level,
598                        req->rq_connection->c_level);
599                 LBUG();
600         }
601
602  out:
603         RETURN(rc);
604 }