Whamcloud - gitweb
bcdcc4a026345003b739c0d471098c3e523b2e8d
[fs/lustre-release.git] / lustre / ptlrpc / client.c
1 /* -*- mode: c; c-basic-offset: 8; indent-tabs-mode: nil; -*-
2  * vim:expandtab:shiftwidth=8:tabstop=8:
3  *
4  *  Copyright (C) 2002 Cluster File Systems, Inc.
5  *
6  *   This file is part of Lustre, http://www.lustre.org.
7  *
8  *   Lustre is free software; you can redistribute it and/or
9  *   modify it under the terms of version 2 of the GNU General Public
10  *   License as published by the Free Software Foundation.
11  *
12  *   Lustre is distributed in the hope that it will be useful,
13  *   but WITHOUT ANY WARRANTY; without even the implied warranty of
14  *   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
15  *   GNU General Public License for more details.
16  *
17  *   You should have received a copy of the GNU General Public License
18  *   along with Lustre; if not, write to the Free Software
19  *   Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
20  *
21  */
22
23 #define DEBUG_SUBSYSTEM S_RPC
24
25 #include <linux/obd_support.h>
26 #include <linux/obd_class.h>
27 #include <linux/lustre_lib.h>
28 #include <linux/lustre_ha.h>
29
30 void ptlrpc_init_client(int req_portal, int rep_portal, struct ptlrpc_client *cl,
31                         struct ptlrpc_connection *conn)
32 {
33         memset(cl, 0, sizeof(*cl));
34         /* Some things, like the LDLM, can call us without a connection.
35          * I don't like it one bit.
36          */
37         if (conn) {
38                 cl->cli_connection = conn;
39                 list_add(&cl->cli_client_chain, &conn->c_clients);
40         }
41         cl->cli_obd = NULL;
42         cl->cli_request_portal = req_portal;
43         cl->cli_reply_portal = rep_portal;
44         sema_init(&cl->cli_rpc_sem, 32);
45 }
46
47 __u8 *ptlrpc_req_to_uuid(struct ptlrpc_request *req)
48 {
49         return req->rq_connection->c_remote_uuid;
50 }
51
52 struct ptlrpc_connection *ptlrpc_uuid_to_connection(char *uuid)
53 {
54         struct ptlrpc_connection *c;
55         struct lustre_peer peer;
56         int err;
57
58         err = kportal_uuid_to_peer(uuid, &peer);
59         if (err != 0) {
60                 CERROR("cannot find peer %s!\n", uuid);
61                 return NULL;
62         }
63
64         c = ptlrpc_get_connection(&peer);
65         if (c) { 
66                 memcpy(c->c_remote_uuid, uuid, sizeof(c->c_remote_uuid));
67                 c->c_epoch++;
68         }
69
70         return c;
71 }
72
73 void ptlrpc_readdress_connection(struct ptlrpc_connection *conn, char *uuid)
74 {
75         struct lustre_peer peer;
76         int err;
77
78         err = kportal_uuid_to_peer(uuid, &peer);
79         if (err != 0) {
80                 CERROR("cannot find peer %s!\n", uuid);
81                 return;
82         }
83         
84         memcpy(&conn->c_peer, &peer, sizeof(peer)); 
85         return;
86 }
87
88 struct ptlrpc_bulk_desc *ptlrpc_prep_bulk(struct ptlrpc_connection *conn)
89 {
90         struct ptlrpc_bulk_desc *desc;
91
92         OBD_ALLOC(desc, sizeof(*desc));
93         if (desc != NULL) {
94                 desc->b_connection = ptlrpc_connection_addref(conn);
95                 atomic_set(&desc->b_refcount, 1);
96                 init_waitqueue_head(&desc->b_waitq);
97                 INIT_LIST_HEAD(&desc->b_page_list);
98                 ptl_set_inv_handle(&desc->b_md_h);
99                 ptl_set_inv_handle(&desc->b_me_h);
100         }
101
102         return desc;
103 }
104
105 struct ptlrpc_bulk_page *ptlrpc_prep_bulk_page(struct ptlrpc_bulk_desc *desc)
106 {
107         struct ptlrpc_bulk_page *bulk;
108
109         OBD_ALLOC(bulk, sizeof(*bulk));
110         if (bulk != NULL) {
111                 bulk->b_desc = desc;
112                 list_add_tail(&bulk->b_link, &desc->b_page_list);
113                 desc->b_page_count++;
114         }
115         return bulk;
116 }
117
118 void ptlrpc_free_bulk(struct ptlrpc_bulk_desc *desc)
119 {
120         struct list_head *tmp, *next;
121         ENTRY;
122         if (desc == NULL) {
123                 EXIT;
124                 return;
125         }
126
127         list_for_each_safe(tmp, next, &desc->b_page_list) {
128                 struct ptlrpc_bulk_page *bulk;
129                 bulk = list_entry(tmp, struct ptlrpc_bulk_page, b_link);
130                 ptlrpc_free_bulk_page(bulk);
131         }
132
133         ptlrpc_put_connection(desc->b_connection);
134
135         OBD_FREE(desc, sizeof(*desc));
136         EXIT;
137 }
138
139 void ptlrpc_free_bulk_page(struct ptlrpc_bulk_page *bulk)
140 {
141         ENTRY;
142         if (bulk == NULL) {
143                 EXIT;
144                 return;
145         }
146
147         list_del(&bulk->b_link);
148         bulk->b_desc->b_page_count--;
149         OBD_FREE(bulk, sizeof(*bulk));
150         EXIT;
151 }
152
153 struct ptlrpc_request *ptlrpc_prep_req(struct ptlrpc_client *cl,
154                                        int opcode, int count, int *lengths,
155                                        char **bufs)
156 {
157         struct ptlrpc_request *request;
158         struct ptlrpc_connection *conn = cl->cli_connection;
159         int rc;
160         ENTRY;
161
162         OBD_ALLOC(request, sizeof(*request));
163         if (!request) {
164                 CERROR("request allocation out of memory\n");
165                 RETURN(NULL);
166         }
167
168         rc = lustre_pack_msg(count, lengths, bufs,
169                              &request->rq_reqlen, &request->rq_reqmsg);
170         if (rc) {
171                 CERROR("cannot pack request %d\n", rc);
172                 OBD_FREE(request, sizeof(*request));
173                 RETURN(NULL);
174         }
175
176         request->rq_level = LUSTRE_CONN_FULL;
177         request->rq_type = PTL_RPC_TYPE_REQUEST;
178         request->rq_client = cl;
179         request->rq_connection = ptlrpc_connection_addref(conn);
180
181         INIT_LIST_HEAD(&request->rq_list);
182         INIT_LIST_HEAD(&request->rq_multi);
183         /* this will be dec()d once in req_finished, once in free_committed */
184         atomic_set(&request->rq_refcount, 2);
185
186         spin_lock(&conn->c_lock);
187         request->rq_xid = HTON__u32(++conn->c_xid_out);
188         spin_unlock(&conn->c_lock);
189
190         request->rq_reqmsg->magic = PTLRPC_MSG_MAGIC; 
191         request->rq_reqmsg->version = PTLRPC_MSG_VERSION;
192         request->rq_reqmsg->opc = HTON__u32(opcode);
193         request->rq_reqmsg->type = HTON__u32(PTL_RPC_MSG_REQUEST);
194
195         RETURN(request);
196 }
197 struct ptlrpc_request *ptlrpc_prep_req2(struct lustre_handle *conn, 
198                                        int opcode, int count, int *lengths,
199                                        char **bufs)
200 {
201         struct client_obd *clobd; 
202         struct ptlrpc_request *req;
203         struct obd_export *export;
204
205         export = class_conn2export(conn);
206         if (!export) { 
207                 LBUG();
208                 CERROR("NOT connected\n"); 
209                 return NULL;
210         }
211
212         clobd = &export->exp_obd->u.cli;
213         req = ptlrpc_prep_req(clobd->cl_client, opcode, count, lengths, bufs);
214         ptlrpc_hdl2req(req, &clobd->cl_exporth);
215         return req;
216 }
217
218 void ptlrpc_req_finished(struct ptlrpc_request *request)
219 {
220         if (request == NULL)
221                 return;
222
223         if (request->rq_repmsg != NULL) { 
224                 OBD_FREE(request->rq_repmsg, request->rq_replen);
225                 request->rq_repmsg = NULL;
226                 request->rq_reply_md.start = NULL; 
227         }
228
229         if (atomic_dec_and_test(&request->rq_refcount))
230                 ptlrpc_free_req(request);
231 }
232
233 void ptlrpc_free_req(struct ptlrpc_request *request)
234 {
235         ENTRY;
236         if (request == NULL) {
237                 EXIT;
238                 return;
239         }
240
241         if (request->rq_repmsg != NULL)
242                 OBD_FREE(request->rq_repmsg, request->rq_replen);
243         if (request->rq_reqmsg != NULL)
244                 OBD_FREE(request->rq_reqmsg, request->rq_reqlen);
245
246         if (request->rq_connection) {
247                 spin_lock(&request->rq_connection->c_lock);
248                 list_del_init(&request->rq_list);
249                 spin_unlock(&request->rq_connection->c_lock);
250         }
251
252         ptlrpc_put_connection(request->rq_connection);
253         list_del(&request->rq_multi);
254         OBD_FREE(request, sizeof(*request));
255         EXIT;
256 }
257
258 static int ptlrpc_check_reply(struct ptlrpc_request *req)
259 {
260         int rc = 0;
261
262         if (req->rq_repmsg != NULL) {
263                 req->rq_transno = NTOH__u64(req->rq_repmsg->transno);
264                 req->rq_flags |= PTL_RPC_FL_REPLIED;
265                 GOTO(out, rc = 1);
266         }
267
268         if (req->rq_flags & PTL_RPC_FL_RECOVERY) { 
269                 CERROR("-- RESTART --\n");
270                 GOTO(out, rc = 1);
271         }
272
273  out:
274         CDEBUG(D_NET, "req = %p, rc = %d\n", req, rc);
275         return rc;
276 }
277
278 int ptlrpc_check_status(struct ptlrpc_request *req, int err)
279 {
280         ENTRY;
281
282         if (err != 0) {
283                 CERROR("err is %d\n", err);
284                 RETURN(err);
285         }
286
287         if (req == NULL) {
288                 CERROR("req == NULL\n");
289                 RETURN(-ENOMEM);
290         }
291
292         if (req->rq_repmsg == NULL) {
293                 CERROR("req->rq_repmsg == NULL\n");
294                 RETURN(-ENOMEM);
295         }
296
297         err = req->rq_repmsg->status;
298         if (req->rq_repmsg->type == NTOH__u32(PTL_RPC_MSG_ERR)) {
299                 CERROR("req->rq_repmsg->type == PTL_RPC_MSG_ERR\n");
300                 RETURN(err ? err : -EINVAL);
301         }
302
303         if (err != 0) {
304                 if (err < 0)
305                         CERROR("req->rq_repmsg->status is %d\n", err);
306                 else
307                         CDEBUG(D_INFO, "req->rq_repmsg->status is %d\n", err);
308                 /* XXX: translate this error from net to host */
309                 RETURN(err);
310         }
311
312         RETURN(0);
313 }
314
315 static void ptlrpc_cleanup_request_buf(struct ptlrpc_request *request)
316 {
317         OBD_FREE(request->rq_reqmsg, request->rq_reqlen);
318         request->rq_reqmsg = NULL;
319         request->rq_reqlen = 0;
320 }
321
322 /* Abort this request and cleanup any resources associated with it. */
323 static int ptlrpc_abort(struct ptlrpc_request *request)
324 {
325         /* First remove the ME for the reply; in theory, this means
326          * that we can tear down the buffer safely. */
327         PtlMEUnlink(request->rq_reply_me_h);
328         OBD_FREE(request->rq_reply_md.start, request->rq_replen);
329         request->rq_repmsg = NULL;
330         request->rq_replen = 0;
331         return 0;
332 }
333
334 /* caller must hold conn->c_lock */
335 void ptlrpc_free_committed(struct ptlrpc_connection *conn)
336 {
337         struct list_head *tmp, *saved;
338         struct ptlrpc_request *req;
339
340 restart:
341         list_for_each_safe(tmp, saved, &conn->c_sending_head) {
342                 req = list_entry(tmp, struct ptlrpc_request, rq_list);
343
344                 if (req->rq_flags & PTL_RPC_FL_REPLAY) {
345                         CDEBUG(D_INFO, "Keeping req %p xid %Ld for replay\n",
346                                req, req->rq_xid);
347                         continue;
348                 }
349
350                 /* not yet committed */
351                 if (req->rq_transno > conn->c_last_committed)
352                         break;
353
354                 CDEBUG(D_INFO, "Marking request %p xid %Ld as committed "
355                        "transno=%Lu, last_committed=%Lu\n", req,
356                        (long long)req->rq_xid, (long long)req->rq_transno,
357                        (long long)conn->c_last_committed);
358                 if (atomic_dec_and_test(&req->rq_refcount)) {
359                         req->rq_client = NULL;
360
361                         /* We do this to prevent free_req deadlock.  Restarting
362                          * after each removal is not so bad, as we are almost
363                          * always deleting the first item in the list.
364                          */
365                         spin_unlock(&conn->c_lock);
366                         ptlrpc_free_req(req);
367                         spin_lock(&conn->c_lock);
368                         goto restart;
369                 } else {
370                         list_del(&req->rq_list);
371                         list_add(&req->rq_list, &conn->c_dying_head);
372                 }
373         }
374
375         EXIT;
376         return;
377 }
378
379 void ptlrpc_cleanup_client(struct ptlrpc_client *cli)
380 {
381         struct list_head *tmp, *saved;
382         struct ptlrpc_request *req;
383         struct ptlrpc_connection *conn = cli->cli_connection;
384         ENTRY;
385
386         if (!conn) {
387                 EXIT;
388                 return;
389         }
390
391 restart1:
392         spin_lock(&conn->c_lock);
393         list_for_each_safe(tmp, saved, &conn->c_sending_head) {
394                 req = list_entry(tmp, struct ptlrpc_request, rq_list);
395                 if (req->rq_client != cli)
396                         continue;
397                 CDEBUG(D_INFO, "Cleaning req %p from sending list.\n", req);
398                 list_del_init(&req->rq_list);
399                 req->rq_client = NULL;
400                 spin_unlock(&conn->c_lock);
401                 ptlrpc_free_req(req);
402                 goto restart1;
403         }
404 restart2:
405         list_for_each_safe(tmp, saved, &conn->c_dying_head) {
406                 req = list_entry(tmp, struct ptlrpc_request, rq_list);
407                 if (req->rq_client != cli)
408                         continue;
409                 CERROR("Request %p is on the dying list at cleanup!\n", req);
410                 list_del_init(&req->rq_list);
411                 req->rq_client = NULL;
412                 spin_unlock(&conn->c_lock);
413                 ptlrpc_free_req(req); 
414                 spin_lock(&conn->c_lock);
415                 goto restart2;
416         }
417         spin_unlock(&conn->c_lock);
418
419         EXIT;
420         return;
421 }
422
423 void ptlrpc_continue_req(struct ptlrpc_request *req)
424 {
425         ENTRY;
426         CDEBUG(D_INODE, "continue delayed request %Ld opc %d\n", 
427                req->rq_xid, req->rq_reqmsg->opc); 
428         wake_up(&req->rq_wait_for_rep); 
429         EXIT;
430 }
431
432 void ptlrpc_resend_req(struct ptlrpc_request *req)
433 {
434         ENTRY;
435         CDEBUG(D_INODE, "resend request %Ld, opc %d\n", 
436                req->rq_xid, req->rq_reqmsg->opc);
437         req->rq_status = -EAGAIN;
438         req->rq_level = LUSTRE_CONN_RECOVD;
439         req->rq_flags |= PTL_RPC_FL_RESEND;
440         req->rq_flags &= ~PTL_RPC_FL_TIMEOUT;
441         wake_up(&req->rq_wait_for_rep);
442         EXIT;
443 }
444
445 void ptlrpc_restart_req(struct ptlrpc_request *req)
446 {
447         ENTRY;
448         CDEBUG(D_INODE, "restart completed request %Ld, opc %d\n", 
449                req->rq_xid, req->rq_reqmsg->opc);
450         req->rq_status = -ERESTARTSYS;
451         req->rq_flags |= PTL_RPC_FL_RECOVERY;
452         req->rq_flags &= ~PTL_RPC_FL_TIMEOUT;
453         wake_up(&req->rq_wait_for_rep);
454         EXIT;
455 }
456
457 static int expired_request(void *data)
458 {
459         struct ptlrpc_request *req = data;
460         
461         ENTRY;
462         CERROR("req timeout on connid %d xid %Ld\n", req->rq_connid,
463                (unsigned long long)req->rq_xid);
464         req->rq_timeout = 0;
465         req->rq_connection->c_level = LUSTRE_CONN_RECOVD;
466         req->rq_flags |= PTL_RPC_FL_TIMEOUT;
467         /* Activate the recovd for this client, if there is one. */
468         if (req->rq_client && req->rq_client->cli_connection &&
469             req->rq_client->cli_connection->c_recovd)
470                 recovd_conn_fail(req->rq_client->cli_connection);
471
472         /* If this request is for recovery or other primordial tasks,
473          * don't go back to sleep.
474          */
475         if (req->rq_level < LUSTRE_CONN_FULL)
476                 RETURN(1);
477         RETURN(0);
478 }
479
480 static int interrupted_request(void *data)
481 {
482         struct ptlrpc_request *req = data;
483         ENTRY;
484         req->rq_flags |= PTL_RPC_FL_INTR;
485         RETURN(1); /* ignored, as of this writing */
486 }
487
488 int ptlrpc_queue_wait(struct ptlrpc_request *req)
489 {
490         int rc = 0;
491         struct l_wait_info lwi;
492         struct ptlrpc_client *cli = req->rq_client;
493         struct ptlrpc_connection *conn = cli->cli_connection;
494         ENTRY;
495
496         init_waitqueue_head(&req->rq_wait_for_rep);
497         CDEBUG(D_NET, "subsys: %s req %Ld opc %d level %d, conn level %d\n",
498                cli->cli_name, req->rq_xid, req->rq_reqmsg->opc, req->rq_level,
499                req->rq_connection->c_level);
500
501         /* XXX probably both an import and connection level are needed */
502         if (req->rq_level > conn->c_level) { 
503                 CERROR("process %d waiting for recovery (%d > %d)\n", 
504                        current->pid, req->rq_level, conn->c_level);
505
506                 spin_lock(&conn->c_lock);
507                 list_del(&req->rq_list);
508                 list_add_tail(&req->rq_list, &conn->c_delayed_head);
509                 spin_unlock(&conn->c_lock);
510
511                 lwi = LWI_INTR(NULL, NULL);
512                 rc = l_wait_event(req->rq_wait_for_rep,
513                                   req->rq_level <= conn->c_level, &lwi);
514
515                 spin_lock(&conn->c_lock);
516                 list_del_init(&req->rq_list);
517                 spin_unlock(&conn->c_lock);
518
519                 if (rc)
520                         RETURN(rc);
521
522                 CERROR("process %d resumed\n", current->pid);
523         }
524  resend:
525         req->rq_time = CURRENT_TIME;
526         req->rq_timeout = obd_timeout;
527         rc = ptl_send_rpc(req);
528         if (rc) {
529                 CERROR("error %d, opcode %d\n", rc, req->rq_reqmsg->opc);
530                 if ( rc > 0 ) 
531                         rc = -rc;
532                 ptlrpc_cleanup_request_buf(req);
533                 up(&cli->cli_rpc_sem);
534                 RETURN(-rc);
535         }
536
537         spin_lock(&conn->c_lock);
538         list_del(&req->rq_list);
539         list_add_tail(&req->rq_list, &conn->c_sending_head);
540         spin_unlock(&conn->c_lock);
541
542         CDEBUG(D_OTHER, "-- sleeping\n");
543         lwi = LWI_TIMEOUT_INTR(req->rq_timeout * HZ, expired_request,
544                                interrupted_request,req);
545         l_wait_event(req->rq_wait_for_rep, ptlrpc_check_reply(req), &lwi);
546         CDEBUG(D_OTHER, "-- done\n");
547
548         /* Don't resend if we were interrupted. */
549         if ((req->rq_flags & (PTL_RPC_FL_RESEND | PTL_RPC_FL_INTR)) ==
550             PTL_RPC_FL_RESEND) {
551                 req->rq_flags &= ~PTL_RPC_FL_RESEND;
552                 goto resend;
553         }
554
555         up(&cli->cli_rpc_sem);
556         if (req->rq_flags & PTL_RPC_FL_INTR) {
557                 if (!(req->rq_flags & PTL_RPC_FL_TIMEOUT))
558                         LBUG(); /* should only be interrupted if we timed out. */
559                 /* Clean up the dangling reply buffers */
560                 ptlrpc_abort(req);
561                 GOTO(out, rc = -EINTR);
562         }
563
564         if (req->rq_flags & PTL_RPC_FL_TIMEOUT)
565                 GOTO(out, rc = -ETIMEDOUT);
566
567         if (!(req->rq_flags & PTL_RPC_FL_REPLIED))
568                 GOTO(out, rc = req->rq_status);
569
570         rc = lustre_unpack_msg(req->rq_repmsg, req->rq_replen);
571         if (rc) {
572                 CERROR("unpack_rep failed: %d\n", rc);
573                 GOTO(out, rc);
574         }
575         CDEBUG(D_NET, "got rep %Ld\n", req->rq_xid);
576         if (req->rq_repmsg->status == 0)
577                 CDEBUG(D_NET, "--> buf %p len %d status %d\n", req->rq_repmsg,
578                        req->rq_replen, req->rq_repmsg->status);
579
580         spin_lock(&conn->c_lock);
581         conn->c_last_xid = req->rq_repmsg->last_xid;
582         conn->c_last_committed = req->rq_repmsg->last_committed;
583         ptlrpc_free_committed(conn);
584         spin_unlock(&conn->c_lock);
585
586         EXIT;
587  out:
588         return rc;
589 }
590
591 int ptlrpc_replay_req(struct ptlrpc_request *req)
592 {
593         int rc = 0;
594         struct ptlrpc_client *cli = req->rq_client;
595         struct l_wait_info lwi;
596         ENTRY;
597
598         init_waitqueue_head(&req->rq_wait_for_rep);
599         CDEBUG(D_NET, "req %Ld opc %d level %d, conn level %d\n",
600                req->rq_xid, req->rq_reqmsg->opc, req->rq_level,
601                req->rq_connection->c_level);
602
603         req->rq_time = CURRENT_TIME;
604         req->rq_timeout = obd_timeout;
605         rc = ptl_send_rpc(req);
606         if (rc) {
607                 CERROR("error %d, opcode %d\n", rc, req->rq_reqmsg->opc);
608                 ptlrpc_cleanup_request_buf(req);
609                 up(&cli->cli_rpc_sem);
610                 RETURN(-rc);
611         }
612
613         CDEBUG(D_OTHER, "-- sleeping\n");
614         lwi = LWI_INTR(NULL, NULL);
615         l_wait_event(req->rq_wait_for_rep, ptlrpc_check_reply(req), &lwi);
616         CDEBUG(D_OTHER, "-- done\n");
617
618         up(&cli->cli_rpc_sem);
619
620         if (!(req->rq_flags & PTL_RPC_FL_REPLIED)) {
621                 CERROR("Unknown reason for wakeup\n");
622                 /* XXX Phil - I end up here when I kill obdctl */
623                 ptlrpc_abort(req);
624                 GOTO(out, rc = -EINTR);
625         }
626
627         rc = lustre_unpack_msg(req->rq_repmsg, req->rq_replen);
628         if (rc) {
629                 CERROR("unpack_rep failed: %d\n", rc);
630                 GOTO(out, rc);
631         }
632
633         CDEBUG(D_NET, "got rep %Ld\n", req->rq_xid);
634         if (req->rq_repmsg->status == 0)
635                 CDEBUG(D_NET, "--> buf %p len %d status %d\n", req->rq_repmsg,
636                        req->rq_replen, req->rq_repmsg->status);
637         else {
638                 CERROR("recovery failed: "); 
639                 CERROR("req %Ld opc %d level %d, conn level %d\n", 
640                        req->rq_xid, req->rq_reqmsg->opc, req->rq_level,
641                        req->rq_connection->c_level);
642                 LBUG();
643         }
644
645  out:
646         RETURN(rc);
647 }