Whamcloud - gitweb
* Fix interrupt-pending-when-timeout-occurs handling in l_wait_event.
[fs/lustre-release.git] / lustre / ptlrpc / client.c
1 /* -*- mode: c; c-basic-offset: 8; indent-tabs-mode: nil; -*-
2  * vim:expandtab:shiftwidth=8:tabstop=8:
3  *
4  *  Copyright (C) 2002 Cluster File Systems, Inc.
5  *
6  *   This file is part of Lustre, http://www.lustre.org.
7  *
8  *   Lustre is free software; you can redistribute it and/or
9  *   modify it under the terms of version 2 of the GNU General Public
10  *   License as published by the Free Software Foundation.
11  *
12  *   Lustre is distributed in the hope that it will be useful,
13  *   but WITHOUT ANY WARRANTY; without even the implied warranty of
14  *   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
15  *   GNU General Public License for more details.
16  *
17  *   You should have received a copy of the GNU General Public License
18  *   along with Lustre; if not, write to the Free Software
19  *   Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
20  *
21  */
22
23 #define DEBUG_SUBSYSTEM S_RPC
24
25 #include <linux/obd_support.h>
26 #include <linux/obd_class.h>
27 #include <linux/lustre_lib.h>
28 #include <linux/lustre_ha.h>
29
30 void ptlrpc_init_client(struct recovd_obd *recovd, 
31                         int (*recover)(struct ptlrpc_client *recover),
32                         int req_portal,
33                         int rep_portal, struct ptlrpc_client *cl)
34 {
35         memset(cl, 0, sizeof(*cl));
36         cl->cli_recovd = recovd;
37         cl->cli_recover = recover;
38         if (recovd)
39                 recovd_cli_manage(recovd, cl);
40         cl->cli_obd = NULL;
41         cl->cli_request_portal = req_portal;
42         cl->cli_reply_portal = rep_portal;
43         INIT_LIST_HEAD(&cl->cli_delayed_head);
44         INIT_LIST_HEAD(&cl->cli_sending_head);
45         INIT_LIST_HEAD(&cl->cli_dying_head);
46         spin_lock_init(&cl->cli_lock);
47         sema_init(&cl->cli_rpc_sem, 32);
48 }
49
50 __u8 *ptlrpc_req_to_uuid(struct ptlrpc_request *req)
51 {
52         return req->rq_connection->c_remote_uuid;
53 }
54
55 struct ptlrpc_connection *ptlrpc_uuid_to_connection(char *uuid)
56 {
57         struct ptlrpc_connection *c;
58         struct lustre_peer peer;
59         int err;
60
61         err = kportal_uuid_to_peer(uuid, &peer);
62         if (err != 0) {
63                 CERROR("cannot find peer %s!\n", uuid);
64                 return NULL;
65         }
66
67         c = ptlrpc_get_connection(&peer);
68         if (c) { 
69                 memcpy(c->c_remote_uuid, uuid, sizeof(c->c_remote_uuid));
70                 c->c_epoch++;
71         }
72
73         return c;
74 }
75
76 void ptlrpc_readdress_connection(struct ptlrpc_connection *conn, char *uuid)
77 {
78         struct lustre_peer peer;
79         int err;
80
81         err = kportal_uuid_to_peer(uuid, &peer);
82         if (err != 0) {
83                 CERROR("cannot find peer %s!\n", uuid);
84                 return;
85         }
86         
87         memcpy(&conn->c_peer, &peer, sizeof(peer)); 
88         return;
89 }
90
91 struct ptlrpc_bulk_desc *ptlrpc_prep_bulk(struct ptlrpc_connection *conn)
92 {
93         struct ptlrpc_bulk_desc *desc;
94
95         OBD_ALLOC(desc, sizeof(*desc));
96         if (desc != NULL) {
97                 desc->b_connection = ptlrpc_connection_addref(conn);
98                 atomic_set(&desc->b_refcount, 1);
99                 init_waitqueue_head(&desc->b_waitq);
100                 INIT_LIST_HEAD(&desc->b_page_list);
101                 ptl_set_inv_handle(&desc->b_md_h);
102                 ptl_set_inv_handle(&desc->b_me_h);
103         }
104
105         return desc;
106 }
107
108 struct ptlrpc_bulk_page *ptlrpc_prep_bulk_page(struct ptlrpc_bulk_desc *desc)
109 {
110         struct ptlrpc_bulk_page *bulk;
111
112         OBD_ALLOC(bulk, sizeof(*bulk));
113         if (bulk != NULL) {
114                 bulk->b_desc = desc;
115                 list_add_tail(&bulk->b_link, &desc->b_page_list);
116                 desc->b_page_count++;
117         }
118         return bulk;
119 }
120
121 void ptlrpc_free_bulk(struct ptlrpc_bulk_desc *desc)
122 {
123         struct list_head *tmp, *next;
124         ENTRY;
125         if (desc == NULL) {
126                 EXIT;
127                 return;
128         }
129
130         list_for_each_safe(tmp, next, &desc->b_page_list) {
131                 struct ptlrpc_bulk_page *bulk;
132                 bulk = list_entry(tmp, struct ptlrpc_bulk_page, b_link);
133                 ptlrpc_free_bulk_page(bulk);
134         }
135
136         ptlrpc_put_connection(desc->b_connection);
137
138         OBD_FREE(desc, sizeof(*desc));
139         EXIT;
140 }
141
142 void ptlrpc_free_bulk_page(struct ptlrpc_bulk_page *bulk)
143 {
144         ENTRY;
145         if (bulk == NULL) {
146                 EXIT;
147                 return;
148         }
149
150         list_del(&bulk->b_link);
151         bulk->b_desc->b_page_count--;
152         OBD_FREE(bulk, sizeof(*bulk));
153         EXIT;
154 }
155
156 struct ptlrpc_request *ptlrpc_prep_req(struct ptlrpc_client *cl,
157                                        struct ptlrpc_connection *conn,
158                                        int opcode, int count, int *lengths,
159                                        char **bufs)
160 {
161         struct ptlrpc_request *request;
162         int rc;
163         ENTRY;
164
165         OBD_ALLOC(request, sizeof(*request));
166         if (!request) {
167                 CERROR("request allocation out of memory\n");
168                 RETURN(NULL);
169         }
170
171         rc = lustre_pack_msg(count, lengths, bufs,
172                              &request->rq_reqlen, &request->rq_reqmsg);
173         if (rc) {
174                 CERROR("cannot pack request %d\n", rc);
175                 OBD_FREE(request, sizeof(*request));
176                 RETURN(NULL);
177         }
178
179         request->rq_level = LUSTRE_CONN_FULL;
180         request->rq_type = PTL_RPC_TYPE_REQUEST;
181         request->rq_client = cl;
182         request->rq_connection = ptlrpc_connection_addref(conn);
183
184         INIT_LIST_HEAD(&request->rq_list);
185         INIT_LIST_HEAD(&request->rq_multi);
186         /* this will be dec()d once in req_finished, once in free_committed */
187         atomic_set(&request->rq_refcount, 2);
188
189         spin_lock(&conn->c_lock);
190         request->rq_xid = HTON__u32(++conn->c_xid_out);
191         spin_unlock(&conn->c_lock);
192
193         request->rq_reqmsg->magic = PTLRPC_MSG_MAGIC; 
194         request->rq_reqmsg->version = PTLRPC_MSG_VERSION;
195         request->rq_reqmsg->opc = HTON__u32(opcode);
196         request->rq_reqmsg->type = HTON__u32(PTL_RPC_MSG_REQUEST);
197
198         RETURN(request);
199 }
200 struct ptlrpc_request *ptlrpc_prep_req2(struct lustre_handle *conn, 
201                                        int opcode, int count, int *lengths,
202                                        char **bufs)
203 {
204         struct client_obd *clobd; 
205         struct ptlrpc_request *req;
206         struct obd_export *export;
207
208         export = class_conn2export(conn);
209         if (!export) { 
210                 LBUG();
211                 CERROR("NOT connected\n"); 
212                 return NULL;
213         }
214
215         clobd = &export->exp_obd->u.cli;
216         req = ptlrpc_prep_req(clobd->cl_client, clobd->cl_conn, 
217                               opcode, count, lengths, bufs);
218         ptlrpc_hdl2req(req, &clobd->cl_exporth);
219         return req;
220 }
221
222 void ptlrpc_req_finished(struct ptlrpc_request *request)
223 {
224         if (request == NULL)
225                 return;
226
227         if (request->rq_repmsg != NULL) { 
228                 OBD_FREE(request->rq_repmsg, request->rq_replen);
229                 request->rq_repmsg = NULL;
230                 request->rq_reply_md.start = NULL; 
231         }
232
233         if (atomic_dec_and_test(&request->rq_refcount))
234                 ptlrpc_free_req(request);
235 }
236
237 void ptlrpc_free_req(struct ptlrpc_request *request)
238 {
239         ENTRY;
240         if (request == NULL) {
241                 EXIT;
242                 return;
243         }
244
245         if (request->rq_repmsg != NULL)
246                 OBD_FREE(request->rq_repmsg, request->rq_replen);
247         if (request->rq_reqmsg != NULL)
248                 OBD_FREE(request->rq_reqmsg, request->rq_reqlen);
249
250         if (request->rq_client) {
251                 spin_lock(&request->rq_client->cli_lock);
252                 list_del_init(&request->rq_list);
253                 spin_unlock(&request->rq_client->cli_lock);
254         }
255
256         ptlrpc_put_connection(request->rq_connection);
257         list_del(&request->rq_multi);
258         OBD_FREE(request, sizeof(*request));
259         EXIT;
260 }
261
262 static int ptlrpc_check_reply(struct ptlrpc_request *req)
263 {
264         int rc = 0;
265
266         if (req->rq_repmsg != NULL) {
267                 req->rq_transno = NTOH__u64(req->rq_repmsg->transno);
268                 req->rq_flags |= PTL_RPC_FL_REPLIED;
269                 GOTO(out, rc = 1);
270         }
271
272         if (req->rq_flags & PTL_RPC_FL_RECOVERY) { 
273                 CERROR("-- RESTART --\n");
274                 GOTO(out, rc = 1);
275         }
276
277  out:
278         CDEBUG(D_NET, "req = %p, rc = %d\n", req, rc);
279         return rc;
280 }
281
282 int ptlrpc_check_status(struct ptlrpc_request *req, int err)
283 {
284         ENTRY;
285
286         if (err != 0) {
287                 CERROR("err is %d\n", err);
288                 RETURN(err);
289         }
290
291         if (req == NULL) {
292                 CERROR("req == NULL\n");
293                 RETURN(-ENOMEM);
294         }
295
296         if (req->rq_repmsg == NULL) {
297                 CERROR("req->rq_repmsg == NULL\n");
298                 RETURN(-ENOMEM);
299         }
300
301         err = req->rq_repmsg->status;
302         if (req->rq_repmsg->type == NTOH__u32(PTL_RPC_MSG_ERR)) {
303                 CERROR("req->rq_repmsg->type == PTL_RPC_MSG_ERR\n");
304                 RETURN(err ? err : -EINVAL);
305         }
306
307         if (err != 0) {
308                 if (err < 0)
309                         CERROR("req->rq_repmsg->status is %d\n", err);
310                 else
311                         CDEBUG(D_INFO, "req->rq_repmsg->status is %d\n", err);
312                 /* XXX: translate this error from net to host */
313                 RETURN(err);
314         }
315
316         RETURN(0);
317 }
318
319 static void ptlrpc_cleanup_request_buf(struct ptlrpc_request *request)
320 {
321         OBD_FREE(request->rq_reqmsg, request->rq_reqlen);
322         request->rq_reqmsg = NULL;
323         request->rq_reqlen = 0;
324 }
325
326 /* Abort this request and cleanup any resources associated with it. */
327 static int ptlrpc_abort(struct ptlrpc_request *request)
328 {
329         /* First remove the ME for the reply; in theory, this means
330          * that we can tear down the buffer safely. */
331         PtlMEUnlink(request->rq_reply_me_h);
332         OBD_FREE(request->rq_reply_md.start, request->rq_replen);
333         request->rq_repmsg = NULL;
334         request->rq_replen = 0;
335         return 0;
336 }
337
338 /* caller must lock cli */
339 void ptlrpc_free_committed(struct ptlrpc_client *cli)
340 {
341         struct list_head *tmp, *saved;
342         struct ptlrpc_request *req;
343
344         list_for_each_safe(tmp, saved, &cli->cli_sending_head) {
345                 req = list_entry(tmp, struct ptlrpc_request, rq_list);
346
347                 if ( (req->rq_flags & PTL_RPC_FL_REPLAY) ) { 
348                         CDEBUG(D_INFO, "Retaining request %Ld for replay\n",
349                                req->rq_xid);
350                         continue;
351                 }
352
353                 /* not yet committed */
354                 if (req->rq_transno > cli->cli_last_committed)
355                         break;
356
357                 CDEBUG(D_INFO, "Marking request xid %Ld as committed ("
358                        "transno=%Lu, last_committed=%Lu\n",
359                        (long long)req->rq_xid, (long long)req->rq_transno,
360                        (long long)cli->cli_last_committed);
361                 if (atomic_dec_and_test(&req->rq_refcount)) {
362                         /* we do this to prevent free_req deadlock */
363                         list_del_init(&req->rq_list); 
364                         req->rq_client = NULL;
365                         ptlrpc_free_req(req);
366                 } else {
367                         list_del_init(&req->rq_list);
368                         list_add(&req->rq_list, &cli->cli_dying_head);
369                 }
370         }
371
372         EXIT;
373         return;
374 }
375
376 void ptlrpc_cleanup_client(struct ptlrpc_client *cli)
377 {
378         struct list_head *tmp, *saved;
379         struct ptlrpc_request *req;
380         ENTRY;
381
382         spin_lock(&cli->cli_lock);
383         list_for_each_safe(tmp, saved, &cli->cli_sending_head) {
384                 req = list_entry(tmp, struct ptlrpc_request, rq_list);
385                 CDEBUG(D_INFO, "Cleaning req %p from sending list.\n", req);
386                 list_del_init(&req->rq_list);
387                 req->rq_client = NULL;
388                 ptlrpc_free_req(req); 
389         }
390         list_for_each_safe(tmp, saved, &cli->cli_dying_head) {
391                 req = list_entry(tmp, struct ptlrpc_request, rq_list);
392                 CERROR("Request %p is on the dying list at cleanup!\n", req);
393                 list_del_init(&req->rq_list);
394                 req->rq_client = NULL;
395                 ptlrpc_free_req(req); 
396         }
397         spin_unlock(&cli->cli_lock);
398
399         EXIT;
400         return;
401 }
402
403 void ptlrpc_continue_req(struct ptlrpc_request *req)
404 {
405         ENTRY;
406         CDEBUG(D_INODE, "continue delayed request %Ld opc %d\n", 
407                req->rq_xid, req->rq_reqmsg->opc); 
408         wake_up(&req->rq_wait_for_rep); 
409         EXIT;
410 }
411
412 void ptlrpc_resend_req(struct ptlrpc_request *req)
413 {
414         ENTRY;
415         CDEBUG(D_INODE, "resend request %Ld, opc %d\n", 
416                req->rq_xid, req->rq_reqmsg->opc);
417         req->rq_status = -EAGAIN;
418         req->rq_level = LUSTRE_CONN_RECOVD;
419         req->rq_flags |= PTL_RPC_FL_RESEND;
420         req->rq_flags &= ~PTL_RPC_FL_TIMEOUT;
421         wake_up(&req->rq_wait_for_rep);
422         EXIT;
423 }
424
425 void ptlrpc_restart_req(struct ptlrpc_request *req)
426 {
427         ENTRY;
428         CDEBUG(D_INODE, "restart completed request %Ld, opc %d\n", 
429                req->rq_xid, req->rq_reqmsg->opc);
430         req->rq_status = -ERESTARTSYS;
431         req->rq_flags |= PTL_RPC_FL_RECOVERY;
432         req->rq_flags &= ~PTL_RPC_FL_TIMEOUT;
433         wake_up(&req->rq_wait_for_rep);
434         EXIT;
435 }
436
437 static int expired_request(void *data)
438 {
439         struct ptlrpc_request *req = data;
440         
441         ENTRY;
442         CERROR("req timeout on connid %d xid %Ld\n", req->rq_connid,
443                (unsigned long long)req->rq_xid);
444         req->rq_timeout = 0;
445         req->rq_connection->c_level = LUSTRE_CONN_RECOVD;
446         req->rq_flags |= PTL_RPC_FL_TIMEOUT;
447         /* Activate the recovd for this client, if there is one. */
448         if (req->rq_client && req->rq_client->cli_recovd)
449                 recovd_cli_fail(req->rq_client);
450
451         /* If this request is for recovery or other primordial tasks,
452          * don't go back to sleep.
453          */
454         if (req->rq_level < LUSTRE_CONN_FULL)
455                 RETURN(1);
456         RETURN(0);
457 }
458
459 static int interrupted_request(void *data)
460 {
461         struct ptlrpc_request *req = data;
462         ENTRY;
463         req->rq_flags |= PTL_RPC_FL_INTR;
464         RETURN(1); /* ignored, as of this writing */
465 }
466
467 int ptlrpc_queue_wait(struct ptlrpc_request *req)
468 {
469         int rc = 0;
470         struct l_wait_info lwi;
471         struct ptlrpc_client *cli = req->rq_client;
472         ENTRY;
473
474         init_waitqueue_head(&req->rq_wait_for_rep);
475         CDEBUG(D_NET, "subsys: %s req %Ld opc %d level %d, conn level %d\n",
476                cli->cli_name, req->rq_xid, req->rq_reqmsg->opc, req->rq_level,
477                req->rq_connection->c_level);
478
479         /* XXX probably both an import and connection level are needed */
480         if (req->rq_level > req->rq_connection->c_level) { 
481                 CERROR("process %d waiting for recovery (%d > %d)\n", 
482                        current->pid, req->rq_level, req->rq_connection->c_level);
483
484                 spin_lock(&cli->cli_lock);
485                 list_del_init(&req->rq_list);
486                 list_add_tail(&req->rq_list, &cli->cli_delayed_head);
487                 spin_unlock(&cli->cli_lock);
488
489 #warning shaver: what happens when we get interrupted during this wait?
490                 lwi = LWI_INTR(SIGTERM | SIGKILL | SIGINT, NULL, NULL);
491                 l_wait_event(req->rq_wait_for_rep,
492                              req->rq_level <= req->rq_connection->c_level,
493                              &lwi);
494
495                 spin_lock(&cli->cli_lock);
496                 list_del_init(&req->rq_list);
497                 spin_unlock(&cli->cli_lock);
498
499                 CERROR("process %d resumed\n", current->pid);
500         }
501  resend:
502         req->rq_time = CURRENT_TIME;
503         req->rq_timeout = 100;
504         rc = ptl_send_rpc(req);
505         if (rc) {
506                 CERROR("error %d, opcode %d\n", rc, req->rq_reqmsg->opc);
507                 if ( rc > 0 ) 
508                         rc = -rc;
509                 ptlrpc_cleanup_request_buf(req);
510                 up(&cli->cli_rpc_sem);
511                 RETURN(-rc);
512         }
513
514         spin_lock(&cli->cli_lock);
515         list_del_init(&req->rq_list);
516         list_add_tail(&req->rq_list, &cli->cli_sending_head);
517         spin_unlock(&cli->cli_lock);
518
519         CDEBUG(D_OTHER, "-- sleeping\n");
520         lwi = LWI_TIMEOUT_INTR(req->rq_timeout * HZ, expired_request,
521                                SIGKILL | SIGTERM | SIGINT, interrupted_request,
522                                req);
523         l_wait_event(req->rq_wait_for_rep, ptlrpc_check_reply(req), &lwi);
524         CDEBUG(D_OTHER, "-- done\n");
525
526         /* Don't resend if we were interrupted. */
527         if ((req->rq_flags & (PTL_RPC_FL_RESEND | PTL_RPC_FL_INTR)) ==
528             PTL_RPC_FL_RESEND) {
529                 req->rq_flags &= ~PTL_RPC_FL_RESEND;
530                 goto resend;
531         }
532
533         up(&cli->cli_rpc_sem);
534         if (req->rq_flags & PTL_RPC_FL_INTR) {
535                 if (!(req->rq_flags & PTL_RPC_FL_TIMEOUT))
536                         LBUG(); /* should only be interrupted if we timed out. */
537                 /* Clean up the dangling reply buffers */
538                 ptlrpc_abort(req);
539                 GOTO(out, rc = -EINTR);
540         }
541
542         if (req->rq_flags & PTL_RPC_FL_TIMEOUT)
543                 GOTO(out, rc = -ETIMEDOUT);
544
545         if (!(req->rq_flags & PTL_RPC_FL_REPLIED))
546                 GOTO(out, rc = req->rq_status);
547
548         rc = lustre_unpack_msg(req->rq_repmsg, req->rq_replen);
549         if (rc) {
550                 CERROR("unpack_rep failed: %d\n", rc);
551                 GOTO(out, rc);
552         }
553         CDEBUG(D_NET, "got rep %Ld\n", req->rq_xid);
554         if (req->rq_repmsg->status == 0)
555                 CDEBUG(D_NET, "--> buf %p len %d status %d\n", req->rq_repmsg,
556                        req->rq_replen, req->rq_repmsg->status);
557
558         spin_lock(&cli->cli_lock);
559         cli->cli_last_xid = req->rq_repmsg->last_xid;
560         cli->cli_last_committed = req->rq_repmsg->last_committed;
561         ptlrpc_free_committed(cli); 
562         spin_unlock(&cli->cli_lock);
563
564         EXIT;
565  out:
566         return rc;
567 }
568
569 int ptlrpc_replay_req(struct ptlrpc_request *req)
570 {
571         int rc = 0;
572         struct ptlrpc_client *cli = req->rq_client;
573         struct l_wait_info lwi = LWI_INTR(SIGKILL|SIGTERM|SIGINT, NULL, NULL);
574         ENTRY;
575
576         init_waitqueue_head(&req->rq_wait_for_rep);
577         CDEBUG(D_NET, "req %Ld opc %d level %d, conn level %d\n",
578                req->rq_xid, req->rq_reqmsg->opc, req->rq_level,
579                req->rq_connection->c_level);
580
581         req->rq_time = CURRENT_TIME;
582         req->rq_timeout = 100;
583         rc = ptl_send_rpc(req);
584         if (rc) {
585                 CERROR("error %d, opcode %d\n", rc, req->rq_reqmsg->opc);
586                 ptlrpc_cleanup_request_buf(req);
587                 up(&cli->cli_rpc_sem);
588                 RETURN(-rc);
589         }
590
591         CDEBUG(D_OTHER, "-- sleeping\n");
592         l_wait_event(req->rq_wait_for_rep, ptlrpc_check_reply(req), &lwi);
593         CDEBUG(D_OTHER, "-- done\n");
594
595         up(&cli->cli_rpc_sem);
596
597         if (!(req->rq_flags & PTL_RPC_FL_REPLIED)) {
598                 CERROR("Unknown reason for wakeup\n");
599                 /* XXX Phil - I end up here when I kill obdctl */
600                 ptlrpc_abort(req);
601                 GOTO(out, rc = -EINTR);
602         }
603
604         rc = lustre_unpack_msg(req->rq_repmsg, req->rq_replen);
605         if (rc) {
606                 CERROR("unpack_rep failed: %d\n", rc);
607                 GOTO(out, rc);
608         }
609
610         CDEBUG(D_NET, "got rep %Ld\n", req->rq_xid);
611         if (req->rq_repmsg->status == 0)
612                 CDEBUG(D_NET, "--> buf %p len %d status %d\n", req->rq_repmsg,
613                        req->rq_replen, req->rq_repmsg->status);
614         else {
615                 CERROR("recovery failed: "); 
616                 CERROR("req %Ld opc %d level %d, conn level %d\n", 
617                        req->rq_xid, req->rq_reqmsg->opc, req->rq_level,
618                        req->rq_connection->c_level);
619                 LBUG();
620         }
621
622  out:
623         RETURN(rc);
624 }