Whamcloud - gitweb
- mds failover code
[fs/lustre-release.git] / lustre / ptlrpc / client.c
1 /* -*- mode: c; c-basic-offset: 8; indent-tabs-mode: nil; -*-
2  * vim:expandtab:shiftwidth=8:tabstop=8:
3  *
4  *  Copyright (C) 2002 Cluster File Systems, Inc.
5  *
6  *   This file is part of Lustre, http://www.lustre.org.
7  *
8  *   Lustre is free software; you can redistribute it and/or
9  *   modify it under the terms of version 2 of the GNU General Public
10  *   License as published by the Free Software Foundation.
11  *
12  *   Lustre is distributed in the hope that it will be useful,
13  *   but WITHOUT ANY WARRANTY; without even the implied warranty of
14  *   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
15  *   GNU General Public License for more details.
16  *
17  *   You should have received a copy of the GNU General Public License
18  *   along with Lustre; if not, write to the Free Software
19  *   Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
20  *
21  */
22
23 #define EXPORT_SYMTAB
24
25 #define DEBUG_SUBSYSTEM S_RPC
26
27 #include <linux/lustre_ha.h>
28
29 void ptlrpc_init_client(struct recovd_obd *recovd, 
30                         int (*recover)(struct ptlrpc_client *recover),
31                         int req_portal,
32                         int rep_portal, struct ptlrpc_client *cl)
33 {
34         memset(cl, 0, sizeof(*cl));
35         cl->cli_recovd = recovd;
36         cl->cli_recover = recover;
37         if (recovd)
38                 recovd_cli_manage(recovd, cl);
39         cl->cli_obd = NULL;
40         cl->cli_request_portal = req_portal;
41         cl->cli_reply_portal = rep_portal;
42         INIT_LIST_HEAD(&cl->cli_delayed_head);
43         INIT_LIST_HEAD(&cl->cli_sending_head);
44         INIT_LIST_HEAD(&cl->cli_dying_head);
45         spin_lock_init(&cl->cli_lock);
46         sema_init(&cl->cli_rpc_sem, 32);
47 }
48
49 __u8 *ptlrpc_req_to_uuid(struct ptlrpc_request *req)
50 {
51         return req->rq_connection->c_remote_uuid;
52 }
53
54 struct ptlrpc_connection *ptlrpc_uuid_to_connection(char *uuid)
55 {
56         struct ptlrpc_connection *c;
57         struct lustre_peer peer;
58         int err;
59
60         err = kportal_uuid_to_peer(uuid, &peer);
61         if (err != 0) {
62                 CERROR("cannot find peer %s!\n", uuid);
63                 return NULL;
64         }
65
66         c = ptlrpc_get_connection(&peer);
67         if (c)
68                 c->c_epoch++;
69
70         return c;
71 }
72
73 void ptlrpc_readdress_connection(struct ptlrpc_connection *conn, char *uuid)
74 {
75         struct lustre_peer peer;
76         int err;
77
78         err = kportal_uuid_to_peer(uuid, &peer);
79         if (err != 0) {
80                 CERROR("cannot find peer %s!\n", uuid);
81                 return;
82         }
83         
84         memcpy(&conn->c_peer, &peer, sizeof(peer)); 
85         return;
86 }
87
88 struct ptlrpc_bulk_desc *ptlrpc_prep_bulk(struct ptlrpc_connection *conn)
89 {
90         struct ptlrpc_bulk_desc *bulk;
91
92         OBD_ALLOC(bulk, sizeof(*bulk));
93         if (bulk != NULL) {
94                 bulk->b_connection = ptlrpc_connection_addref(conn);
95                 init_waitqueue_head(&bulk->b_waitq);
96         }
97
98         return bulk;
99 }
100
101 void ptlrpc_free_bulk(struct ptlrpc_bulk_desc *bulk)
102 {
103         ENTRY;
104         if (bulk == NULL) {
105                 EXIT;
106                 return;
107         }
108
109         ptlrpc_put_connection(bulk->b_connection);
110
111         OBD_FREE(bulk, sizeof(*bulk));
112         EXIT;
113 }
114
115 struct ptlrpc_request *ptlrpc_prep_req(struct ptlrpc_client *cl,
116                                        struct ptlrpc_connection *conn,
117                                        int opcode, int count, int *lengths,
118                                        char **bufs)
119 {
120         struct ptlrpc_request *request;
121         int rc;
122         ENTRY;
123
124         OBD_ALLOC(request, sizeof(*request));
125         if (!request) {
126                 CERROR("request allocation out of memory\n");
127                 RETURN(NULL);
128         }
129
130         rc = lustre_pack_msg(count, lengths, bufs,
131                              &request->rq_reqlen, &request->rq_reqmsg);
132         if (rc) {
133                 CERROR("cannot pack request %d\n", rc);
134                 RETURN(NULL);
135         }
136
137         request->rq_type = PTL_RPC_TYPE_REQUEST;
138         request->rq_connection = ptlrpc_connection_addref(conn);
139
140         request->rq_reqmsg->conn = (__u64)(unsigned long)conn->c_remote_conn;
141         request->rq_reqmsg->token = conn->c_remote_token;
142         request->rq_reqmsg->opc = HTON__u32(opcode);
143         request->rq_reqmsg->type = HTON__u32(PTL_RPC_MSG_REQUEST);
144         INIT_LIST_HEAD(&request->rq_list);
145
146         /* this will be dec()d once in req_finished, once in free_committed */
147         atomic_set(&request->rq_refcount, 2);
148
149         spin_lock(&conn->c_lock);
150         request->rq_reqmsg->xid = HTON__u32(++conn->c_xid_out);
151         request->rq_xid = conn->c_xid_out;
152         spin_unlock(&conn->c_lock);
153
154         request->rq_client = cl;
155
156         RETURN(request);
157 }
158
159 void ptlrpc_req_finished(struct ptlrpc_request *request)
160 {
161         if (request == NULL)
162                 return;
163
164         if (request->rq_repmsg != NULL) { 
165                 OBD_FREE(request->rq_repmsg, request->rq_replen);
166                 request->rq_repmsg = NULL;
167                 request->rq_reply_md.start = NULL; 
168         }
169
170         if (atomic_dec_and_test(&request->rq_refcount))
171                 ptlrpc_free_req(request);
172 }
173
174 void ptlrpc_free_req(struct ptlrpc_request *request)
175 {
176         if (request == NULL)
177                 return;
178
179         if (request->rq_repmsg != NULL)
180                 OBD_FREE(request->rq_repmsg, request->rq_replen);
181         if (request->rq_reqmsg != NULL)
182                 OBD_FREE(request->rq_reqmsg, request->rq_reqlen);
183
184         if (request->rq_client) {
185                 spin_lock(&request->rq_client->cli_lock);
186                 list_del_init(&request->rq_list);
187                 spin_unlock(&request->rq_client->cli_lock);
188         }
189
190         ptlrpc_put_connection(request->rq_connection);
191
192         OBD_FREE(request, sizeof(*request));
193 }
194
195 static int ptlrpc_check_reply(struct ptlrpc_request *req)
196 {
197         int rc = 0;
198
199         if (req->rq_repmsg != NULL) {
200                 req->rq_transno = NTOH__u64(req->rq_repmsg->transno);
201                 req->rq_flags |= PTL_RPC_FL_REPLIED;
202                 GOTO(out, rc = 1);
203         }
204
205         if (req->rq_flags & PTL_RPC_FL_RESEND) { 
206                 CERROR("-- RESEND --\n");
207                 GOTO(out, rc = 1);
208         }
209
210         if (req->rq_flags & PTL_RPC_FL_RECOVERY) { 
211                 CERROR("-- RESTART --\n");
212                 GOTO(out, rc = 1);
213         }
214
215
216         if (CURRENT_TIME - req->rq_time >= req->rq_timeout) {
217                 CERROR("-- REQ TIMEOUT --\n");
218                 /* clear the timeout */
219                 req->rq_timeout = 0;
220                 req->rq_connection->c_level = LUSTRE_CONN_RECOVD;
221                 req->rq_flags |= PTL_RPC_FL_TIMEOUT;
222                 if (req->rq_client && req->rq_client->cli_recovd)
223                         recovd_cli_fail(req->rq_client);
224                 if (req->rq_level < LUSTRE_CONN_FULL)
225                         rc = -ETIMEDOUT;
226                 else 
227                         rc = 0;
228
229                 GOTO(out, rc);
230         }
231
232         if (req->rq_timeout) { 
233                 schedule_timeout(req->rq_timeout * HZ);
234         }
235
236         if (sigismember(&(current->pending.signal), SIGKILL) ||
237             sigismember(&(current->pending.signal), SIGTERM) ||
238             sigismember(&(current->pending.signal), SIGINT)) {
239                 req->rq_flags |= PTL_RPC_FL_INTR;
240                 GOTO(out, rc = 1);
241         }
242
243  out:
244         return rc;
245 }
246
247 int ptlrpc_check_status(struct ptlrpc_request *req, int err)
248 {
249         ENTRY;
250
251         if (err != 0) {
252                 CERROR("err is %d\n", err);
253                 RETURN(err);
254         }
255
256         if (req == NULL) {
257                 CERROR("req == NULL\n");
258                 RETURN(-ENOMEM);
259         }
260
261         if (req->rq_repmsg == NULL) {
262                 CERROR("req->rq_repmsg == NULL\n");
263                 RETURN(-ENOMEM);
264         }
265
266         if (req->rq_repmsg->type == NTOH__u32(PTL_RPC_MSG_ERR)) {
267                 CERROR("req->rq_repmsg->type == PTL_RPC_MSG_ERR\n");
268                 RETURN(-EINVAL);
269         }
270
271         if (req->rq_repmsg->status != 0) {
272                 CERROR("req->rq_repmsg->status is %d\n",
273                        req->rq_repmsg->status);
274                 /* XXX: translate this error from net to host */
275                 RETURN(req->rq_repmsg->status);
276         }
277
278         RETURN(0);
279 }
280
281 static void ptlrpc_cleanup_request_buf(struct ptlrpc_request *request)
282 {
283         OBD_FREE(request->rq_reqmsg, request->rq_reqlen);
284         request->rq_reqmsg = NULL;
285         request->rq_reqlen = 0;
286 }
287
288 /* Abort this request and cleanup any resources associated with it. */
289 static int ptlrpc_abort(struct ptlrpc_request *request)
290 {
291         /* First remove the ME for the reply; in theory, this means
292          * that we can tear down the buffer safely. */
293         PtlMEUnlink(request->rq_reply_me_h);
294         OBD_FREE(request->rq_reply_md.start, request->rq_replen);
295         request->rq_repmsg = NULL;
296         request->rq_replen = 0;
297         return 0;
298 }
299
300 /* caller must lock cli */
301 void ptlrpc_free_committed(struct ptlrpc_client *cli)
302 {
303         struct list_head *tmp, *saved;
304         struct ptlrpc_request *req;
305
306         list_for_each_safe(tmp, saved, &cli->cli_sending_head) {
307                 req = list_entry(tmp, struct ptlrpc_request, rq_list);
308
309                 if ( (req->rq_flags & PTL_RPC_FL_REPLAY) ) { 
310                         CDEBUG(D_INFO, "Retaining request %Ld for replay\n",
311                                req->rq_xid);
312                         continue;
313                 }
314                         
315                 /* not yet committed */ 
316                 if (!req->rq_transno ||
317                     req->rq_transno > cli->cli_last_committed)
318                         break; 
319
320                 CDEBUG(D_INFO, "Marking request %Ld as committed ("
321                        "transno=%Lu, last_committed=%Lu\n", 
322                        req->rq_xid, req->rq_transno, 
323                        cli->cli_last_committed);
324                 if (atomic_dec_and_test(&req->rq_refcount)) {
325                         /* we do this to prevent free_req deadlock */
326                         list_del_init(&req->rq_list); 
327                         req->rq_client = NULL;
328                         ptlrpc_free_req(req);
329                 } else {
330                         list_del_init(&req->rq_list);
331                         list_add(&req->rq_list, &cli->cli_dying_head);
332                 }
333         }
334
335         EXIT;
336         return;
337 }
338
339 void ptlrpc_cleanup_client(struct ptlrpc_client *cli)
340 {
341         struct list_head *tmp, *saved;
342         struct ptlrpc_request *req;
343         ENTRY;
344
345         spin_lock(&cli->cli_lock);
346         list_for_each_safe(tmp, saved, &cli->cli_sending_head) {
347                 req = list_entry(tmp, struct ptlrpc_request, rq_list);
348                 CDEBUG(D_INFO, "Cleaning req %p from sending list.\n", req);
349                 list_del_init(&req->rq_list);
350                 req->rq_client = NULL;
351                 ptlrpc_free_req(req); 
352         }
353         list_for_each_safe(tmp, saved, &cli->cli_dying_head) {
354                 req = list_entry(tmp, struct ptlrpc_request, rq_list);
355                 CERROR("Request %p is on the dying list at cleanup!\n", req);
356                 list_del_init(&req->rq_list);
357                 req->rq_client = NULL;
358                 ptlrpc_free_req(req); 
359         }
360         spin_unlock(&cli->cli_lock);
361
362         EXIT;
363         return;
364 }
365
366 void ptlrpc_continue_req(struct ptlrpc_request *req)
367 {
368         ENTRY;
369         CDEBUG(D_INODE, "continue delayed request %Ld opc %d\n", 
370                req->rq_xid, req->rq_reqmsg->opc); 
371         wake_up_interruptible(&req->rq_wait_for_rep); 
372         EXIT;
373 }
374
375 void ptlrpc_resend_req(struct ptlrpc_request *req)
376 {
377         ENTRY;
378         CDEBUG(D_INODE, "resend request %Ld, opc %d\n", 
379                req->rq_xid, req->rq_reqmsg->opc);
380         req->rq_status = -EAGAIN;
381         req->rq_level = LUSTRE_CONN_RECOVD;
382         req->rq_flags |= PTL_RPC_FL_RESEND;
383         req->rq_flags &= ~PTL_RPC_FL_TIMEOUT;
384         wake_up_interruptible(&req->rq_wait_for_rep);
385         EXIT;
386 }
387
388 void ptlrpc_restart_req(struct ptlrpc_request *req)
389 {
390         ENTRY;
391         CDEBUG(D_INODE, "restart completed request %Ld, opc %d\n", 
392                req->rq_xid, req->rq_reqmsg->opc);
393         req->rq_status = -ERESTARTSYS;
394         req->rq_flags |= PTL_RPC_FL_RECOVERY;
395         req->rq_flags &= ~PTL_RPC_FL_TIMEOUT;
396         wake_up_interruptible(&req->rq_wait_for_rep);
397         EXIT;
398 }
399
400 int ptlrpc_queue_wait(struct ptlrpc_request *req)
401 {
402         int rc = 0;
403         struct ptlrpc_client *cli = req->rq_client;
404         ENTRY;
405
406         init_waitqueue_head(&req->rq_wait_for_rep);
407         CERROR("subsys: %s req %Ld opc %d level %d, conn level %d\n", 
408                cli->cli_name, req->rq_xid, req->rq_reqmsg->opc, req->rq_level,
409                req->rq_connection->c_level);
410
411         /* XXX probably both an import and connection level are needed */
412         if (req->rq_level > req->rq_connection->c_level) { 
413                 CERROR("process %d waiting for recovery\n", current->pid);
414                 spin_lock(&cli->cli_lock);
415                 list_del_init(&req->rq_list);
416                 list_add(&req->rq_list, cli->cli_delayed_head.prev); 
417                 spin_unlock(&cli->cli_lock);
418                 wait_event_interruptible
419                         (req->rq_wait_for_rep, 
420                          req->rq_level <= req->rq_connection->c_level);
421                 spin_lock(&cli->cli_lock);
422                 list_del_init(&req->rq_list);
423                 spin_unlock(&cli->cli_lock);
424                 CERROR("process %d resumed\n", current->pid);
425         }
426  resend:
427         req->rq_time = CURRENT_TIME;
428         req->rq_timeout = 30;
429         rc = ptl_send_rpc(req);
430         if (rc) {
431                 CERROR("error %d, opcode %d\n", rc, req->rq_reqmsg->opc);
432                 if ( rc > 0 ) 
433                         rc = -rc;
434                 ptlrpc_cleanup_request_buf(req);
435                 up(&cli->cli_rpc_sem);
436                 RETURN(-rc);
437         }
438
439         spin_lock(&cli->cli_lock);
440         list_del_init(&req->rq_list);
441         list_add(&req->rq_list, cli->cli_sending_head.prev);
442         spin_unlock(&cli->cli_lock);
443
444         CDEBUG(D_OTHER, "-- sleeping\n");
445         wait_event_interruptible(req->rq_wait_for_rep, 
446                                  ptlrpc_check_reply(req));
447         CDEBUG(D_OTHER, "-- done\n");
448
449         if (req->rq_flags & PTL_RPC_FL_RESEND) {
450                 req->rq_flags &= ~PTL_RPC_FL_RESEND;
451                 goto resend;
452         }
453
454         up(&cli->cli_rpc_sem);
455         if (req->rq_flags & PTL_RPC_FL_INTR) {
456                 /* Clean up the dangling reply buffers */
457                 ptlrpc_abort(req);
458                 GOTO(out, rc = -EINTR);
459         }
460
461         if (! (req->rq_flags & PTL_RPC_FL_REPLIED)) {
462                 GOTO(out, rc = req->rq_status);
463         }
464
465         rc = lustre_unpack_msg(req->rq_repmsg, req->rq_replen);
466         if (rc) {
467                 CERROR("unpack_rep failed: %d\n", rc);
468                 GOTO(out, rc);
469         }
470         CDEBUG(D_NET, "got rep %d\n", req->rq_repmsg->xid);
471         if (req->rq_repmsg->status == 0)
472                 CDEBUG(D_NET, "--> buf %p len %d status %d\n", req->rq_repmsg,
473                        req->rq_replen, req->rq_repmsg->status);
474
475         spin_lock(&cli->cli_lock);
476         cli->cli_last_rcvd = req->rq_repmsg->last_rcvd;
477         cli->cli_last_committed = req->rq_repmsg->last_committed;
478         ptlrpc_free_committed(cli); 
479         spin_unlock(&cli->cli_lock);
480
481         EXIT;
482  out:
483         return rc;
484 }
485
486 int ptlrpc_replay_req(struct ptlrpc_request *req)
487 {
488         int rc = 0;
489         struct ptlrpc_client *cli = req->rq_client;
490         ENTRY;
491
492         init_waitqueue_head(&req->rq_wait_for_rep);
493         CERROR("req %Ld opc %d level %d, conn level %d\n", 
494                req->rq_xid, req->rq_reqmsg->opc, req->rq_level,
495                req->rq_connection->c_level);
496
497         req->rq_time = CURRENT_TIME;
498         req->rq_timeout = 3;
499         rc = ptl_send_rpc(req);
500         if (rc) {
501                 CERROR("error %d, opcode %d\n", rc, req->rq_reqmsg->opc);
502                 ptlrpc_cleanup_request_buf(req);
503                 up(&cli->cli_rpc_sem);
504                 RETURN(-rc);
505         }
506
507         CDEBUG(D_OTHER, "-- sleeping\n");
508         wait_event_interruptible(req->rq_wait_for_rep, 
509                                  ptlrpc_check_reply(req));
510         CDEBUG(D_OTHER, "-- done\n");
511
512         up(&cli->cli_rpc_sem);
513
514         if (!(req->rq_flags & PTL_RPC_FL_REPLIED)) {
515                 CERROR("Unknown reason for wakeup\n");
516                 /* XXX Phil - I end up here when I kill obdctl */
517                 ptlrpc_abort(req);
518                 GOTO(out, rc = -EINTR);
519         }
520
521         rc = lustre_unpack_msg(req->rq_repmsg, req->rq_replen);
522         if (rc) {
523                 CERROR("unpack_rep failed: %d\n", rc);
524                 GOTO(out, rc);
525         }
526
527         CDEBUG(D_NET, "got rep %d\n", req->rq_repmsg->xid);
528         if (req->rq_repmsg->status == 0)
529                 CDEBUG(D_NET, "--> buf %p len %d status %d\n", req->rq_repmsg,
530                        req->rq_replen, req->rq_repmsg->status);
531         else {
532                 CERROR("recovery failed: "); 
533                 CERROR("req %Ld opc %d level %d, conn level %d\n", 
534                        req->rq_xid, req->rq_reqmsg->opc, req->rq_level,
535                        req->rq_connection->c_level);
536                 LBUG();
537         }
538
539  out:
540         RETURN(rc);
541 }