1 /* -*- mode: c; c-basic-offset: 8; indent-tabs-mode: nil; -*-
2 * vim:expandtab:shiftwidth=8:tabstop=8:
4 * Lustre Light Super operations
6 * This code is issued under the GNU General Public License.
7 * See the file COPYING in this distribution
9 * Copyright (C) 1996 Peter J. Braam <braam@stelias.com>
10 * Copyright (C) 1999 Stelias Computing Inc. <braam@stelias.com>
11 * Copyright (C) 1999 Seagate Technology Inc.
12 * Copyright (C) 2001 Mountain View Data, Inc.
13 * Copyright (C) 2002 Cluster File Systems, Inc.
17 #include <linux/config.h>
18 #include <linux/module.h>
20 #define DEBUG_SUBSYSTEM S_LLITE
22 #include <linux/lustre_lite.h>
23 #include <linux/lustre_ha.h>
/*
 * ll_reconnect - step the MDC connection of @sbi through its reconnect states.
 *
 * Visible sequence: re-address the MDC connection (passing "mds"), mark it
 * LUSTRE_CONN_CON, call mdc_getstatus(), then on the non-error path store
 * last_xid in the connection's c_last_xid and mark it LUSTRE_CONN_RECOVD.
 * The error path logs through CERROR and jumps to out_disc with
 * err = -ENOTCONN.
 *
 * NOTE(review): parts of this function are not visible here (some local
 * declarations, the remaining mdc_getstatus() arguments, the error test,
 * the out_disc label and the return statement).  Presumably err is the
 * return value -- confirm against the complete file.
 */
25 static int ll_reconnect(struct ll_sb_info *sbi)
27 struct ll_fid rootfid;
31 struct ptlrpc_request *request;
/* Hand the MDC's connection back to ptlrpc for re-addressing. */
33 ptlrpc_readdress_connection(sbi2mdc(sbi)->cl_conn, "mds");
/* Connection level while the getstatus call below is issued. */
35 sbi2mdc(sbi)->cl_conn->c_level = LUSTRE_CONN_CON;
37 /* XXX: need to store the last_* values somewhere */
/* rootfid and last_committed are filled in here but not used by any visible line. */
38 err = mdc_getstatus(&sbi->ll_mdc_conn, &rootfid, &last_committed,
/* Failure handling: the original error code is replaced by -ENOTCONN. */
41 CERROR("cannot mds_connect: rc = %d\n", err);
42 GOTO(out_disc, err = -ENOTCONN);
/*
 * Record the xid obtained from mdc_getstatus(); ll_recover() compares each
 * pending request's rq_xid against c_last_xid.
 */
44 sbi2mdc(sbi)->cl_conn->c_last_xid = last_xid;
45 sbi2mdc(sbi)->cl_conn->c_level = LUSTRE_CONN_RECOVD;
/*
 * ll_recover - walk the pending requests of @cli's connection after a
 * reconnect and replay, skip, restart or resend each one.
 *
 * @cli: ptlrpc client; cli->cli_data supplies the ll_sb_info and
 *       cli->cli_connection the connection whose lists are walked.
 *
 * Each request on conn->c_sending_head is classified by its rq_flags and by
 * comparing rq_xid with conn->c_last_xid:
 *   - PTL_RPC_FL_REPLAY set                  -> ptlrpc_replay_req()
 *   - REPLIED,     rq_xid <= c_last_xid      -> logged as complete
 *   - REPLIED,     rq_xid >  c_last_xid      -> ptlrpc_replay_req()
 *   - not REPLIED, rq_xid <= c_last_xid      -> ptlrpc_restart_req()
 *   - not REPLIED, rq_xid >  c_last_xid      -> ptlrpc_resend_req()
 * Afterwards the MDC connection is marked LUSTRE_CONN_FULL,
 * recovd_conn_fixed() is called, and every request on conn->c_delayed_head
 * is passed to ptlrpc_continue_req().
 *
 * NOTE(review): this view is missing lines (the rc declaration, the tests
 * on rc, loop-control statements between the cases, closing braces and the
 * return).  Whether the cases are mutually exclusive via `continue` and
 * what is returned cannot be confirmed from here.
 */
51 int ll_recover(struct ptlrpc_client *cli)
55 /* XXXshaver this code needs to know about connection-driven recovery! */
57 struct ptlrpc_request *req;
/* Used with list_for_each_safe(): tmp is the cursor, pos holds the saved next entry. */
58 struct list_head *tmp, *pos;
59 struct ll_sb_info *sbi = cli->cli_data;
60 struct ptlrpc_connection *conn = cli->cli_connection;
/* NOTE(review): a step 1 presumably precedes this but is not visible here. */
67 /* 2. walk the request list */
/*
 * NOTE(review): in the visible lines c_lock is taken here and released only
 * at the very end, so the replay/restart/resend/continue helpers appear to
 * run under the spinlock -- verify that none of them can sleep.
 */
68 spin_lock(&conn->c_lock);
69 list_for_each_safe(tmp, pos, &conn->c_sending_head) {
70 req = list_entry(tmp, struct ptlrpc_request, rq_list);
72 /* replay what needs to be replayed */
73 if (req->rq_flags & PTL_RPC_FL_REPLAY) {
74 CDEBUG(D_INODE, "req %Ld needs replay [last rcvd %Ld]\n",
75 req->rq_xid, conn->c_last_xid);
76 rc = ptlrpc_replay_req(req);
/* Replay failure is reported through CERROR; further handling is not visible. */
78 CERROR("recovery replay error %d for req %Ld\n",
84 /* server has seen req, we have reply: skip */
85 if ((req->rq_flags & PTL_RPC_FL_REPLIED) &&
86 req->rq_xid <= conn->c_last_xid) {
88 "req %Ld was complete: skip [last rcvd %Ld]\n",
89 req->rq_xid, conn->c_last_xid);
93 /* server has lost req, we have reply: resend, ign reply */
94 if ((req->rq_flags & PTL_RPC_FL_REPLIED) &&
95 req->rq_xid > conn->c_last_xid) {
96 CDEBUG(D_INODE, "lost req %Ld have rep: replay [last "
97 "rcvd %Ld]\n", req->rq_xid, conn->c_last_xid);
/* Same helper as the PTL_RPC_FL_REPLAY case above. */
98 rc = ptlrpc_replay_req(req);
100 CERROR("request resend error %d for req %Ld\n",
106 /* server has seen req, we have lost reply: -ERESTARTSYS */
107 if ( !(req->rq_flags & PTL_RPC_FL_REPLIED) &&
108 req->rq_xid <= conn->c_last_xid) {
109 CDEBUG(D_INODE, "lost rep %Ld srv did req: restart "
111 req->rq_xid, conn->c_last_xid);
112 ptlrpc_restart_req(req);
115 /* service has not seen req, no reply: resend */
116 if ( !(req->rq_flags & PTL_RPC_FL_REPLIED) &&
117 req->rq_xid > conn->c_last_xid) {
119 "lost rep/req %Ld: resend [last rcvd %Ld]\n",
120 req->rq_xid, conn->c_last_xid);
121 ptlrpc_resend_req(req);
/*
 * The level is set on sbi2mdc(sbi)->cl_conn, while the lists walked here
 * belong to cli->cli_connection.
 * NOTE(review): presumably the same connection object -- confirm.
 */
126 sbi2mdc(sbi)->cl_conn->c_level = LUSTRE_CONN_FULL;
127 recovd_conn_fixed(conn);
129 /* Finally, continue what we delayed since recovery started */
130 list_for_each_safe(tmp, pos, &conn->c_delayed_head) {
131 req = list_entry(tmp, struct ptlrpc_request, rq_list);
132 ptlrpc_continue_req(req);
137 spin_unlock(&conn->c_lock);