1 /* -*- mode: c; c-basic-offset: 8; indent-tabs-mode: nil; -*-
2 * vim:expandtab:shiftwidth=8:tabstop=8:
4 * Lustre Light Super operations
6 * This code is issued under the GNU General Public License.
7 * See the file COPYING in this distribution
9 * Copyright (C) 1996 Peter J. Braam <braam@stelias.com>
10 * Copyright (C) 1999 Stelias Computing Inc. <braam@stelias.com>
11 * Copyright (C) 1999 Seagate Technology Inc.
12 * Copyright (C) 2001 Mountain View Data, Inc.
13 * Copyright (C) 2002 Cluster File Systems, Inc.
17 #include <linux/config.h>
18 #include <linux/module.h>
20 #define DEBUG_SUBSYSTEM S_LLITE
22 #include <linux/lustre_lite.h>
23 #include <linux/lustre_ha.h>
/*
 * ll_reconnect - reconnect the MDC connection of superblock @sbi to the MDS.
 *
 * Visible steps, in order:
 *   - readdress the MDC connection to "mds";
 *   - connect it through connmgr_connect() and, on success, set the
 *     connection level to LUSTRE_CONN_CON;
 *   - call mdc_getstatus() and store the xid it reports in the MDC
 *     client's cli_last_rcvd, then set the level to LUSTRE_CONN_RECOVD.
 *
 * Both visible failure paths log through CERROR and jump to out_disc with
 * err set to -ENOTCONN.
 *
 * NOTE(review): this listing omits lines.  The declarations of err and
 * last_xid, the conditions guarding the error paths, the remaining
 * mdc_getstatus() arguments and the out_disc label are not visible here;
 * confirm the details against the complete file.
 */
26 static int ll_reconnect(struct ll_sb_info *sbi)
28 struct ll_fid rootfid;
29 __u64 last_committed, last_rcvd;
32 struct ptlrpc_request *request;
/* Point the existing MDC connection at the "mds" target before connecting. */
34 ptlrpc_readdress_connection(sbi2mdc(sbi)->mdc_conn, "mds");
36 err = connmgr_connect(ptlrpc_connmgr, sbi2mdc(sbi)->mdc_conn);
/*
 * Connect failure path.  NOTE(review): this is the only visible path that
 * calls ptlrpc_put_connection(); the mdc_getstatus() failure path below
 * does not -- confirm the asymmetry is intended.
 */
38 CERROR("cannot connect to MDS: rc = %d\n", err);
39 ptlrpc_put_connection(sbi2mdc(sbi)->mdc_conn);
40 GOTO(out_disc, err = -ENOTCONN);
42 sbi2mdc(sbi)->mdc_conn->c_level = LUSTRE_CONN_CON;
44 /* XXX: need to store the last_* values somewhere */
45 err = mdc_getstatus(&sbi->ll_mdc_conn,
46 &rootfid, &last_committed,
/* Whatever error mdc_getstatus() returned is reported to the caller as -ENOTCONN. */
51 CERROR("cannot mds_connect: rc = %d\n", err);
52 GOTO(out_disc, err = -ENOTCONN);
/*
 * NOTE(review): cli_last_rcvd is assigned from last_xid, not from the
 * last_rcvd local declared above; ll_recover() later compares request
 * xids against cli_last_rcvd.  Confirm this is the intended value.
 */
54 sbi2mdc(sbi)->mdc_client->cli_last_rcvd = last_xid;
55 sbi2mdc(sbi)->mdc_conn->c_level = LUSTRE_CONN_RECOVD;
/*
 * ll_recover - recovery handler for ptlrpc client @cli.
 *
 * The superblock info is taken from cli->cli_data.  With cli->cli_lock
 * held, every request on cli->cli_sending_head is examined and handled
 * according to its flags and its xid relative to cli->cli_last_rcvd:
 *
 *   PTL_RPC_FL_REPLAY set                  -> ptlrpc_replay_req()
 *   REPLIED set,  xid <= cli_last_rcvd     -> nothing to do (debug log only)
 *   REPLIED set,  xid >  cli_last_rcvd     -> ptlrpc_replay_req()
 *   REPLIED clear, xid <= cli_last_rcvd    -> ptlrpc_restart_req()
 *   REPLIED clear, xid >  cli_last_rcvd    -> ptlrpc_resend_req()
 *
 * Afterwards the MDC connection level is set to LUSTRE_CONN_FULL,
 * recovd_cli_fixed() is called, and every request on
 * cli->cli_delayed_head is passed to ptlrpc_continue_req().
 *
 * NOTE(review): this listing omits lines.  The declaration of rc, the
 * checks on rc after each replay, the end of each branch (so whether the
 * branches are mutually exclusive per iteration), any step preceding
 * "2." and the return statement are not visible; confirm against the
 * complete file.
 */
62 int ll_recover(struct ptlrpc_client *cli)
64 struct ptlrpc_request *req;
65 struct list_head *tmp, *pos;
66 struct ll_sb_info *sbi = cli->cli_data;
73 /* 2. walk the request list */
/*
 * NOTE(review): in the visible lines cli_lock is taken here and released
 * only by the final spin_unlock(), so the replay/restart/resend/continue
 * helpers appear to run under the spinlock -- confirm none of them can
 * sleep or retake cli_lock.
 */
74 spin_lock(&cli->cli_lock);
/*
 * Per list_for_each_safe()'s argument order, tmp is the iteration cursor
 * and pos holds the saved next entry.
 */
75 list_for_each_safe(tmp, pos, &cli->cli_sending_head) {
76 req = list_entry(tmp, struct ptlrpc_request, rq_list);
78 /* replay what needs to be replayed */
79 if (req->rq_flags & PTL_RPC_FL_REPLAY) {
80 CDEBUG(D_INODE, "req %Ld needs replay [last rcvd %Ld]\n",
81 req->rq_xid, cli->cli_last_rcvd);
82 rc = ptlrpc_replay_req(req);
84 CERROR("recovery replay error %d for request %Ld\n",
90 /* server has seen req, we have reply: skip */
91 if ((req->rq_flags & PTL_RPC_FL_REPLIED) &&
92 req->rq_xid <= cli->cli_last_rcvd) {
93 CDEBUG(D_INODE, "req %Ld was complete: skip [last rcvd %Ld]\n",
94 req->rq_xid, cli->cli_last_rcvd);
98 /* server has lost req, we have reply: resend, ign reply */
99 if ((req->rq_flags & PTL_RPC_FL_REPLIED) &&
100 req->rq_xid > cli->cli_last_rcvd) {
101 CDEBUG(D_INODE, "lost req %Ld have rep: replay [last rcvd %Ld]\n",
102 req->rq_xid, cli->cli_last_rcvd);
103 rc = ptlrpc_replay_req(req);
105 CERROR("request resend error %d for request %Ld\n",
111 /* server has seen req, we have lost reply: -ERESTARTSYS */
112 if ( !(req->rq_flags & PTL_RPC_FL_REPLIED) &&
113 req->rq_xid <= cli->cli_last_rcvd) {
114 CDEBUG(D_INODE, "lost rep %Ld srv did req: restart [last rcvd %Ld]\n",
115 req->rq_xid, cli->cli_last_rcvd);
116 ptlrpc_restart_req(req);
119 /* service has not seen req, no reply: resend */
120 if ( !(req->rq_flags & PTL_RPC_FL_REPLIED) &&
121 req->rq_xid > cli->cli_last_rcvd) {
122 CDEBUG(D_INODE, "lost rep/req %Ld: resend [last rcvd %Ld]\n",
123 req->rq_xid, cli->cli_last_rcvd);
124 ptlrpc_resend_req(req);
/* Sending list handled: raise the connection level and report the client as fixed. */
129 sbi2mdc(sbi)->mdc_conn->c_level = LUSTRE_CONN_FULL;
130 recovd_cli_fixed(cli);
132 /* Finally, continue what we delayed since recovery started */
133 list_for_each_safe(tmp, pos, &cli->cli_delayed_head) {
134 req = list_entry(tmp, struct ptlrpc_request, rq_list);
135 ptlrpc_continue_req(req);
140 spin_unlock(&cli->cli_lock);