lustre/ptlrpc/recover.c

   1 /* -*- mode: c; c-basic-offset: 8; indent-tabs-mode: nil; -*-
   2  * vim:expandtab:shiftwidth=8:tabstop=8:
   3  *
   4  * Portal-RPC reconnection and replay operations, for use in recovery.
   5  *
   6  * This code is issued under the GNU General Public License.
   7  * See the file COPYING in this distribution
   8  *
    9  * Copyright (C) 1996 Peter J. Braam <braam@stelias.com>
   10  * Copyright (C) 1999 Stelias Computing Inc. <braam@stelias.com>
   11  * Copyright (C) 1999 Seagate Technology Inc.
   12  * Copyright (C) 2001 Mountain View Data, Inc.
   13  * Copyright (C) 2002 Cluster File Systems, Inc.
  14  *
  15  */
  16
  17 #include <linux/config.h>
  18 #include <linux/module.h>
  19 #include <linux/kmod.h>
  20
  21 #define DEBUG_SUBSYSTEM S_RPC
  22
  23 #include <linux/lustre_ha.h>
  24 #include <linux/lustre_net.h>
  25 #include <linux/obd.h>
  26
  27 static int ptlrpc_reconnect(struct ptlrpc_connection *conn)
  28 {
  29         struct list_head *tmp;
  30         int rc = -EINVAL;
  31
  32         /* XXX c_lock semantics! */
  33         conn->c_level = LUSTRE_CONN_CON;
  34
  35         /* XXX this code MUST be shared with class_obd_connect! */
  36         list_for_each(tmp, &conn->c_imports) {
  37                 struct obd_import *imp = list_entry(tmp, struct obd_import,
  38                                                     imp_chain);
  39                 struct obd_device *obd = imp->imp_obd;
  40                 struct client_obd *cli = &obd->u.cli;
  41                 int rq_opc = (obd->obd_type->typ_ops->o_brw)
  42                         ? OST_CONNECT : MDS_CONNECT;
  43                 int size[] = { sizeof(cli->cl_target_uuid),
  44                                sizeof(obd->obd_uuid) };
  45                 char *tmp[] = {cli->cl_target_uuid, obd->obd_uuid };
  46                 struct lustre_handle old_hdl;
  47                 struct ptlrpc_request *request;
  48                 struct obd_export *ldlmexp;
  49
  50                 LASSERT(imp->imp_connection == conn);
  51                 request = ptlrpc_prep_req(imp, rq_opc, 2, size, tmp);
  52                 request->rq_level = LUSTRE_CONN_NEW;
  53                 request->rq_replen = lustre_msg_size(0, NULL);
  54                 /*
  55                  * This address is the export that represents our client-side
  56                  * LDLM service (for ASTs).  We should only have one on this
  57                  * list, so we just grab the first one.
  58                  *
  59                  * XXX tear down export, call class_obd_connect!
  60                  */
  61                 ldlmexp = list_entry(obd->obd_exports.next, struct obd_export,
  62                                      exp_obd_chain);
  63                 request->rq_reqmsg->addr = (__u64)(unsigned long)ldlmexp;
  64                 request->rq_reqmsg->cookie = ldlmexp->exp_cookie;
  65                 rc = ptlrpc_queue_wait(request);
  66                 rc = ptlrpc_check_status(request, rc);
  67                 if (rc) {
  68                         CERROR("cannot connect to %s@%s: rc = %d\n",
  69                                cli->cl_target_uuid, conn->c_remote_uuid, rc);
  70                         ptlrpc_free_req(request);
  71                         GOTO(out_disc, rc = -ENOTCONN);
  72                 }
  73
  74                 old_hdl = imp->imp_handle;
  75                 imp->imp_handle.addr = request->rq_repmsg->addr;
  76                 imp->imp_handle.cookie = request->rq_repmsg->cookie;
  77                 CERROR("reconnected to %s@%s (%Lx/%Lx, was %Lx/%Lx)!\n",
  78                        cli->cl_target_uuid, conn->c_remote_uuid,
  79                        imp->imp_handle.addr, imp->imp_handle.cookie,
  80                        old_hdl.addr, old_hdl.cookie);
  81                 ptlrpc_free_req(request);
  82         }
  83         conn->c_level = LUSTRE_CONN_RECOVD;
  84
  85  out_disc:
  86         return rc;
  87 }
  88
  89 int ptlrpc_run_recovery_upcall(struct ptlrpc_connection *conn)
  90 {
  91         char *argv[3];
  92         char *envp[3];
  93         int rc;
  94
  95         ENTRY;
  96         conn->c_level = LUSTRE_CONN_RECOVD;
  97
  98         argv[0] = obd_recovery_upcall;
  99         argv[1] = conn->c_remote_uuid;
 100         argv[2] = NULL;
 101
 102         envp[0] = "HOME=/";
 103         envp[1] = "PATH=/sbin:/bin:/usr/sbin:/usr/bin";
 104         envp[2] = NULL;
 105
 106         rc = call_usermodehelper(argv[0], argv, envp);
 107         if (rc < 0) {
 108                 CERROR("Error invoking recovery upcall %s for %s: %d\n",
 109                        argv[0], argv[1], rc);
 110                 CERROR("Check /proc/sys/lustre/recovery_upcall?\n");
 111         } else {
 112                 CERROR("Invoked upcall %s for connection %s\n",
 113                        argv[0], argv[1]);
 114         }
 115
 116         /*
 117          * We don't want to make this a "failed" recovery, because the system
 118          * administrator -- or, perhaps, tester -- may well be able to rescue
 119          * things by running the correct upcall.
 120          */
 121         RETURN(0);
 122 }
 123
 124 int ptlrpc_reconnect_and_replay(struct ptlrpc_connection *conn)
 125 {
 126         int rc = 0;
 127         struct list_head *tmp, *pos;
 128         struct ptlrpc_request *req;
 129         ENTRY;
 130
 131         /* 1. reconnect */
 132         rc = ptlrpc_reconnect(conn);
 133         if (rc)
 134                 RETURN(rc);
 135
 136         /* 2. walk the request list */
 137         spin_lock(&conn->c_lock);
 138
 139         CDEBUG(D_HA, "connection %p to %s has last_xid "LPD64"\n",
 140                conn, conn->c_remote_uuid, conn->c_last_xid);
 141
 142         list_for_each_safe(tmp, pos, &conn->c_sending_head) {
 143                 req = list_entry(tmp, struct ptlrpc_request, rq_list);
 144
 145                 /* replay what needs to be replayed */
 146                 if (req->rq_flags & PTL_RPC_FL_REPLAY) {
 147                         CDEBUG(D_HA, "FL_REPLAY: xid "LPD64" transno "LPD64" op %d @ %d\n",
 148                                req->rq_xid, req->rq_repmsg->transno, req->rq_reqmsg->opc,
 149                                req->rq_import->imp_client->cli_request_portal);
 150                         rc = ptlrpc_replay_req(req);
 151 #if 0
 152 #error We should not hold a spinlock over such a lengthy operation.
 153 #error If necessary, drop spinlock, do operation, re-get spinlock, restart loop.
 154 #error If we need to avoid re-processint items, then delete them from the list
 155 #error as they are replayed and re-add at the tail of this list, so the next
 156 #error item to process will always be at the head of the list.
 157 #endif
 158                         if (rc) {
 159                                 CERROR("recovery replay error %d for req %Ld\n",
 160                                        rc, req->rq_xid);
 161                                 GOTO(out, rc);
 162                         }
 163                 }
 164
 165                 /* server has seen req, we have reply: skip */
 166                 if ((req->rq_flags & PTL_RPC_FL_REPLIED)  &&
 167                     req->rq_xid <= conn->c_last_xid) {
 168                         CDEBUG(D_HA, "REPLIED SKIP: xid "LPD64" transno "LPD64" op %d @ %d\n",
 169                                req->rq_xid, req->rq_repmsg->transno, req->rq_reqmsg->opc,
 170                                req->rq_import->imp_client->cli_request_portal);
 171                         continue;
 172                 }
 173
 174                 /* server has lost req, we have reply: resend, ign reply */
 175                 if ((req->rq_flags & PTL_RPC_FL_REPLIED)  &&
 176                     req->rq_xid > conn->c_last_xid) {
 177                         CDEBUG(D_HA, "REPLIED RESEND: xid "LPD64" transno "LPD64" op %d @ %d\n",
 178                                req->rq_xid, req->rq_repmsg->transno, req->rq_reqmsg->opc,
 179                                req->rq_import->imp_client->cli_request_portal);
 180                         rc = ptlrpc_replay_req(req);
 181                         if (rc) {
 182                                 CERROR("request resend error %d for req %Ld\n",
 183                                        rc, req->rq_xid);
 184                                 GOTO(out, rc);
 185                         }
 186                 }
 187
 188                 /* server has seen req, we have lost reply: -ERESTARTSYS */
 189                 if ( !(req->rq_flags & PTL_RPC_FL_REPLIED)  &&
 190                      req->rq_xid <= conn->c_last_xid) {
 191                         CDEBUG(D_HA, "RESTARTSYS: xid "LPD64" op %d @ %d\n",
 192                                req->rq_xid, req->rq_reqmsg->opc,
 193                                req->rq_import->imp_client->cli_request_portal);
 194                         ptlrpc_restart_req(req);
 195                 }
 196
 197                 /* service has not seen req, no reply: resend */
 198                 if ( !(req->rq_flags & PTL_RPC_FL_REPLIED)  &&
 199                      req->rq_xid > conn->c_last_xid) {
 200                         CDEBUG(D_HA, "RESEND: xid "LPD64" transno "LPD64" op %d @ %d\n",
 201                                req->rq_xid, req->rq_repmsg->transno, req->rq_reqmsg->opc,
 202                                req->rq_import->imp_client->cli_request_portal);
 203                         ptlrpc_resend_req(req);
 204                 }
 205
 206         }
 207
 208         conn->c_level = LUSTRE_CONN_FULL;
 209         recovd_conn_fixed(conn);
 210
 211         CERROR("recovery complete on conn %p(%s), waking delayed reqs\n",
 212                conn, conn->c_remote_uuid);
 213         /* Finally, continue what we delayed since recovery started */
 214         list_for_each_safe(tmp, pos, &conn->c_delayed_head) {
 215                 req = list_entry(tmp, struct ptlrpc_request, rq_list);
 216                 ptlrpc_continue_req(req);
 217         }
 218
 219         EXIT;
 220  out:
 221         spin_unlock(&conn->c_lock);
 222         return rc;
 223 }