Whamcloud - gitweb
- Chain imports on connection, like exports.
author shaver <shaver>
Fri, 6 Sep 2002 15:11:19 +0000 (15:11 +0000)
committer shaver <shaver>
Fri, 6 Sep 2002 15:11:19 +0000 (15:11 +0000)
- Need to be able to go import->obd during recovery.
- Rename mdc to cli in client_obd_setup so that I don't get confused as
  often.
- Use ptlrpc_req_finished instead of ptlrpc_free_req in a few places where
  we need to keep the request around for possible replay during recovery.
- Flag opens as needing replay.
- Rewrite request address and cookie during replay and resend, to handle
  the recovery case (the connection handle changes when the server reboots,
  for example).
- ll_reconnect now correctly reconnects with a new server (possibly with
  a new server UUID).  Tested for MDS, "obviously correct" for OST as well.
- No more references to mdc_getstatus or sbi2mdc in the (now fs-distinct)
  ll_reconnect and ll_recover_reconnect code.

We can now successfully reconnect to a rebooted MDS and begin replay.
Need more work (on MDS or client, not clear yet) to make the replay
succeed.

lustre/include/linux/lustre_import.h
lustre/include/linux/lustre_net.h
lustre/lib/l_net.c
lustre/llite/file.c
lustre/llite/namei.c
lustre/mdc/mdc_request.c
lustre/ptlrpc/client.c
lustre/ptlrpc/connection.c
lustre/ptlrpc/recover.c
lustre/ptlrpc/rpc.c

index e6418cf..aa3e3d7 100644 (file)
@@ -17,6 +17,11 @@ struct obd_import {
         struct ptlrpc_connection *imp_connection;
         struct ptlrpc_client     *imp_client;
         struct lustre_handle      imp_handle;
+        struct list_head          imp_chain;
+        struct obd_device        *imp_obd;
+        /* XXX need a UUID here, I think
+         * XXX what about client_obd.cl_target_uuid?
+         */
 };
 
 extern struct obd_import *class_conn2cliimp(struct lustre_handle *);
index 79c7d02..08c67c6 100644 (file)
@@ -62,9 +62,8 @@ struct ptlrpc_connection {
         struct list_head        c_dying_head;  /* protected by c_lock */
         struct recovd_data      c_recovd_data;
 
-        struct list_head        c_clients; /* XXXshaver will be c_imports */
+        struct list_head        c_imports;
         struct list_head        c_exports;
-
 };
 
 struct ptlrpc_client {
index 09e66d9..844aa8d 100644 (file)
@@ -51,7 +51,7 @@ int client_obd_setup(struct obd_device *obddev, obd_count len, void *buf)
         struct obd_ioctl_data* data = buf;
         int rq_portal, rp_portal;
         char *name;
-        struct client_obd *mdc = &obddev->u.cli;
+        struct client_obd *cli = &obddev->u.cli;
         char server_uuid[37];
         ENTRY;
 
@@ -85,21 +85,22 @@ int client_obd_setup(struct obd_device *obddev, obd_count len, void *buf)
                 RETURN(-EINVAL);
         }
 
-        sema_init(&mdc->cl_sem, 1);
-        mdc->cl_conn_count = 0;
-        memcpy(mdc->cl_target_uuid, data->ioc_inlbuf1, data->ioc_inllen1);
+        sema_init(&cli->cl_sem, 1);
+        cli->cl_conn_count = 0;
+        memcpy(cli->cl_target_uuid, data->ioc_inlbuf1, data->ioc_inllen1);
         memcpy(server_uuid, data->ioc_inlbuf2, MIN(data->ioc_inllen2,
                                                    sizeof(server_uuid)));
 
-        mdc->cl_import.imp_connection = ptlrpc_uuid_to_connection(server_uuid);
-        if (!mdc->cl_import.imp_connection)
+        cli->cl_import.imp_connection = ptlrpc_uuid_to_connection(server_uuid);
+        if (!cli->cl_import.imp_connection)
                 RETURN(-ENOENT);
 
         ptlrpc_init_client(rq_portal, rp_portal, name,
                            &obddev->obd_ldlm_client);
-        mdc->cl_import.imp_client = &obddev->obd_ldlm_client;
+        cli->cl_import.imp_client = &obddev->obd_ldlm_client;
+        cli->cl_import.imp_obd = obddev;
 
-        mdc->cl_max_mdsize = sizeof(struct lov_mds_md);
+        cli->cl_max_mdsize = sizeof(struct lov_mds_md);
 
         MOD_INC_USE_COUNT;
         RETURN(0);
@@ -125,6 +126,7 @@ int client_obd_connect(struct lustre_handle *conn, struct obd_device *obd,
                           sizeof(obd->obd_uuid) };
         char *tmp[] = {cli->cl_target_uuid, obd->obd_uuid};
         int rq_opc = (obd->obd_type->typ_ops->o_brw) ? OST_CONNECT :MDS_CONNECT;
+        struct ptlrpc_connection *c;
 
         ENTRY;
         down(&cli->cl_sem);
@@ -149,22 +151,21 @@ int client_obd_connect(struct lustre_handle *conn, struct obd_device *obd,
 
         request->rq_level = LUSTRE_CONN_NEW;
         request->rq_replen = lustre_msg_size(0, NULL);
-        //   This handle may be important if a callback needs
-        //   to find the mdc/osc
         request->rq_reqmsg->addr = conn->addr;
         request->rq_reqmsg->cookie = conn->cookie;
-        class_conn2export(conn)->exp_connection = request->rq_connection;
+        c = class_conn2export(conn)->exp_connection = request->rq_connection;
 
         rc = ptlrpc_queue_wait(request);
         rc = ptlrpc_check_status(request, rc);
         if (rc)
                 GOTO(out_req, rc);
 
-        request->rq_connection->c_level = LUSTRE_CONN_FULL;
-        cli->cl_import.imp_handle = *(struct lustre_handle *)request->rq_repmsg;
+        list_add(&cli->cl_import.imp_chain, &c->c_imports);
+        c->c_level = LUSTRE_CONN_FULL;
+        cli->cl_import.imp_handle.addr = request->rq_repmsg->addr;
+        cli->cl_import.imp_handle.cookie = request->rq_repmsg->cookie;
 
-        recovd_conn_manage(cli->cl_import.imp_connection,
-                           ptlrpc_recovd, ll_recover);
+        recovd_conn_manage(c, ptlrpc_recovd, ll_recover);
 
 
         EXIT;
@@ -223,6 +224,7 @@ int client_obd_disconnect(struct lustre_handle *conn)
         err = class_disconnect(conn);
         if (!rc && err)
                 rc = err;
+        list_del_init(&cli->cl_import.imp_chain);
         MOD_DEC_USE_COUNT;
  out_sem:
         up(&cli->cl_sem);
@@ -292,6 +294,7 @@ int target_handle_connect(struct ptlrpc_request *req)
         dlmimp->imp_client = &export->exp_obd->obd_ldlm_client;
         dlmimp->imp_handle.addr = req->rq_reqmsg->addr;
         dlmimp->imp_handle.cookie = req->rq_reqmsg->cookie;
+        dlmimp->imp_obd = /* LDLM! */ NULL;
         
 #warning Peter: is this the right place to upgrade the server connection level?
         req->rq_connection->c_level = LUSTRE_CONN_FULL;
index e4b69fe..359b592 100644 (file)
@@ -270,7 +270,7 @@ static int ll_file_release(struct inode *inode, struct file *file)
                         rc = -rc;
                 GOTO(out, rc);
         }
-        ptlrpc_free_req(fd->fd_req);
+        ptlrpc_req_finished(fd->fd_req);
 
         //ldlm_cli_cancel_unused();
 
index b9bb523..9d6f227 100644 (file)
@@ -269,7 +269,7 @@ static struct dentry *ll_lookup2(struct inode *dir, struct dentry *dentry,
 
         EXIT;
  neg_req:
-        ptlrpc_free_req(request);
+        ptlrpc_req_finished(request);
  negative:
         dentry->d_op = &ll_d_ops;
         d_add(dentry, inode);
index 5bd950b..206dcfc 100644 (file)
@@ -304,6 +304,10 @@ int mdc_enqueue(struct lustre_handle *conn, int lock_type,
                 /* pack the intended request */
                 mds_getattr_pack(req, 2, dir, de->d_name.name, de->d_name.len);
 
+                /* we need to replay opens */
+                if (it->it_op == IT_OPEN)
+                        req->rq_flags |= PTL_RPC_FL_REPLAY;
+
                 /* get ready for the reply */
                 req->rq_replen = lustre_msg_size(3, repsize);
         } else if (it->it_op == IT_READDIR) {
index 1257adb..b348e2a 100644 (file)
@@ -404,6 +404,8 @@ void ptlrpc_resend_req(struct ptlrpc_request *req)
         ENTRY;
         CDEBUG(D_INODE, "resend request %Ld, opc %d\n", 
                req->rq_xid, req->rq_reqmsg->opc);
+        req->rq_reqmsg->addr = req->rq_import->imp_handle.addr;
+        req->rq_reqmsg->cookie = req->rq_import->imp_handle.cookie;
         req->rq_status = -EAGAIN;
         req->rq_level = LUSTRE_CONN_RECOVD;
         req->rq_flags |= PTL_RPC_FL_RESEND;
@@ -502,6 +504,15 @@ int ptlrpc_queue_wait(struct ptlrpc_request *req)
                 RETURN(-rc);
         }
 
+#if 0 && REPLAY_DEBUGGED
+        if (req->rq_flags & PTL_RPC_FL_REPLAY) {
+                /* keep a reference so it's around for replaying.
+                 * this is balanced in XXXXXX?
+                 */
+                atomic_inc(&req->rq_refcount);
+        }
+#endif
+
         spin_lock(&conn->c_lock);
         list_del(&req->rq_list);
         list_add_tail(&req->rq_list, &conn->c_sending_head);
@@ -580,6 +591,8 @@ int ptlrpc_replay_req(struct ptlrpc_request *req)
 
         req->rq_time = CURRENT_TIME;
         req->rq_timeout = obd_timeout;
+        req->rq_reqmsg->addr = req->rq_import->imp_handle.addr;
+        req->rq_reqmsg->cookie = req->rq_import->imp_handle.cookie;
         rc = ptl_send_rpc(req);
         if (rc) {
                 CERROR("error %d, opcode %d\n", rc, req->rq_reqmsg->opc);
index 31640d8..d95ee40 100644 (file)
@@ -76,7 +76,7 @@ struct ptlrpc_connection *ptlrpc_get_connection(struct lustre_peer *peer,
         INIT_LIST_HEAD(&c->c_delayed_head);
         INIT_LIST_HEAD(&c->c_sending_head);
         INIT_LIST_HEAD(&c->c_dying_head);
-        INIT_LIST_HEAD(&c->c_clients);
+        INIT_LIST_HEAD(&c->c_imports);
         INIT_LIST_HEAD(&c->c_exports);
         atomic_set(&c->c_refcount, 0);
         ptlrpc_connection_addref(c);
index 7e0cf5c..9809602 100644 (file)
 #include <linux/lustre_lite.h>
 #include <linux/lustre_ha.h>
 
-#if 0
-/* FIXME: reference to mdc_getstatus causes dependency problems */
-static int ll_reconnect(struct ll_sb_info *sbi)
+int ll_reconnect(struct ptlrpc_connection *conn) 
 {
-        struct ll_fid rootfid;
-        __u64 last_committed;
-        __u64 last_xid;
-        int err;
         struct ptlrpc_request *request; 
-        struct ptlrpc_connection *conn = sbi2mdc(sbi)->cl_import.imp_connection;
-
-        ptlrpc_readdress_connection(conn, "mds");
+        struct list_head *tmp;
+        int rc = -EINVAL;
 
+        /* XXX c_lock semantics! */
         conn->c_level = LUSTRE_CONN_CON;
 
-        /* XXX: need to store the last_* values somewhere */
-        err = mdc_getstatus(&sbi->ll_mdc_conn, &rootfid, &last_committed,
-                            &last_xid, &request);
-        if (err) {
-                CERROR("cannot mds_connect: rc = %d\n", err);
-                GOTO(out_disc, err = -ENOTCONN);
+        /* XXX this code MUST be shared with class_obd_connect! */
+        list_for_each(tmp, &conn->c_imports) {
+                struct obd_import *imp = list_entry(tmp, struct obd_import,
+                                                    imp_chain);
+                struct obd_device *obd = imp->imp_obd;
+                struct client_obd *cli = &obd->u.cli;
+                int rq_opc = (obd->obd_type->typ_ops->o_brw)
+                        ? OST_CONNECT : MDS_CONNECT;
+                int size[] = { sizeof(cli->cl_target_uuid),
+                               sizeof(obd->obd_uuid) };
+                char *tmp[] = {cli->cl_target_uuid, obd->obd_uuid };
+                struct lustre_handle old_hdl;
+
+                LASSERT(imp->imp_connection == conn);
+                request = ptlrpc_prep_req(imp, rq_opc, 2, size, tmp);
+                request->rq_level = LUSTRE_CONN_NEW;
+                request->rq_replen = lustre_msg_size(0, NULL);
+                /* XXX are (addr, cookie) right? */
+                request->rq_reqmsg->addr = imp->imp_handle.addr;
+                request->rq_reqmsg->cookie = imp->imp_handle.cookie;
+                rc = ptlrpc_queue_wait(request);
+                rc = ptlrpc_check_status(request, rc);
+                if (rc) {
+                        CERROR("cannot connect to %s@%s: rc = %d\n",
+                               cli->cl_target_uuid, conn->c_remote_uuid, rc);
+                        ptlrpc_free_req(request);
+                        GOTO(out_disc, rc = -ENOTCONN);
+                }
+
+                old_hdl = imp->imp_handle;
+                imp->imp_handle.addr = request->rq_repmsg->addr;
+                imp->imp_handle.cookie = request->rq_repmsg->cookie;
+                CERROR("reconnected to %s@%s (%Lx/%Lx, was %Lx/%Lx)!\n",
+                       cli->cl_target_uuid, conn->c_remote_uuid,
+                       imp->imp_handle.addr, imp->imp_handle.cookie,
+                       old_hdl.addr, old_hdl.cookie);
+                ptlrpc_free_req(request);
         }
-        conn->c_last_xid = last_xid;
         conn->c_level = LUSTRE_CONN_RECOVD;
 
  out_disc:
-        return err;
+        return rc;
 }
-#endif
 
 static int ll_recover_upcall(struct ptlrpc_connection *conn)
 {
@@ -74,19 +97,15 @@ static int ll_recover_upcall(struct ptlrpc_connection *conn)
 
 static int ll_recover_reconnect(struct ptlrpc_connection *conn)
 {
-        RETURN(-ENOSYS);
-#if 0
-        /* XXXshaver this code needs to know about connection-driven recovery! */
-
-        struct ptlrpc_request *req;
-        struct list_head *tmp, *pos;
-        struct ll_sb_info *sbi = cli->cli_data;
-        struct ptlrpc_connection *conn = cli->cli_connection;
         int rc = 0;
+        struct list_head *tmp, *pos;
+        struct ptlrpc_request *req;
         ENTRY;
 
         /* 1. reconnect */
-        ll_reconnect(sbi);
+        rc = ll_reconnect(conn);
+        if (rc)
+                RETURN(rc);
         
         /* 2. walk the request list */
         spin_lock(&conn->c_lock);
@@ -97,12 +116,14 @@ static int ll_recover_reconnect(struct ptlrpc_connection *conn)
                 if (req->rq_flags & PTL_RPC_FL_REPLAY) {
                         CDEBUG(D_INODE, "req %Ld needs replay [last rcvd %Ld]\n",
                                req->rq_xid, conn->c_last_xid);
+                        rc = ptlrpc_replay_req(req);
+#if 0
 #error We should not hold a spinlock over such a lengthy operation.
 #error If necessary, drop spinlock, do operation, re-get spinlock, restart loop.
 #error If we need to avoid re-processint items, then delete them from the list
 #error as they are replayed and re-add at the tail of this list, so the next
 #error item to process will always be at the head of the list.
-                        rc = ptlrpc_replay_req(req);
+#endif
                         if (rc) {
                                 CERROR("recovery replay error %d for req %Ld\n",
                                        rc, req->rq_xid);
@@ -152,7 +173,7 @@ static int ll_recover_reconnect(struct ptlrpc_connection *conn)
 
         }
 
-        sbi2mdc(sbi)->cl_conn->c_level = LUSTRE_CONN_FULL;
+        conn->c_level = LUSTRE_CONN_FULL;
         recovd_conn_fixed(conn);
 
         /* Finally, continue what we delayed since recovery started */
@@ -165,7 +186,6 @@ static int ll_recover_reconnect(struct ptlrpc_connection *conn)
  out:
         spin_unlock(&conn->c_lock);
         return rc;
-#endif
 }
 
 int ll_recover(struct recovd_data *rd, int phase)
index 40a30ca..9485861 100644 (file)
@@ -73,7 +73,9 @@ int connmgr_iocontrol(long cmd, struct lustre_handle *hdl, int len, void *karg,
         struct obd_device *obd = class_conn2obd(hdl);
         struct recovd_obd *recovd = &obd->u.recovd;
         struct obd_ioctl_data *data = karg;
+#if 0 && PARALLEL_RECOVERY
         struct list_head *tmp;
+#endif
 
         ENTRY;