Whamcloud - gitweb
Branch b1_4_mountconf
author nathan <nathan>
Mon, 27 Feb 2006 16:50:52 +0000 (16:50 +0000)
committer nathan <nathan>
Mon, 27 Feb 2006 16:50:52 +0000 (16:50 +0000)
b=8007
Use force/failover flags in mgc shutdown to prevent waiting around

lustre/ldlm/ldlm_lib.c
lustre/llite/llite_lib.c
lustre/mgs/mgs_handler.c
lustre/obdclass/obd_mount.c
lustre/ptlrpc/recover.c
lustre/utils/rmmod_all.sh

index 4d0ccc3..8a342a5 100644 (file)
@@ -640,8 +640,8 @@ int target_handle_connect(struct ptlrpc_request *req, svc_handler_t handler)
         if (!export) {
                 spin_unlock(&target->obd_dev_lock);
         } else if (req->rq_reqmsg->conn_cnt == 1) {
-                CERROR("%s reconnected with 1 conn_cnt; cookies not random?\n",
-                       cluuid.uuid);
+                CERROR("%s reconnected with 1 conn_cnt; cookies not "
+                       "random?\n", cluuid.uuid);
                 GOTO(out, rc = -EALREADY);
         }
 
index 6281c92..beed138 100644 (file)
@@ -1454,10 +1454,15 @@ int ll_iocontrol(struct inode *inode, struct file *file,
 
 void ll_umount_begin(struct super_block *sb)
 {
+        struct lustre_sb_info *lsi = s2lsi(sb);
         struct ll_sb_info *sbi = ll_s2sbi(sb);
         struct obd_device *obd;
         struct obd_ioctl_data ioc_data = { 0 };
         ENTRY;
+
+        /* Make the MGC not try to cancel locks */
+        lsi->lsi_flags |= LSI_UMOUNT_FAILOVER;
+
         CDEBUG(D_VFSTRACE, "VFS Op: superblock %p count %d active %d\n", sb,
                sb->s_count, atomic_read(&sb->s_active));
 
@@ -1479,7 +1484,6 @@ void ll_umount_begin(struct super_block *sb)
                 EXIT;
                 return;
         }
-
         obd->obd_no_recov = 1;
         obd_iocontrol(IOC_OSC_SET_ACTIVE, sbi->ll_osc_exp, sizeof ioc_data,
                       &ioc_data, NULL);
index cd65026..9e0ab34 100644 (file)
@@ -81,6 +81,22 @@ static int mgs_connect(struct lustre_handle *conn, struct obd_device *obd,
         RETURN(rc);
 }
 
+static int mgs_reconnect(struct obd_export *exp, struct obd_device *obd,
+                         struct obd_uuid *cluuid,
+                         struct obd_connect_data *data)
+{       /* Reconnect handler stub: validates its pointers, logs, returns rc. */
+        int rc = -ENOSYS; /* nothing below assigns rc, so this is the result */
+        ENTRY;
+
+        if (exp == NULL || obd == NULL || cluuid == NULL)
+                RETURN(-EINVAL); /* data is not checked; it is unused for now */
+
+        CERROR("Reconnect FIXME\n");
+        //rc = mds_connect_internal(exp, data);
+
+        RETURN(rc);
+}
+
 static int mgs_disconnect(struct obd_export *exp)
 {
         unsigned long irqflags;
@@ -653,6 +669,7 @@ out_free:
 static struct obd_ops mgs_obd_ops = {
         .o_owner           = THIS_MODULE,
         .o_connect         = mgs_connect,
+        .o_reconnect       = mgs_reconnect,
         .o_disconnect      = mgs_disconnect,
         .o_setup           = mgs_setup,
         .o_precleanup      = mgs_precleanup,
index aed6f06..5da2b59 100644 (file)
@@ -685,6 +685,28 @@ static int lustre_stop_mgc(struct super_block *sb)
                 RETURN(-EBUSY); 
         }
 
+        obd->obd_force = 1;
+        /* Without the fail, we wait for locks to be dropped, so if the
+           MGS is down, we might wait for an obd timeout.  With the fail,
+           if the MGS is up, we don't tell it we're disconnecting, so 
+           we must wait until the MGS evicts the dead client before the 
+           client can reconnect. So it's either slow disconnect, or a 
+           slow reconnect. This could probably be fixed on the server side 
+           by ignoring handle mismatches in target_handle_reconnect. */
+        if (lsi->lsi_flags & LSI_UMOUNT_FAILOVER) {
+                obd->obd_fail = 1;
+                /* client_disconnect_export uses this flag to decide whether it
+                   should disconnect or just invalidate.  (The MGC has no
+                   recoverable data in any case.) */
+                obd->obd_no_recov = 1;
+        }
+
+        /* see ll_umount_begin 
+        obd_iocontrol(IOC_OSC_SET_ACTIVE, sbi->ll_mdc_exp, sizeof ioc_data,
+                      &ioc_data, NULL);
+        */
+        //rc = ptlrpc_set_import_active(obd->u.cli.cl_import, 0);
+
         if (obd->u.cli.cl_mgc_mgsexp)
                 obd_disconnect(obd->u.cli.cl_mgc_mgsexp);
 
@@ -762,10 +784,11 @@ static int server_stop_servers(int lddflags, int lsiflags)
 
         if (obd && (!type || !type->typ_refcnt)) {
                 int err;
-                /* If the targets have stopped, I can force/fail the servers
-                   with no ill effects */
                 obd->obd_force = 1;
-                obd->obd_fail = 1;
+                /* This doesn't mean much on a server obd; could probably 
+                   drop it */
+                if (lsiflags & LSI_UMOUNT_FAILOVER)
+                        obd->obd_fail = 1;
                 err = class_manual_cleanup(obd);
                 if (!rc) 
                         rc = err;
@@ -1246,9 +1269,9 @@ static void server_umount_begin(struct super_block *sb)
         ENTRY;
 
         CDEBUG(D_MOUNT, "umount -f\n");
-        /* umount = normal
+        /* umount = force
            umount -f = failover
-           no third way to do LSI_UMOUNT_FORCE */
+           no third way to do non-force, non-failover */
         lsi->lsi_flags |= LSI_UMOUNT_FAILOVER;
         EXIT;
 }
index ddc16e7..bd0843f 100644 (file)
@@ -307,7 +307,8 @@ void ptlrpc_request_handle_notconn(struct ptlrpc_request *failed_req)
 /*
  * Administratively active/deactive a client. 
  * This should only be called by the ioctl interface, currently
- * with the lctl deactivate and activate commands.
+ * with the lctl deactivate and activate commands, and
+ * client umount -f (ll_umount_begin)
  */
 int ptlrpc_set_import_active(struct obd_import *imp, int active)
 {
index 7f33141..62d526b 100755 (executable)
@@ -1,5 +1,5 @@
 #!/bin/sh
 
-./lctl modules | awk '{ print $2 }' | xargs rmmod
+./lctl modules | awk '{ print $2 }' | xargs rmmod >/dev/null 2>&1 
 # do it again, in case we tried to unload ksocklnd too early
 ./lctl modules | awk '{ print $2 }' | xargs rmmod