From: nathan
Date: Mon, 27 Feb 2006 16:50:52 +0000 (+0000)
Subject: Branch b1_4_mountconf
X-Git-Tag: v1_8_0_110~486^4~23
X-Git-Url: https://git.whamcloud.com/?a=commitdiff_plain;h=fc33363c83d64b60b21f9f86e55d094d83b97d3f;p=fs%2Flustre-release.git

Branch b1_4_mountconf
b=8007
Use force/failover flags in mgc shutdown to prevent waiting around
---

diff --git a/lustre/ldlm/ldlm_lib.c b/lustre/ldlm/ldlm_lib.c
index 4d0ccc3..8a342a5 100644
--- a/lustre/ldlm/ldlm_lib.c
+++ b/lustre/ldlm/ldlm_lib.c
@@ -640,8 +640,8 @@ int target_handle_connect(struct ptlrpc_request *req, svc_handler_t handler)
         if (!export) {
                 spin_unlock(&target->obd_dev_lock);
         } else if (req->rq_reqmsg->conn_cnt == 1) {
-                CERROR("%s reconnected with 1 conn_cnt; cookies not random?\n",
-                       cluuid.uuid);
+                CERROR("%s reconnected with 1 conn_cnt; cookies not "
+                       "random?\n", cluuid.uuid);
                 GOTO(out, rc = -EALREADY);
         }
 
diff --git a/lustre/llite/llite_lib.c b/lustre/llite/llite_lib.c
index 6281c92..beed138 100644
--- a/lustre/llite/llite_lib.c
+++ b/lustre/llite/llite_lib.c
@@ -1454,10 +1454,15 @@ int ll_iocontrol(struct inode *inode, struct file *file,
 
 void ll_umount_begin(struct super_block *sb)
 {
+        struct lustre_sb_info *lsi = s2lsi(sb);
         struct ll_sb_info *sbi = ll_s2sbi(sb);
         struct obd_device *obd;
         struct obd_ioctl_data ioc_data = { 0 };
         ENTRY;
+
+        /* Make the MGC not try to cancel locks */
+        lsi->lsi_flags |= LSI_UMOUNT_FAILOVER;
+
         CDEBUG(D_VFSTRACE, "VFS Op: superblock %p count %d active %d\n", sb,
                sb->s_count, atomic_read(&sb->s_active));
 
@@ -1479,7 +1484,6 @@ void ll_umount_begin(struct super_block *sb)
                 EXIT;
                 return;
         }
-        obd->obd_no_recov = 1;
         obd_iocontrol(IOC_OSC_SET_ACTIVE, sbi->ll_osc_exp, sizeof ioc_data,
                       &ioc_data, NULL);
 
diff --git a/lustre/mgs/mgs_handler.c b/lustre/mgs/mgs_handler.c
index cd65026..9e0ab34 100644
--- a/lustre/mgs/mgs_handler.c
+++ b/lustre/mgs/mgs_handler.c
@@ -81,6 +81,22 @@ static int mgs_connect(struct lustre_handle *conn, struct obd_device *obd,
         RETURN(rc);
 }
 
+static int mgs_reconnect(struct obd_export *exp, struct obd_device *obd,
+                         struct obd_uuid *cluuid,
+                         struct obd_connect_data *data)
+{
+        int rc = -ENOSYS;
+        ENTRY;
+
+        if (exp == NULL || obd == NULL || cluuid == NULL)
+                RETURN(-EINVAL);
+
+        CERROR("Reconnect FIXME\n");
+        //rc = mds_connect_internal(exp, data);
+
+        RETURN(rc);
+}
+
 static int mgs_disconnect(struct obd_export *exp)
 {
         unsigned long irqflags;
@@ -653,6 +669,7 @@ out_free:
 static struct obd_ops mgs_obd_ops = {
         .o_owner = THIS_MODULE,
         .o_connect = mgs_connect,
+        .o_reconnect = mgs_reconnect,
         .o_disconnect = mgs_disconnect,
         .o_setup = mgs_setup,
         .o_precleanup = mgs_precleanup,
diff --git a/lustre/obdclass/obd_mount.c b/lustre/obdclass/obd_mount.c
index aed6f06..5da2b59 100644
--- a/lustre/obdclass/obd_mount.c
+++ b/lustre/obdclass/obd_mount.c
@@ -685,6 +685,28 @@ static int lustre_stop_mgc(struct super_block *sb)
                 RETURN(-EBUSY);
         }
 
+        obd->obd_force = 1;
+        /* Without the fail, we wait for locks to be dropped, so if the
+           MGS is down, we might wait for an obd timeout. With the fail,
+           if the MGS is up, we don't tell it we're disconnecting, so
+           we must wait until the MGS evicts the dead client before the
+           client can reconnect. So it's either slow disconnect, or a
+           slow reconnect. This could probably be fixed on the server side
+           by ignoring handle mismatches in target_handle_reconnect. */
+        if (lsi->lsi_flags & LSI_UMOUNT_FAILOVER) {
+                obd->obd_fail = 1;
+                /* client_disconnect_export uses this flag to decide whether it
+                   should disconnect or just invalidate. (The MGC has no
+                   recoverable data in any case.) */
+                obd->obd_no_recov = 1;
+        }
+
+        /* see ll_umount_begin
+        obd_iocontrol(IOC_OSC_SET_ACTIVE, sbi->ll_mdc_exp, sizeof ioc_data,
+                      &ioc_data, NULL);
+        */
+        //rc = ptlrpc_set_import_active(obd->u.cli.cl_import, 0);
+
         if (obd->u.cli.cl_mgc_mgsexp)
                 obd_disconnect(obd->u.cli.cl_mgc_mgsexp);
 
@@ -762,10 +784,11 @@ static int server_stop_servers(int lddflags, int lsiflags)
         if (obd && (!type || !type->typ_refcnt)) {
                 int err;
 
-                /* If the targets have stopped, I can force/fail the servers
-                   with no ill effects */
                 obd->obd_force = 1;
-                obd->obd_fail = 1;
+                /* This doesn't mean much on a server obd; could probably
+                   drop it */
+                if (lsiflags & LSI_UMOUNT_FAILOVER)
+                        obd->obd_fail = 1;
                 err = class_manual_cleanup(obd);
                 if (!rc)
                         rc = err;
@@ -1246,9 +1269,9 @@ static void server_umount_begin(struct super_block *sb)
         ENTRY;
 
         CDEBUG(D_MOUNT, "umount -f\n");
-        /* umount = normal
+        /* umount = force
            umount -f = failover
-           no third way to do LSI_UMOUNT_FORCE */
+           no third way to do non-force, non-failover */
         lsi->lsi_flags |= LSI_UMOUNT_FAILOVER;
         EXIT;
 }
diff --git a/lustre/ptlrpc/recover.c b/lustre/ptlrpc/recover.c
index ddc16e7..bd0843f 100644
--- a/lustre/ptlrpc/recover.c
+++ b/lustre/ptlrpc/recover.c
@@ -307,7 +307,8 @@ void ptlrpc_request_handle_notconn(struct ptlrpc_request *failed_req)
 /*
  * Administratively active/deactive a client.
  * This should only be called by the ioctl interface, currently
- * with the lctl deactivate and activate commands.
+ * with the lctl deactivate and activate commands, and
+ * client umount -f (ll_umount_begin)
  */
 int ptlrpc_set_import_active(struct obd_import *imp, int active)
 {
diff --git a/lustre/utils/rmmod_all.sh b/lustre/utils/rmmod_all.sh
index 7f33141..62d526b 100755
--- a/lustre/utils/rmmod_all.sh
+++ b/lustre/utils/rmmod_all.sh
@@ -1,5 +1,5 @@
 #!/bin/sh
 
-./lctl modules | awk '{ print $2 }' | xargs rmmod
+./lctl modules | awk '{ print $2 }' | xargs rmmod >/dev/null 2>&1
 # do it again, in case we tried to unload ksocklnd too early
 ./lctl modules | awk '{ print $2 }' | xargs rmmod