From be613a2789c6e868956cd26faa5197b7514cb741 Mon Sep 17 00:00:00 2001
From: zhanghc
Date: Tue, 10 Mar 2009 17:00:55 +0000
Subject: [PATCH] b=17710 Add "sys.ldlm_timeout" as an option to "mkfs.lustre"
 to set timeout of ldlm i=adilger i=nathan.rutman

---
 lustre/include/lustre_cfg.h   |  6 ++++-
 lustre/include/lustre_param.h |  1 +
 lustre/mgs/mgs_llog.c         | 55 ++++++++++++++++++++++++++++----------
 lustre/obdclass/obd_config.c  | 11 ++++++++
 lustre/tests/conf-sanity.sh   | 62 +++++++++++++++++++++++++++++++++++++++++++
 5 files changed, 120 insertions(+), 15 deletions(-)

diff --git a/lustre/include/lustre_cfg.h b/lustre/include/lustre_cfg.h
index c51c0d3..e1a03e7 100644
--- a/lustre/include/lustre_cfg.h
+++ b/lustre/include/lustre_cfg.h
@@ -47,7 +47,10 @@
 #define LCFG_HDR_SIZE(count) \
         size_round(offsetof (struct lustre_cfg, lcfg_buflens[(count)]))
 
-/* If not LCFG_REQUIRED, we can ignore this cmd and go on. */
+/* If the LCFG_REQUIRED bit is set in a configuration command,
+ * then the client is required to understand this parameter
+ * in order to mount the filesystem. If it does not understand
+ * a REQUIRED command the client mount will fail. */
 #define LCFG_REQUIRED 0x0001000
 
 enum lcfg_command_type {
@@ -77,6 +80,7 @@ enum lcfg_command_type {
         LCFG_POOL_ADD = 0x00ce021,
         LCFG_POOL_REM = 0x00ce022,
         LCFG_POOL_DEL = 0x00ce023,
+        LCFG_SET_LDLM_TIMEOUT = 0x00ce030,
 };
 
 struct lustre_cfg_bufs {
diff --git a/lustre/include/lustre_param.h b/lustre/include/lustre_param.h
index 0b7caf6..a0c9f6a 100644
--- a/lustre/include/lustre_param.h
+++ b/lustre/include/lustre_param.h
@@ -66,6 +66,7 @@ int do_lcfg(char *cfgname, lnet_nid_t nid, int cmd,
 
 /* System global or special params not handled in obd's proc */
 #define PARAM_SYS_TIMEOUT "sys.timeout=" /* global */
+#define PARAM_SYS_LDLM_TIMEOUT "sys.ldlm_timeout=" /* global */
 #define PARAM_MGSNODE "mgsnode=" /* during mount */
 #define PARAM_FAILNODE "failover.node=" /* llog generation */
 #define PARAM_FAILMODE "failover.mode=" /* llog generation */
diff --git a/lustre/mgs/mgs_llog.c b/lustre/mgs/mgs_llog.c
index e5adb2f..11aea49 100644
--- a/lustre/mgs/mgs_llog.c
+++ b/lustre/mgs/mgs_llog.c
@@ -1855,6 +1855,38 @@ static int mgs_wlp_lcfg(struct obd_device *obd, struct fs_db *fsdb,
         lustre_cfg_free(lcfg);
         return rc;
 }
+/*
+ * Write a global timeout (obd timeout or ldlm timeout) record into the
+ * configuration logs.
+ *
+ * value is parsed by simple_strtoul() with base 0 and stored in lcfg_num
+ * of a new cmd record; comment is handed to mgs_write_log_direct_all() and
+ * also shown in the debug message.
+ * Returns whatever mgs_write_log_direct_all() returns.
+ */
+static int mgs_write_log_timeout(struct obd_device *obd, struct fs_db *fsdb,
+                                 struct mgs_target_info *mti, char *value,
+                                 int cmd, char *comment)
+{
+        struct lustre_cfg_bufs bufs;
+        struct lustre_cfg *lcfg;
+        int timeout;
+        int rc;
+
+        timeout = simple_strtoul(value, NULL, 0);
+        CDEBUG(D_MGS, "timeout: %d (%s)\n", timeout, comment);
+
+        lustre_cfg_bufs_reset(&bufs, NULL);
+        lcfg = lustre_cfg_new(cmd, &bufs);
+        /* NOTE(review): lcfg is used without checking whether
+         * lustre_cfg_new() failed - confirm its failure convention */
+        lcfg->lcfg_num = timeout;
+        /* modify all servers and clients */
+        rc = mgs_write_log_direct_all(obd, fsdb, mti, lcfg, mti->mti_fsname,
+                                      comment);
+        lustre_cfg_free(lcfg);
+        return rc;
+}
 
 static int mgs_srpc_set_param_disk(struct obd_device *obd,
                                    struct fs_db *fsdb,
@@ -2205,7 +2237,6 @@ static int mgs_write_log_param(struct obd_device *obd, struct fs_db *fsdb,
                                struct mgs_target_info *mti, char *ptr)
 {
         struct lustre_cfg_bufs bufs;
-        struct lustre_cfg *lcfg;
         char *logname;
         char *tmp;
         int rc = 0;
@@ -2251,19 +2282,15 @@ static int mgs_write_log_param(struct obd_device *obd, struct fs_db *fsdb,
         }
 
         if (class_match_param(ptr, PARAM_SYS_TIMEOUT, &tmp) == 0) {
-                /* Change obd timeout */
-                int timeout;
-                timeout = simple_strtoul(tmp, NULL, 0);
-
-                CDEBUG(D_MGS, "obd timeout %d\n", timeout);
-                lustre_cfg_bufs_reset(&bufs, NULL);
-                lcfg = lustre_cfg_new(LCFG_SET_TIMEOUT, &bufs);
-                lcfg->lcfg_num = timeout;
-                /* modify all servers and clients */
-                rc = mgs_write_log_direct_all(obd, fsdb, mti, lcfg,
-                                              mti->mti_fsname,
-                                              "timeout");
-                lustre_cfg_free(lcfg);
+                rc = mgs_write_log_timeout(obd, fsdb, mti, tmp,
+                                           LCFG_SET_TIMEOUT, "obd_timeout");
+                GOTO(end, rc);
+        }
+
+        if (class_match_param(ptr, PARAM_SYS_LDLM_TIMEOUT, &tmp) == 0) {
+                rc = mgs_write_log_timeout(obd, fsdb, mti, tmp,
+                                           LCFG_SET_LDLM_TIMEOUT,
+                                           "ldlm_timeout");
                 GOTO(end, rc);
         }
 
diff --git a/lustre/obdclass/obd_config.c b/lustre/obdclass/obd_config.c
index 5b0e0e0..74b94e4 100644
--- a/lustre/obdclass/obd_config.c
+++ b/lustre/obdclass/obd_config.c
@@ -853,6 +853,17 @@ int class_process_config(struct lustre_cfg *lcfg)
                 obd_timeout = max(lcfg->lcfg_num, 1U);
                 GOTO(out, err = 0);
         }
+        case LCFG_SET_LDLM_TIMEOUT: {
+                CDEBUG(D_IOCTL, "changing lustre ldlm_timeout from %d to %d\n",
+                       ldlm_timeout, lcfg->lcfg_num);
+                ldlm_timeout = max(lcfg->lcfg_num, 1U);
+                /* ldlm_timeout must stay below obd_timeout; otherwise
+                 * fall back to a third of obd_timeout (at least 1) */
+                if (ldlm_timeout >= obd_timeout)
+                        ldlm_timeout = max(obd_timeout / 3, 1U);
+
+                GOTO(out, err = 0);
+        }
         case LCFG_SET_UPCALL: {
                 LCONSOLE_ERROR_MSG(0x15a, "recovery upcall is deprecated\n");
                 /* COMPAT_146 Don't fail on old configs */
diff --git a/lustre/tests/conf-sanity.sh b/lustre/tests/conf-sanity.sh
index 7c5dd43..db1deae 100644
--- a/lustre/tests/conf-sanity.sh
+++ b/lustre/tests/conf-sanity.sh
@@ -1687,6 +1687,68 @@ test_48() { # bug 17636
 }
 run_test 48 "too many acls on file"
 
+# check PARAM_SYS_LDLM_TIMEOUT option of MKFS.LUSTRE
+test_49() { # bug 17710
+	local OLD_MDS_MKFS_OPTS=$MDS_MKFS_OPTS
+	local OLD_OST_MKFS_OPTS=$OST_MKFS_OPTS
+	local OLD_TIMEOUT=$TIMEOUT
+
+	TIMEOUT=20
+
+	# sys.ldlm_timeout equal to sys.timeout: the check below expects timeout / 3
+	MDS_MKFS_OPTS="--mgs --mdt --fsname=$FSNAME --device-size=$MDSSIZE --param sys.timeout=$TIMEOUT --param sys.ldlm_timeout=$TIMEOUT $MKFSOPT $MDSOPT"
+
+	reformat
+	start_mds
+	start_ost
+	mount_client $MOUNT
+	check_mount || return 1
+
+	echo "check ldlm_timeout..."
+	LDLM_MDS="`do_facet mds lctl get_param -n ldlm_timeout`"
+	LDLM_OST1="`do_facet ost1 lctl get_param -n ldlm_timeout`"
+	LDLM_CLIENT="`do_facet client lctl get_param -n ldlm_timeout`"
+
+	if [ $LDLM_MDS -ne $LDLM_OST1 ] || [ $LDLM_MDS -ne $LDLM_CLIENT ]; then
+		error "Different LDLM_TIMEOUT: $LDLM_MDS $LDLM_OST1 $LDLM_CLIENT"
+	fi
+
+	if [ $LDLM_MDS -ne $((TIMEOUT / 3)) ]; then
+		error "LDLM_TIMEOUT($LDLM_MDS) is not correct"
+	fi
+
+	umount_client $MOUNT
+	stop_ost || return 2
+	stop_mds || return 3
+
+	# sys.ldlm_timeout below sys.timeout in the OST options: the check below expects timeout - 1
+	OST_MKFS_OPTS="--ost --fsname=$FSNAME --device-size=$OSTSIZE --mgsnode=$MGSNID --param sys.timeout=$TIMEOUT --param sys.ldlm_timeout=$((TIMEOUT - 1)) $MKFSOPT $OSTOPT"
+
+	reformat
+	start_mds || return 4
+	start_ost || return 5
+	mount_client $MOUNT || return 6
+	check_mount || return 7
+
+	LDLM_MDS="`do_facet mds lctl get_param -n ldlm_timeout`"
+	LDLM_OST1="`do_facet ost1 lctl get_param -n ldlm_timeout`"
+	LDLM_CLIENT="`do_facet client lctl get_param -n ldlm_timeout`"
+
+	if [ $LDLM_MDS -ne $LDLM_OST1 ] || [ $LDLM_MDS -ne $LDLM_CLIENT ]; then
+		error "Different LDLM_TIMEOUT: $LDLM_MDS $LDLM_OST1 $LDLM_CLIENT"
+	fi
+
+	if [ $LDLM_MDS -ne $((TIMEOUT - 1)) ]; then
+		error "LDLM_TIMEOUT($LDLM_MDS) is not correct"
+	fi
+
+	cleanup || return $?
+
+	MDS_MKFS_OPTS=$OLD_MDS_MKFS_OPTS
+	OST_MKFS_OPTS=$OLD_OST_MKFS_OPTS
+	TIMEOUT=$OLD_TIMEOUT
+}
+run_test 49 "check PARAM_SYS_LDLM_TIMEOUT option of MKFS.LUSTRE"
 
 cleanup_gss
 equals_msg `basename $0`: test complete
-- 
1.8.3.1