From e64db7532a856bd517a95999974e73642e147720 Mon Sep 17 00:00:00 2001 From: zhanghc Date: Tue, 10 Mar 2009 15:13:11 +0000 Subject: [PATCH] b=17710 Add "sys.ldlm_timeout" as an option to "mkfs.lustre" to set timeout of ldlm i=adilger i=nathan.rutman --- lustre/include/lustre_cfg.h | 6 ++++- lustre/include/lustre_param.h | 1 + lustre/mgs/mgs_llog.c | 45 +++++++++++++++++++++++---------- lustre/obdclass/obd_config.c | 9 +++++++ lustre/tests/conf-sanity.sh | 59 +++++++++++++++++++++++++++++++++++++++++++ 5 files changed, 106 insertions(+), 14 deletions(-) diff --git a/lustre/include/lustre_cfg.h b/lustre/include/lustre_cfg.h index 5e91eb5..3a6b670 100644 --- a/lustre/include/lustre_cfg.h +++ b/lustre/include/lustre_cfg.h @@ -47,7 +47,10 @@ #define LCFG_HDR_SIZE(count) \ size_round(offsetof (struct lustre_cfg, lcfg_buflens[(count)])) -/* If not LCFG_REQUIRED, we can ignore this cmd and go on. */ +/* If the LCFG_REQUIRED bit is set in a configuration command, + * then the client is required to understand this parameter + * in order to mount the filesystem. If it does not understand + * a REQUIRED command the client mount will fail. 
*/ #define LCFG_REQUIRED 0x0001000 enum lcfg_command_type { @@ -70,6 +73,7 @@ enum lcfg_command_type { LCFG_LOG_START = 0x00ce011, LCFG_LOG_END = 0x00ce012, LCFG_LOV_ADD_INA = 0x00ce013, + LCFG_SET_LDLM_TIMEOUT = 0x00ce030, }; struct lustre_cfg_bufs { diff --git a/lustre/include/lustre_param.h b/lustre/include/lustre_param.h index 52b1d29..5c5f15b 100644 --- a/lustre/include/lustre_param.h +++ b/lustre/include/lustre_param.h @@ -65,6 +65,7 @@ int do_lcfg(char *cfgname, lnet_nid_t nid, int cmd, /* System global or special params not handled in obd's proc */ #define PARAM_SYS_TIMEOUT "sys.timeout=" /* global */ +#define PARAM_SYS_LDLM_TIMEOUT "sys.ldlm_timeout=" /* global */ #define PARAM_MGSNODE "mgsnode=" /* during mount */ #define PARAM_FAILNODE "failover.node=" /* llog generation */ #define PARAM_FAILMODE "failover.mode=" /* llog generation */ diff --git a/lustre/mgs/mgs_llog.c b/lustre/mgs/mgs_llog.c index ffc53eb..d24c42a 100644 --- a/lustre/mgs/mgs_llog.c +++ b/lustre/mgs/mgs_llog.c @@ -1410,6 +1410,29 @@ static int mgs_wlp_lcfg(struct obd_device *obd, struct fs_db *fsdb, return rc; } +/* write global obd timeout or ldlm timeout param into log */ +static int mgs_write_log_timeout(struct obd_device *obd, struct fs_db *fsdb, + struct mgs_target_info *mti, char *value, + int cmd, char *comment) +{ + struct lustre_cfg_bufs bufs; + struct lustre_cfg *lcfg; + int timeout; + int rc; + + timeout = simple_strtoul(value, NULL, 0); + CDEBUG(D_MGS, "timeout: %d (%s)\n", timeout, comment); + + lustre_cfg_bufs_reset(&bufs, NULL); + lcfg = lustre_cfg_new(cmd, &bufs); + lcfg->lcfg_num = timeout; + /* modify all servers and clients */ + rc = mgs_write_log_direct_all(obd, fsdb, mti, lcfg, mti->mti_fsname, + comment); + lustre_cfg_free(lcfg); + return rc; +} + static int mgs_write_log_params(struct obd_device *obd, struct fs_db *fsdb, struct mgs_target_info *mti) { @@ -1467,20 +1490,16 @@ static int mgs_write_log_params(struct obd_device *obd, struct fs_db *fsdb, } if 
(class_match_param(ptr, PARAM_SYS_TIMEOUT, &tmp) == 0) { - /* Change obd timeout */ - int timeout; - timeout = simple_strtoul(tmp, NULL, 0); + rc = mgs_write_log_timeout(obd, fsdb, mti, tmp, + LCFG_SET_TIMEOUT, + "obd_timeout"); + goto end_while; + } - CDEBUG(D_MGS, "obd timeout %d\n", timeout); - - lustre_cfg_bufs_reset(&bufs, NULL); - lcfg = lustre_cfg_new(LCFG_SET_TIMEOUT, &bufs); - lcfg->lcfg_num = timeout; - /* modify all servers and clients */ - rc = mgs_write_log_direct_all(obd, fsdb, mti, lcfg, - mti->mti_fsname, - "timeout"); - lustre_cfg_free(lcfg); + if (class_match_param(ptr, PARAM_SYS_LDLM_TIMEOUT, &tmp) == 0) { + rc = mgs_write_log_timeout(obd, fsdb, mti, tmp, + LCFG_SET_LDLM_TIMEOUT, + "ldlm_timeout"); goto end_while; } diff --git a/lustre/obdclass/obd_config.c b/lustre/obdclass/obd_config.c index 7eaeb96..fe2eef3 100644 --- a/lustre/obdclass/obd_config.c +++ b/lustre/obdclass/obd_config.c @@ -801,6 +801,15 @@ int class_process_config(struct lustre_cfg *lcfg) obd_timeout = max(lcfg->lcfg_num, 1U); GOTO(out, err = 0); } + case LCFG_SET_LDLM_TIMEOUT: { + CDEBUG(D_IOCTL, "changing lustre ldlm_timeout from %d to %d\n", + ldlm_timeout, lcfg->lcfg_num); + ldlm_timeout = max(lcfg->lcfg_num, 1U); + if (ldlm_timeout >= obd_timeout) + ldlm_timeout = max(obd_timeout / 3, 1U); + + GOTO(out, err = 0); + } case LCFG_SET_UPCALL: { LCONSOLE_ERROR_MSG(0x15a, "recovery upcall is deprecated\n"); /* COMPAT_146 Don't fail on old configs */ diff --git a/lustre/tests/conf-sanity.sh b/lustre/tests/conf-sanity.sh index 2667166..1510476 100644 --- a/lustre/tests/conf-sanity.sh +++ b/lustre/tests/conf-sanity.sh @@ -1571,6 +1571,65 @@ test_48() { # bug 17636 } run_test 48 "too many acls on file" +# check PARAM_SYS_LDLM_TIMEOUT option of MKFS.LUSTRE +test_49() { # bug 17710 + local OLD_MDS_MKFS_OPTS=$MDS_MKFS_OPTS + local OLD_OST_MKFS_OPTS=$OST_MKFS_OPTS + local OLD_TIMEOUT=$TIMEOUT + + TIMEOUT=20 + + MDS_MKFS_OPTS="--mgs --mdt --fsname=$FSNAME --device-size=$MDSSIZE --param 
sys.timeout=$TIMEOUT --param sys.ldlm_timeout=$TIMEOUT $MKFSOPT $MDSOPT" + + reformat + start_mds + start_ost + mount_client $MOUNT + check_mount || return 1 + + echo "check ldlm_timeout..." + LDLM_MDS="`do_facet mds lctl get_param -n ldlm_timeout`" + LDLM_OST1="`do_facet ost1 lctl get_param -n ldlm_timeout`" + LDLM_CLIENT="`do_facet client lctl get_param -n ldlm_timeout`" + + if [ $LDLM_MDS -ne $LDLM_OST1 ] || [ $LDLM_MDS -ne $LDLM_CLIENT ]; then + error "Different LDLM_TIMEOUT: $LDLM_MDS $LDLM_OST1 $LDLM_CLIENT" + fi + + if [ $LDLM_MDS -ne $((TIMEOUT / 3)) ]; then + error "LDLM_TIMEOUT($LDLM_MDS) is not correct" + fi + + umount_client $MOUNT + stop_ost || return 2 + stop_mds || return 3 + + OST_MKFS_OPTS="--ost --fsname=$FSNAME --device-size=$OSTSIZE --mgsnode=$MGSNID --param sys.timeout=$TIMEOUT --param sys.ldlm_timeout=$((TIMEOUT - 1)) $MKFSOPT $OSTOPT" + + reformat + start_mds || return 4 + start_ost || return 5 + mount_client $MOUNT || return 6 + check_mount || return 7 + + LDLM_MDS="`do_facet mds lctl get_param -n ldlm_timeout`" + LDLM_OST1="`do_facet ost1 lctl get_param -n ldlm_timeout`" + LDLM_CLIENT="`do_facet client lctl get_param -n ldlm_timeout`" + + if [ $LDLM_MDS -ne $LDLM_OST1 ] || [ $LDLM_MDS -ne $LDLM_CLIENT ]; then + error "Different LDLM_TIMEOUT: $LDLM_MDS $LDLM_OST1 $LDLM_CLIENT" + fi + + if [ $LDLM_MDS -ne $((TIMEOUT - 1)) ]; then + error "LDLM_TIMEOUT($LDLM_MDS) is not correct" + fi + + cleanup || return $? + + MDS_MKFS_OPTS=$OLD_MDS_MKFS_OPTS + OST_MKFS_OPTS=$OLD_OST_MKFS_OPTS +} +run_test 49 "check PARAM_SYS_LDLM_TIMEOUT option of MKFS.LUSTRE" equals_msg `basename $0`: test complete [ -f "$TESTSUITELOG" ] && cat $TESTSUITELOG && grep -q FAIL $TESTSUITELOG && exit 1 || true -- 1.8.3.1