From ba0ff7df603e9d8e74a3eb64991b22bd65d3a6f2 Mon Sep 17 00:00:00 2001 From: adilger Date: Mon, 12 Dec 2005 09:16:32 +0000 Subject: [PATCH] Branch b_release_1_4_6 Severity : enhancement Bugzilla : 9477, 9557, 9870 Description: Verify that the MDS config logs are updated when the XML changes. Details : Check if the .xml configuration logs are newer than the config logs stored on the MDS and report an error if this is the case. Request --write_conf, or allow starting with --old_conf. r=adilger,nathan (lincent original patch, modified by adilger) Severity : enhancement Bugzilla : 6034 Description: Handle symlinks in the path when checking if Lustre is mounted. Details : Resolve intermediate symlinks when checking if a client has mounted a filesystem to avoid duplicate client mounts. r=phil,adilger (Fergal original patch) Severity : minor Frequency : rare Bugzilla : 9309 Description: lconf can hit an error exception but still return success. Details : The lconf command catches the Command error exception at the top level script context and will exit with the associated exit status, but doesn't ensure that this exit status is non-zero. r=phil,adilger (Fergal original patch) Add "accept=all" when running from a developer tree to allow liblustre testing, as insmod does not look at /etc/modules.conf for module options. r=green --- lustre/ChangeLog | 29 +++++++++-- lustre/utils/lconf | 145 +++++++++++++++++++++++++++++------------------------ 2 files changed, 104 insertions(+), 70 deletions(-) diff --git a/lustre/ChangeLog b/lustre/ChangeLog index 3e04199..5c4f770 100644 --- a/lustre/ChangeLog +++ b/lustre/ChangeLog @@ -402,23 +402,44 @@ Severity : enhancement Bugzilla : 9297 Description: Stop sending data to evicted clients as soon as possible. Details : Check if the client we are about to send or are sending data to - was evicted already. (Check is done every second of waiting, + was evicted already. 
(Check is done every second of waiting, for which l_wait_event interface was extended to allow checking of exit condition at specified intervals). Severity : minor +Bugzilla : 9301 Frequency : rare Description: 'bad disk LOV MAGIC: 0x00000000' error when chown'ing files without - objects + objects Details : Make mds_get_md to recognise empty md case and set lmm size to 0. Severity : minor Bugzilla : 9794 Description: Liblustre uses system PRNG disturbing its usage by user application Details : Introduce internal to lustre fast and high-quality PRNG for - lustre usage and make liblustre and some other places in generic - lustre code to use it. + lustre usage and make liblustre and some other places in generic + lustre code to use it. +Severity : enhancement +Bugzilla : 9477, 9557, 9870 +Description: Verify that the MDS config logs are updated when the XML changes. +Details : Check if the .xml configuration logs are newer than the config + logs stored on the MDS and report an error if this is the case. + Request --write_conf, or allow starting with --old_conf. + +Severity : enhancement +Bugzilla : 6034 +Description: Handle symlinks in the path when checking if Lustre is mounted. +Details : Resolve intermediate symlinks when checking if a client has + mounted a filesystem to avoid duplicate client mounts. + +Severity : minor +Frequency : rare +Bugzilla : 9309 +Description: lconf can hit an error exception but still return success. +Details : The lconf command catches the Command error exception at the top + level script context and will exit with the associated exit + status, but doesn't ensure that this exit status is non-zero. 
------------------------------------------------------------------------------ diff --git a/lustre/utils/lconf b/lustre/utils/lconf index 107b87f..71436d8 100755 --- a/lustre/utils/lconf +++ b/lustre/utils/lconf @@ -580,6 +580,32 @@ def is_block(path): return 0 return stat.S_ISBLK(s[stat.ST_MODE]) +def my_realpath(path): + try: + if os.path.islink(path): + # get the realpath of the mount point path + if 'realpath' in dir(os.path): + real_path = os.path.realpath(path) + else: + real_path = path + link_count = 0 + while os.path.islink(real_path) and (link_count < 20): + link_count = link_count + 1 + path_link = os.readlink(real_path) + if os.path.isabs(path_link): + real_path = path_link + else: + real_path = os.path.join(os.path.dirname(real_path), path_link) + if link_count > 19: + panic("Encountered too many symbolic links resolving path:", path) + else: + real_path = path + + return real_path + except: + panic("Fatal error realpath()ing path:", path) + + # build fs according to type # fixme: dangerous def mkfs(dev, devsize, fstype, jsize, isize, mkfsoptions, isblock=1): @@ -612,20 +638,7 @@ def mkfs(dev, devsize, fstype, jsize, isize, mkfsoptions, isblock=1): # get the realpath of the device # it may be the real device, such as /dev/hda7 # or the hardlink created via mknod for a device - if 'realpath' in dir(os.path): - real_dev = os.path.realpath(dev) - else: - real_dev = dev - link_count = 0 - while os.path.islink(real_dev) and (link_count < 20): - link_count = link_count + 1 - dev_link = os.readlink(real_dev) - if os.path.isabs(dev_link): - real_dev = dev_link - else: - real_dev = os.path.join(os.path.dirname(real_dev), dev_link) - if link_count > 19: - panic("Encountered too many symbolic links resolving block device:", dev) + real_dev = my_realpath(dev) # get the major and minor number of the realpath via ls # it seems python(os.stat) does not return @@ -829,12 +842,14 @@ def is_network_prepared(): def fs_is_mounted(path): """Return true if path is a mounted 
lustre filesystem""" try: + real_path = my_realpath(path) + fp = open('/proc/mounts') lines = fp.readlines() fp.close() for l in lines: a = string.split(l) - if a[1] == path and a[2] == 'lustre_lite': + if a[1] == real_path and a[2] == 'lustre_lite': return 1 except IOError, e: log(e) @@ -861,15 +876,18 @@ class kmod: if mod_loaded(mod) and not config.noexec: continue log ('loading module:', mod, 'srcdir', src_dir, 'devdir', dev_dir) + options = '' if mod == 'lnet': #For LNET we really need modprobe to load defined LNDs run('/sbin/modprobe lnet') - #But if that fails, try insmod anyhow + #But if that fails, try insmod anyhow with dev option + #accept=all for dev liblustre testing + options = 'accept=all' if src_dir: module = find_module(src_dir, dev_dir, mod) if not module: panic('module not found:', mod) - (rc, out) = run('/sbin/insmod', module) + (rc, out) = run('/sbin/insmod', module, options) if rc and not mod_loaded(mod): if rc == 1: print("Bad module options? Check dmesg.") @@ -897,6 +915,7 @@ class kmod: # remove any self-ref portals created lctl.unconfigure_network() if config.dump: + debug('dumping debug log to', config.dump) # debug hack lctl.dump(config.dump) log('unloading the network') @@ -1157,6 +1176,7 @@ class MDSDEV(Module): self.active = 0 self.inode_size = self.db.get_val_int('inodesize', 0) + debug('original inode_size ', self.inode_size) if self.inode_size == 0: # find the LOV for this MDS lovconfig_uuid = mds.get_first_ref('lovconfig') @@ -1183,8 +1203,7 @@ class MDSDEV(Module): # self.inode_size = 256 else: self.inode_size = 512 - debug('stripe_count %d, inode_size %d', - stripe_count, self.inode_size) + debug('stripe_count ', stripe_count,' inode_size ',self.inode_size) self.target_dev_uuid = self.uuid self.uuid = target_uuid @@ -1303,7 +1322,7 @@ class MDSDEV(Module): noexec_opt = ('', '-n') ret, out = run (sys.argv[0], noexec_opt[old_noexec == 1], - " -v --record --nomod", + " -v --record --nomod --old_conf", "--record_log", client_name, 
"--record_device", self.name, "--node", client_name, @@ -1328,22 +1347,22 @@ class MDSDEV(Module): #change the mtime of LLOG to match the XML creation time if toplustreDB.get_mtime(): - mtime = string.atof(toplustreDB.get_mtime()) - runcmd("mkdir /tmp/lustre-XXXX/") - if is_block(self.devpath): - ret, out = runcmd("mount %s /tmp/lustre-XXXX/" %self.devpath) - else: - ret, out = runcmd("mount -o loop %s /tmp/lustre-XXXX/" %self.devpath) + mtime = toplustreDB.get_mtime() + debug("changing mtime of LOGS to %s" %mtime) + ret, mktemp = runcmd("mktemp /tmp/lustre-cmd.XXXXXXXX") if ret: - print out[0] - try: - os.utime("/tmp/lustre-XXXX/LOGS", (mtime, mtime)) - except OSError: - runcmd("umount -f /tmp/lustre-XXXX/") - panic("Can't adjust config creation time!") - runcmd("umount -f /tmp/lustre-XXXX/") - else: - print "XML file does not contain mtime, skip mtime checking." + log(self.module_name, "create mtime LOGS cmdfile failed: ", self.name) + else: + mtimecmdfile = string.split(mktemp[0])[0] + #mtimecmdfile="/tmp/lustre-cmd.XXXXXXXX" + fd = os.open(mtimecmdfile, os.O_RDWR | os.O_CREAT) + os.write(fd, "\n\n\n\n\n%s\n\n" %mtime) + os.close(fd) + cmd = "debugfs -w -R \"mi /LOGS\" <%s %s" %(mtimecmdfile, self.devpath) + ret, outs = runcmd(cmd) + os.remove(mtimecmdfile) + if ret: + print "Can not change mtime of LOGS by debugfs." 
def mds_remaining(self): out = lctl.device_list() @@ -2074,38 +2093,28 @@ def doCheckMtime(lustreDB, hosts): if s[1].get_class() == 'mdsdev': mdsdb = s[1] break - if mdsdb: - if lustreDB.get_mtime(): - if config.verbose: - print "Checking XML modification time" - devpath = mdsdb.get_val('devpath','') - xmtime = int(lustreDB.get_mtime()) - runcmd("mkdir /tmp/lustre-XXXX/") - # mounting ro causes confusing syslog errors - if is_block(devpath): - ret, out = runcmd("mount %s /tmp/lustre-XXXX/" %devpath) - else: - ret, out = runcmd("mount -o loop %s /tmp/lustre-XXXX/" %devpath) - if ret: - print out[0] - else: - try: - out = os.stat("/tmp/lustre-XXXX/LOGS") - except OSError: - runcmd("umount -f /tmp/lustre-XXXX") - panic("Warning: Can't read Lustre logs." - " Please run --write_conf to update.") - runcmd("umount -f /tmp/lustre-XXXX") - try: - kmtime = int(out[8]) - except ValueError: - kmtime = xmtime - if xmtime > kmtime : - debug('xmtime ', xmtime, '> kmtime', kmtime) - panic("Warning: the startup logs are older than the XML file." - " Please run --write_conf to update.") + + if mdsdb and lustreDB.get_mtime(): + debug("Checking XML modification time") + devpath = mdsdb.get_val('devpath','') + xmtime = string.atol(lustreDB.get_mtime()) + cmd = "debugfs -c -R 'stat /LOGS' %s 2>&1 | grep mtime" %devpath + ret, kmtimes = runcmd(cmd) + if ret: + log("Can not get mtime info of MDS LOGS directory") else: - print "XML file does not contain mtime, skip mtime checking." + kmtime = string.atoi(string.split(kmtimes[0])[1], 0) + if xmtime > kmtime: + debug('xmtime ', xmtime, '> kmtime', kmtime) + if config.old_conf: + log("Warning: MDS startup logs are older than config %s." + " Please run --write_conf on stopped MDS to update." + %CONFIG_FILE) + else: + panic("Error: MDS startup logs are older than config %s." + " Please run --write_conf on stopped MDS to update." + " Use '--old_conf' to start anyways." 
%CONFIG_FILE) + return # # Load profile for @@ -2583,6 +2592,7 @@ lconf_options = [ ('dump', "Dump the kernel debug log to file before portals is unloaded", PARAM), ('write_conf', "Save all the client config information on mds."), + ('old_conf', "Start up service even though config logs appear outdated."), ('record', "Write config information on mds."), ('record_log', "Name of config record log.", PARAM), ('record_device', "MDS device name that will record the config commands", @@ -2763,6 +2773,10 @@ if __name__ == "__main__": sys.exit(1) except CommandError, e: e.dump() + rc = e.rc + if rc == 0: + rc = 1 - sys.exit(e.rc) + # exit with the sanitized status so a caught CommandError never reports success + sys.exit(rc) if first_cleanup_error: -- 1.8.3.1