From 18c89b005d6845dd51a48b810255d8dd2e3248b5 Mon Sep 17 00:00:00 2001 From: Andreas Dilger Date: Wed, 14 Jul 2010 20:26:19 +0400 Subject: [PATCH] Remove obsolete configuration tools. Remove old 1.4 configuration tools from master. They were removed from b1_8 a long time ago. --- lustre/utils/lconf | 2928 -------------------------------------- lustre/utils/lmc | 1256 ---------------- lustre/utils/mds-failover-sample | 20 - 3 files changed, 4204 deletions(-) delete mode 100755 lustre/utils/lconf delete mode 100755 lustre/utils/lmc delete mode 100755 lustre/utils/mds-failover-sample diff --git a/lustre/utils/lconf b/lustre/utils/lconf deleted file mode 100755 index bfaa97a..0000000 --- a/lustre/utils/lconf +++ /dev/null @@ -1,2928 +0,0 @@ -#!/usr/bin/env python -# -# GPL HEADER START -# -# DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. -# -# This program is free software; you can redistribute it and/or modify -# it under the terms of the GNU General Public License version 2 only, -# as published by the Free Software Foundation. -# -# This program is distributed in the hope that it will be useful, but -# WITHOUT ANY WARRANTY; without even the implied warranty of -# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU -# General Public License version 2 for more details (a copy is included -# in the LICENSE file that accompanied this code). -# -# You should have received a copy of the GNU General Public License -# version 2 along with this program; If not, see -# http://www.sun.com/software/products/lustre/docs/GPLv2.pdf -# copy of GPLv2]. -# -# Please contact Sun Microsystems, Inc., 4150 Network Circle, Santa Clara, -# CA 95054 USA or visit www.sun.com if you need additional information or -# have any questions. -# -# GPL HEADER END -# - -# -# Copyright (c) 2002, 2010, Oracle and/or its affiliates. All rights reserved. -# Use is subject to license terms. 
-# - -# -# This file is part of Lustre, http://www.lustre.org/ -# Lustre is a trademark of Sun Microsystems, Inc. -# -# Author: Robert Read -# Author: Mike Shaver -# -# lconf - lustre configuration tool -# -# lconf is the main driver script for starting and stopping -# lustre filesystem services. -# -# Based in part on the XML obdctl modifications done by Brian Behlendorf - -import sys, getopt, types, errno -import string, os, stat, popen2, socket, time, random, fcntl, select -import re, exceptions, signal, traceback -import xml.dom.minidom - -if sys.version[0] == '1': - from FCNTL import F_GETFL, F_SETFL -else: - from fcntl import F_GETFL, F_SETFL - -PYMOD_DIR = ["/usr/lib64/lustre/python", "/usr/lib/lustre/python"] -PLATFORM = '' -KEXTPATH = '' -if string.find(sys.platform, 'linux') != -1: - PLATFORM='LINUX' -elif string.find(sys.platform, 'darwin') != -1: - PLATFORM='DARWIN' - KEXTPATH='/System/Library/Extensions/' -else: - PLATFORM='Unsupported' - -def development_mode(): - base = os.path.dirname(sys.argv[0]) - if os.access(base+"/Makefile", os.R_OK): - return 1 - return 0 - -if development_mode(): - sys.path.append('../utils') -else: - sys.path.extend(PYMOD_DIR) - -import Lustre - -# Global parameters -MAXTCPBUF = 16777216 -# -# Maximum number of devices to search for. 
-# (the /dev/loop* nodes need to be created beforehand) -MAX_LOOP_DEVICES = 256 -PORTALS_DIR = '../lnet' - -# Needed to call lconf --record -CONFIG_FILE = "" - -# Please keep these in sync with the values in lnet/include/libcfs/libcfs.h -ptldebug_names = { - "trace" : (1 << 0), - "inode" : (1 << 1), - "super" : (1 << 2), - "ext2" : (1 << 3), - "malloc" : (1 << 4), - "cache" : (1 << 5), - "info" : (1 << 6), - "ioctl" : (1 << 7), - "blocks" : (1 << 8), - "net" : (1 << 9), - "warning" : (1 << 10), - "buffs" : (1 << 11), - "other" : (1 << 12), - "dentry" : (1 << 13), - "portals" : (1 << 14), # deprecated - "lnet" : (1 << 14), - "page" : (1 << 15), - "dlmtrace" : (1 << 16), - "error" : (1 << 17), - "emerg" : (1 << 18), - "ha" : (1 << 19), - "rpctrace" : (1 << 20), - "vfstrace" : (1 << 21), - "reada" : (1 << 22), - "mmap" : (1 << 23), - "config" : (1 << 24), - "console" : (1 << 25), - "quota" : (1 << 26), - "sec" : (1 << 27), - } - -subsystem_names = { - "undefined" : (1 << 0), - "mdc" : (1 << 1), - "mds" : (1 << 2), - "osc" : (1 << 3), - "ost" : (1 << 4), - "class" : (1 << 5), - "log" : (1 << 6), - "llite" : (1 << 7), - "rpc" : (1 << 8), - "lnet" : (1 << 10), - "portals" : (1 << 10), # deprecated - "lnd" : (1 << 11), - "nal" : (1 << 11), # deprecated - "pinger" : (1 << 12), - "filter" : (1 << 13), - "ptlbd" : (1 << 14), # deprecated - "echo" : (1 << 15), - "ldlm" : (1 << 16), - "lov" : (1 << 17), - "ptlrouter" : (1 << 18), # deprecated - "cobd" : (1 << 19), - "sm" : (1 << 20), - "asobd" : (1 << 21), - "confobd" : (1 << 22), # deprecated - "lmv" : (1 << 23), - "cmobd" : (1 << 24), - "sec" : (1 << 25), - "sec" : (1 << 26), - "gss" : (1 << 27), - "gks" : (1 << 28), - "mgc" : (1 << 29), - "mgs" : (1 << 30), - } - - -first_cleanup_error = 0 -def cleanup_error(rc): - global first_cleanup_error - if not first_cleanup_error: - first_cleanup_error = rc - -# ============================================================ -# debugging and error funcs - -def fixme(msg = "this 
feature"): - raise Lustre.LconfError, msg + ' not implemented yet.' - -def panic(*args): - msg = string.join(map(str,args)) - if not config.noexec: - raise Lustre.LconfError(msg) - else: - print "! " + msg - -def log(*args): - msg = string.join(map(str,args)) - print msg - -def logall(msgs): - for s in msgs: - print string.strip(s) - -def debug(*args): - # apparently, (non)execution of the following line affects mds device - # startup order (e.g. two mds's using loopback devices), so always do it. - msg = string.join(map(str,args)) - if config.verbose: - print msg - -# ack, python's builtin int() does not support '0x123' syntax. -# eval can do it, although what a hack! -def my_int(s): - import types - if type(s) is types.IntType: - return s - try: - if (s[0:2] == '0x') or (s[0:1] == '0'): - return eval(s, {}, {}) - else: - return int(s) - except SyntaxError, e: - raise ValueError("not a number") - except TypeError, e: - raise ValueError("not a number") - except NameError, e: - raise ValueError("not a number") - -# ============================================================ -# locally defined exceptions -class CommandError (exceptions.Exception): - def __init__(self, cmd_name, cmd_err, rc=None): - self.cmd_name = cmd_name - self.cmd_err = cmd_err - self.rc = rc - - def dump(self): - import types - if type(self.cmd_err) == types.StringType: - if self.rc: - print "! %s (%d): %s" % (self.cmd_name, self.rc, self.cmd_err) - else: - print "! %s: %s" % (self.cmd_name, self.cmd_err) - elif type(self.cmd_err) == types.ListType: - if self.rc: - print "! %s (error %d):" % (self.cmd_name, self.rc) - else: - print "! %s:" % (self.cmd_name) - for s in self.cmd_err: - print "> %s" %(string.strip(s)) - else: - print self.cmd_err - -# ============================================================ -# handle lctl interface -class LCTLInterface: - """ - Manage communication with lctl - """ - - def __init__(self, cmd): - """ - Initialize close by finding the lctl binary. 
- """ - self.lctl = find_prog(cmd) - self.save_file = '' - self.record_device = '' - if not self.lctl: - if config.noexec: - debug('! lctl not found') - self.lctl = 'lctl' - else: - raise CommandError('lctl', "unable to find lctl binary.") - - def use_save_file(self, file): - self.save_file = file - - def record(self, dev_name, logname): - log("Recording log", logname, "on", dev_name) - self.record_device = dev_name - self.record_log = logname - - def end_record(self): - log("End recording log", self.record_log, "on", self.record_device) - self.record_device = None - self.record_log = None - - def set_nonblock(self, fd): - fl = fcntl.fcntl(fd, F_GETFL) - fcntl.fcntl(fd, F_SETFL, fl | os.O_NDELAY) - - def run(self, cmds): - """ - run lctl - the cmds are written to stdin of lctl - lctl doesn't return errors when run in script mode, so - stderr is checked - should modify command line to accept multiple commands, or - create complex command line options - """ - cmd_line = self.lctl - if self.save_file: - cmds = '\n dump ' + self.save_file + '\n' + cmds - elif self.record_device: - cmds = """ - device $%s - record %s - %s""" % (self.record_device, self.record_log, cmds) - - debug("+", cmd_line, cmds) - if config.noexec: return (0, []) - - child = popen2.Popen3(cmd_line, 1) # Capture stdout and stderr from command - child.tochild.write(cmds + "\nq\n") - child.tochild.close() - - # From "Python Cookbook" from O'Reilly - outfile = child.fromchild - outfd = outfile.fileno() - self.set_nonblock(outfd) - errfile = child.childerr - errfd = errfile.fileno() - self.set_nonblock(errfd) - - outdata = errdata = '' - outeof = erreof = 0 - while 1: - ready = select.select([outfd,errfd],[],[]) # Wait for input - if outfd in ready[0]: - outchunk = outfile.read() - if outchunk == '': outeof = 1 - outdata = outdata + outchunk - if errfd in ready[0]: - errchunk = errfile.read() - if errchunk == '': erreof = 1 - errdata = errdata + errchunk - if outeof and erreof: break - # end of 
"borrowed" code - - ret = child.wait() - if os.WIFEXITED(ret): - rc = os.WEXITSTATUS(ret) - else: - rc = 0 - if rc or len(errdata): - raise CommandError(self.lctl, errdata, rc) - return rc, outdata - - def runcmd(self, *args): - """ - run lctl using the command line - """ - cmd = string.join(map(str,args)) - debug("+", self.lctl, cmd) - rc, out = run(self.lctl, cmd) - if rc: - raise CommandError(self.lctl, out, rc) - return rc, out - - def unconfigure_network(self): - """get lnet to unreference itself""" - cmds = """ - network unconfigure""" - self.run(cmds) - - def clear_log(self, dev, log): - """ clear an existing log """ - cmds = """ - device $%s - probe - clear_log %s - quit """ % (dev, log) - self.run(cmds) - - # create a new connection - def add_uuid(self, net_type, uuid, nid): - if net_type != 'lnet' and string.find(nid,'@') < 0: - nidstr = nid + "@" + net_type - else: - nidstr = nid - cmds = "\n add_uuid %s %s" %(uuid, nidstr) - self.run(cmds) - - def connect(self, srv): - if not srv.nid_uuid: - panic('nid_uuid not set for ', srv.net_type, srv.nid) - hostaddr = srv.db.get_hostaddr() - if len(hostaddr) > 1: - panic('multiple --hostaddr for ', srv.nid_uuid, ' not supported') - elif len(hostaddr) == 1 and hostaddr[0] != srv.nid: - panic('different --hostaddr and --nid for ', srv.nid_uuid, ' not supported') - else: - self.add_uuid(srv.net_type, srv.nid_uuid, srv.nid) - - # Recover a device - def recover(self, dev_name, new_conn): - cmds = """ - device $%s - recover %s""" %(dev_name, new_conn) - self.run(cmds) - - # disconnect one connection - def disconnect(self, srv): - if not srv.nid_uuid: - panic('nid_uuid not set for ', srv.net_type, srv.nid) - self.del_uuid(srv.nid_uuid) - - def del_uuid(self, uuid): - cmds = """ - ignore_errors - del_uuid %s - quit""" % (uuid,) - self.run(cmds) - - def attach(self, type, name, uuid): - cmds = """ - attach %s %s %s - quit""" % (type, name, uuid) - self.run(cmds) - - def setup(self, name, setup = ""): - cmds = """ - 
cfg_device %s - setup %s - quit""" % (name, setup) - self.run(cmds) - - def abort_recovery(self, name): - cmds = """ - ignore_errors - device $%s - abort_recovery - quit""" % (name) - self.run(cmds) - - def add_conn(self, name, conn_uuid): - cmds = """ - cfg_device %s - add_conn %s - quit""" % (name, conn_uuid) - self.run(cmds) - - # create a new device with lctl - def newdev(self, type, name, uuid, setup = ""): - self.attach(type, name, uuid); - try: - self.setup(name, setup) - except CommandError, e: - self.cleanup(name, uuid, 0) - raise e - if (config.abort_recovery): - if (type == 'obdfilter' or type == 'mds'): - self.abort_recovery(name) - - # cleanup a device - def cleanup(self, name, uuid, force, failover = 0): - if failover: force = 1 - cmds = """ - ignore_errors - cfg_device $%s - cleanup %s %s - detach - quit""" % (name, ('', 'force')[force], - ('', 'failover')[failover]) - self.run(cmds) - - # create an lov - def lov_setup(self, name, uuid, desc_uuid, mdsuuid, stripe_cnt, - stripe_sz, stripe_off, pattern): - cmds = """ - attach lov %s %s - lov_setup %s %d %d %d %s - quit""" % (name, uuid, desc_uuid, stripe_cnt, stripe_sz, stripe_off, pattern) - self.run(cmds) - - # add an OBD to a LOV - def lov_add_obd(self, name, uuid, obd_uuid, index, gen): - cmds = """ - cfg_device %s - lov_modify_tgts add %s %s %s %s - quit""" % (name, name, obd_uuid, index, gen) - self.run(cmds) - - # delete an OBD from a LOV - def lov_del_obd(self, name, uuid, obd_uuid, index, gen): - cmds = """ - cfg_device %s - lov_modify_tgts del %s %s %s %s - quit""" % (name, name, obd_uuid, index, gen) - self.run(cmds) - - # deactivate an OBD - def deactivate(self, name): - cmds = """ - cfg_device %s - deactivate - quit""" % (name) - self.run(cmds) - - # dump the log file - def dump(self, dump_file): - cmds = """ - debug_kernel %s 1 - quit""" % (dump_file) - self.run(cmds) - - # get list of devices - def device_list(self): - ret = [] - if PLATFORM == 'LINUX': - devices = 
'/proc/fs/lustre/devices' - if os.access(devices, os.R_OK): - try: - fp = open(devices, 'r') - ret = fp.readlines() - fp.close() - except IOError, e: - log(e) - elif PLATFORM == 'DARWIN': - rc, out = self.run("device_list") - ret = out.split("\n") - if len(ret) == 0: - return ret - tail = ret[-1] - if not tail: - # remove the last empty line - ret = ret[:-1] - return ret - - # get lustre version - def lustre_version(self): - rc, out = self.runcmd('version') - return out - - # dump mount options - def mount_option(self, profile, osc, mdc): - cmds = """ - mount_option %s %s %s - quit""" % (profile, osc, mdc) - self.run(cmds) - - # delete mount options - def del_mount_option(self, profile): - cmds = """ - del_mount_option %s - quit""" % (profile,) - self.run(cmds) - - def set_timeout(self, timeout): - cmds = """ - set_timeout %s - quit""" % (timeout,) - self.run(cmds) - - # set lustre upcall - def set_lustre_upcall(self, upcall): - cmds = """ - set_lustre_upcall %s - quit""" % (upcall,) - self.run(cmds) -# ============================================================ -# Various system-level functions -# (ideally moved to their own module) - -# Run a command and return the output and status. -# stderr is sent to /dev/null, could use popen3 to -# save it if necessary -def runcmd(cmd): - debug ("+", cmd) - if config.noexec: return (0, []) - f = os.popen(cmd + ' 2>&1') - out = f.readlines() - ret = f.close() - if ret: - ret = ret >> 8 - else: - ret = 0 - return (ret, out) - -def run(*args): - cmd = string.join(map(str,args)) - return runcmd(cmd) - -# Run a command in the background. 
-def run_daemon(*args): - cmd = string.join(map(str,args)) - debug ("+", cmd) - if config.noexec: return 0 - f = os.popen(cmd + ' 2>&1') - ret = f.close() - if ret: - ret = ret >> 8 - else: - ret = 0 - return ret - -# Determine full path to use for an external command -# searches dirname(argv[0]) first, then PATH -def find_prog(cmd): - syspath = string.split(os.environ['PATH'], ':') - cmdpath = os.path.dirname(sys.argv[0]) - syspath.insert(0, cmdpath); - if config.portals: - syspath.insert(0, os.path.join(config.portals, 'utils/')) - for d in syspath: - prog = os.path.join(d,cmd) - if os.access(prog, os.X_OK): - return prog - return '' - -# Recursively look for file starting at base dir -def do_find_file(base, mod): - fullname = os.path.join(base, mod) - if os.access(fullname, os.R_OK): - return fullname - for d in os.listdir(base): - dir = os.path.join(base,d) - if os.path.isdir(dir): - module = do_find_file(dir, mod) - if module: - return module - -def find_module(src_dir, dev_dir, modname): - modbase = src_dir +'/'+ dev_dir +'/'+ modname - for modext in '.ko', '.o': - module = modbase + modext - try: - if os.access(module, os.R_OK): - return module - except OSError: - pass - return None - -# is the path a block device? 
-def is_block(path): - s = () - try: - s = os.stat(path) - except OSError: - return 0 - return stat.S_ISBLK(s[stat.ST_MODE]) - -def my_realpath(path): - try: - if os.path.islink(path): - # get the realpath of the mount point path - if 'realpath' in dir(os.path): - real_path = os.path.realpath(path) - else: - real_path = path - link_count = 0 - while os.path.islink(real_path) and (link_count < 20): - link_count = link_count + 1 - path_link = os.readlink(real_path) - if os.path.isabs(path_link): - real_path = path_link - else: - real_path = os.path.join(os.path.dirname(real_path), path_link) - if link_count > 19: - panic("Encountered too many symbolic links resolving path:", path) - else: - real_path = path - - return real_path - except: - panic("Fatal error realpath()ing path:", path) - - -# build fs according to type -# fixme: dangerous -def mkfs(dev, devsize, fstype, jsize, isize, mkfsoptions, isblock=1): - block_cnt = '' - jopt = '' - iopt = '' - if devsize: - if devsize < 8000: - panic("size of filesystem on '%s' must be larger than 8MB, but is set to %s"% - (dev, devsize)) - # devsize is in 1k, and fs block count is in 4k - block_cnt = devsize/4 - - if fstype in ('ext3', 'ldiskfs'): - # ext3 journal size is in megabytes - if jsize == 0: - if devsize == 0: - if not is_block(dev): - ret, out = runcmd("ls -l %s" %dev) - devsize = int(string.split(out[0])[4]) / 1024 - else: - # sfdisk works for symlink, hardlink, and realdev - ret, out = runcmd("sfdisk -s %s" %dev) - if not ret: - devsize = int(out[0]) - else: - # sfdisk -s will fail for too large block device, - # then, read the size of partition from /proc/partitions - - # get the realpath of the device - # it may be the real device, such as /dev/hda7 - # or the hardlink created via mknod for a device - real_dev = my_realpath(dev) - - # get the major and minor number of the realpath via ls - # it seems python(os.stat) does not return - # the st_rdev member of the stat structure - ret, out = runcmd("ls -l %s" 
%real_dev) - major = string.split(string.split(out[0])[4], ",")[0] - minor = string.split(out[0])[5] - - # get the devsize from /proc/partitions with the major and minor number - ret, out = runcmd("cat /proc/partitions") - for line in out: - if len(line) > 1: - if string.split(line)[0] == major and string.split(line)[1] == minor: - devsize = int(string.split(line)[2]) - break - - if devsize > 1024 * 1024: - jsize = ((devsize / 102400) * 4) - if jsize > 400: - jsize = 400 - if jsize: jopt = "-J size=%d" %(jsize,) - if isize: iopt = "-I %d" %(isize,) - mkfs = 'mkfs.ext2 -j -b 4096 ' - if not isblock or config.force: - mkfs = mkfs + ' -F ' - elif fstype == 'reiserfs': - # reiserfs journal size is in blocks - if jsize: jopt = "--journal_size %d" %(jsize,) - mkfs = 'mkreiserfs -ff' - else: - panic('unsupported fs type: ', fstype) - - if config.mkfsoptions != None: - mkfs = mkfs + ' ' + config.mkfsoptions - if mkfsoptions != None: - mkfs = mkfs + ' ' + mkfsoptions - (ret, out) = run (mkfs, jopt, iopt, dev, block_cnt) - if ret: - panic("Unable to build fs:", dev, string.join(out)) - # enable hash tree indexing on fsswe - if fstype in ('ext3', 'ldiskfs'): - htree = 'tune2fs -O dir_index' - (ret, out) = run (htree, dev) - if ret: - panic("Unable to enable htree:", dev) - -# some systems use /dev/loopN, some /dev/loop/N -def loop_base(): - import re - loop = '/dev/loop' - if not os.access(loop + str(0), os.R_OK): - loop = loop + '/' - if not os.access(loop + str(0), os.R_OK): - loop='/dev/loop' - return loop - -# find loop device assigned to the file -def find_loop(file): - loop = loop_base() - for n in xrange(0, MAX_LOOP_DEVICES): - dev = loop + str(n) - if os.access(dev, os.R_OK): - (stat, out) = run('losetup', dev) - if out and stat == 0: - m = re.search(r'\((.*)\)', out[0]) - if m and file == m.group(1): - return dev - else: - break - return '' - -# create file if necessary and assign the first free loop device -def init_loop(file, size, fstype, journal_size, inode_size, 
mkfsoptions, reformat): - dev = find_loop(file) - if dev: - print 'WARNING file:', file, 'already mapped to', dev - return dev - if reformat or not os.access(file, os.R_OK | os.W_OK): - if size < 8000: - panic("size of loopback file '%s' must be larger than 8MB, but is set to %s" % (file,size)) - (ret, out) = run("dd if=/dev/zero bs=1k count=0 seek=%d of=%s" %(size, - file)) - if ret: - panic("Unable to create backing store:", file) - mkfs(file, size, fstype, journal_size, inode_size, mkfsoptions, isblock=0) - - loop = loop_base() - # find next free loop - for n in xrange(0, MAX_LOOP_DEVICES): - dev = loop + str(n) - if os.access(dev, os.R_OK): - (stat, out) = run('losetup', dev) - if stat: - (stat, out) = run('losetup', dev, file) - if stat: - panic("losetup failed: (%s) %s" % (stat, out[0].strip())) - return dev - else: - print "out of loop devices" - return '' - print "out of loop devices" - return '' - -# undo loop assignment -def clean_loop(file): - dev = find_loop(file) - if dev: - ret, out = run('losetup -d', dev) - if ret: - log('unable to clean loop device:', dev, 'for file:', file) - logall(out) - -# determine if dev is formatted as a filesystem -def need_format(fstype, dev): - # FIXME don't know how to implement this - return 0 - -# initialize a block device if needed -def block_dev(dev, size, fstype, reformat, autoformat, journal_size, - inode_size, mkfsoptions): - if config.noexec: return dev - if not is_block(dev): - dev = init_loop(dev, size, fstype, journal_size, inode_size, - mkfsoptions, reformat) - elif reformat or (need_format(fstype, dev) and autoformat == 'yes'): - mkfs(dev, size, fstype, journal_size, inode_size, mkfsoptions, - isblock=0) -# else: -# panic("device:", dev, -# "not prepared, and autoformat is not set.\n", -# "Rerun with --reformat option to format ALL filesystems") - return dev - -def if2addr(iface): - """lookup IP address for an interface""" - rc, out = run("/sbin/ifconfig", iface) - if rc or not out: - return None - addr = 
string.split(out[1])[1] - ip = string.split(addr, ':')[1] - return ip - -def def_mount_options(fstype, target, blkdev): - """returns deafult mount options for passed fstype and target (mds, ost)""" - if fstype == 'ext3' or fstype == 'ldiskfs': - mountfsoptions = "errors=remount-ro" - if target == 'ost': - if sys_get_branch() == '2.4': - mountfsoptions = "%s,asyncdel" % (mountfsoptions) - #else: - # mountfsoptions = "%s,extents,mballoc" % (mountfsoptions) - elif target == 'mds': - if config.user_xattr: - mountfsoptions = "%s,user_xattr" % (mountfsoptions) - if config.acl: - mountfsoptions = "%s,acl" % (mountfsoptions) - - if blkdev: - # grab superblock info - dumpe2fs="dumpe2fs -f -h" - (ret, sb) = run(dumpe2fs, blkdev) - if ret: - panic("unable to get superblock for ", blkdev) - - # extract journal UUID - journal_UUID='' - journal_DEV='' - for line in sb: - lst = string.split(line, ":") - if lst[0] == 'Journal UUID': - if len(lst[1]) < 3: - panic("cannot retrieve journal UUID for ", blkdev) - if string.split(lst[1])[0] != '': - journal_UUID = string.split(lst[1])[0] - debug(blkdev, 'has journal UUID', journal_UUID) - if lst[0] == 'Journal device': - if len(lst[1]) < 3: - panic("cannot retrieve journal device for ", blkdev) - if string.split(lst[1])[0] != '0x0000': - journal_DEV = string.split(lst[1])[0] - debug(blkdev, 'has journal device', journal_DEV) - break - - if len(journal_UUID) == 0 or len(journal_DEV) == 0: - debug('no external journal found for', blkdev) - # use internal journal - return mountfsoptions - - # run blkid, lookup highest-priority device with matching UUID - blkid = "blkid -o device -l -t UUID='%s'" % (journal_UUID) - (ret, devname) = run(blkid) - if ret or len(devname) == 0: - panic("cannot find external journal for ", blkdev) - debug('found', blkdev, 'journal UUID', journal_UUID, 'on', - string.replace(devname[0], '\n', '')) - - try: # sigh, python 1.5 does not support os.stat().st_rdev - jdevpath = my_realpath(string.replace(devname[0], 
'\n', '')) - ret, out = runcmd("ls -l %s" %jdevpath) - debug('ls -l:', out) - major = int(string.split(string.split(out[0])[4], ',')[0]) - minor = int(string.split(out[0])[5]) - debug('major', major, 'minor', minor) - rdev = major << 8 | minor - except OSError: - panic("cannot stat ", devname[0]) - - debug('found', blkdev, 'journal UUID', journal_UUID, 'on', - jdevpath, 'rdev', rdev) - - # add mount option - if string.atoi(journal_DEV, 0) != rdev: - mountfsoptions = "%s,journal_dev=%#x" % (mountfsoptions,rdev) - - return mountfsoptions - return "" - -def sys_get_branch(): - """Returns kernel release""" - return os.uname()[2][:3] - -def mod_loaded(modname): - """Check if a module is already loaded. Look in /proc/modules for it.""" - if PLATFORM == 'LINUX': - try: - fp = open('/proc/modules') - lines = fp.readlines() - fp.close() - # please forgive my tired fingers for this one - ret = filter(lambda word, mod=modname: word == mod, - map(lambda line: string.split(line)[0], lines)) - return ret - except Exception, e: - return 0 - elif PLATFORM == 'DARWIN': - ret, out = run('/usr/sbin/kextstat | /usr/bin/grep', modname) - if ret == 0: - return 1 - else: - return 0 - else: - return 0 - -# XXX: instead of device_list, ask for $name and see what we get -def is_prepared(name): - """Return true if a device exists for the name""" - if config.lctl_dump: - return 0 - if (config.noexec or config.record) and config.cleanup: - return 1 - try: - # expect this format: - # 1 UP ldlm ldlm ldlm_UUID 2 - out = lctl.device_list() - for s in out: - if name == string.split(s)[3]: - return 1 - except CommandError, e: - e.dump() - return 0 - -def is_network_prepared(): - """If the any device exists, then assume that all networking - has been configured""" - out = lctl.device_list() - return len(out) > 0 - -def fs_is_mounted(path): - """Return true if path is a mounted lustre filesystem""" - try: - real_path = my_realpath(path) - - fp = open('/proc/mounts') - lines = fp.readlines() - 
fp.close() - for l in lines: - a = string.split(l) - if a[1] == real_path and a[2] == 'lustre_lite': - return 1 - except IOError, e: - log(e) - return 0 - -class kmod: - """Manage kernel modules""" - def __init__(self, lustre_dir, portals_dir): - self.lustre_dir = lustre_dir - self.portals_dir = portals_dir - self.kmodule_list = [] - - def add_portals_module(self, dev_dir, modname): - """Append a module to list of modules to load.""" - self.kmodule_list.append((self.portals_dir, dev_dir, modname)) - - def add_lustre_module(self, dev_dir, modname): - """Append a module to list of modules to load.""" - self.kmodule_list.append((self.lustre_dir, dev_dir, modname)) - - def load_module(self): - """Load all the modules in the list in the order they appear.""" - for src_dir, dev_dir, mod in self.kmodule_list: - if mod_loaded(mod) and not config.noexec: - continue - log ('loading module:', mod, 'srcdir', src_dir, 'devdir', dev_dir) - if PLATFORM == 'LINUX': - options = '' - if mod == 'lnet': - #For LNET we really need modprobe to load defined LNDs - run('/sbin/modprobe lnet') - #But if that fails, try insmod anyhow with dev option - #accept=all for dev liblustre testing - options = 'accept=all' - if src_dir: - module = find_module(src_dir, dev_dir, mod) - if not module: - panic('module not found:', mod) - (rc, out) = run('/sbin/insmod', module, options) - if rc and not mod_loaded(mod): - if rc == 1: - print("Bad module options? Check dmesg.") - raise CommandError('insmod', out, rc) - else: - (rc, out) = run('/sbin/modprobe', mod) - if rc and not mod_loaded(mod): - if rc == 1: - print("Bad module options? 
Check dmesg.") - raise CommandError('modprobe', out, rc) - elif PLATFORM == 'DARWIN': - run('/sbin/kextload', KEXTPATH + mod + '.kext'); - - def cleanup_module(self): - """Unload the modules in the list in reverse order.""" - - rev = self.kmodule_list[:] # make *copy* of list - rev.reverse() - for src_dir, dev_dir, mod in rev: - if not mod_loaded(mod) and not config.noexec: - continue - if mod == 'ksocklnd' and not config.noexec: - # Ignore ksocklnd in module list (lnet will remove) - continue - log('unloading module:', mod) - if mod == 'lnet' and not config.noexec: - # remove any self-ref portals created - lctl.unconfigure_network() - if config.dump: - debug('dumping debug log to', config.dump) - # debug hack - lctl.dump(config.dump) - log('unloading the network') - lctl.unconfigure_network() - if mod_loaded("ksocklnd"): - if PLATFORM == 'LINUX': - run('/sbin/rmmod ksocklnd') - elif PLATFORM == 'DARWIN': - run('/sbin/kextunload', KEXTPATH+'ksocklnd.kext') - if mod_loaded("kqswlnd"): - run('/sbin/rmmod kqswlnd') - if mod_loaded("kgmlnd"): - run('/sbin/rmmod kgmlnd') - if mod_loaded("kopeniblnd"): - run('/sbin/rmmod kopeniblnd') - if mod_loaded("kiiblnd"): - run('/sbin/rmmod kiiblnd') - if mod_loaded("kviblnd"): - run('/sbin/rmmod kviblnd') - if mod_loaded("kciblnd"): - run('/sbin/rmmod kciblnd') - if mod_loaded("ko2iblnd"): - run('/sbin/rmmod ko2iblnd') - if mod_loaded("kralnd"): - run('/sbin/rmmod kralnd') - if mod_loaded("kptllnd"): - run('/sbin/rmmod kptllnd') - if PLATFORM == 'LINUX': - (rc, out) = run('/sbin/rmmod', mod) - elif PLATFORM == 'DARWIN': - (rc, out) = run('/sbin/kextunload', KEXTPATH+mod+'.kext'); - if rc: - log('! unable to unload module:', mod) - logall(out) - - -# ============================================================ -# Classes to prepare and cleanup the various objects -# -class Module: - """ Base class for the rest of the modules. The default cleanup method is - defined here, as well as some utilitiy funcs. 
- """ - def __init__(self, module_name, db): - self.db = db - self.module_name = module_name - self.name = self.db.getName() - self.uuid = self.db.getUUID() - self._server = None - self._connected = 0 - self.kmod = kmod(config.lustre, config.portals) - - def info(self, *args): - msg = string.join(map(str,args)) - log (self.module_name + ":", self.name, self.uuid, msg) - - def cleanup(self): - """ default cleanup, used for most modules """ - self.info() - try: - lctl.cleanup(self.name, self.uuid, config.force) - except CommandError, e: - log(self.module_name, "cleanup failed: ", self.name) - e.dump() - cleanup_error(e.rc) - - def add_portals_module(self, dev_dir, modname): - """Append a module to list of modules to load.""" - self.kmod.add_portals_module(dev_dir, modname) - - def add_lustre_module(self, dev_dir, modname): - """Append a module to list of modules to load.""" - self.kmod.add_lustre_module(dev_dir, modname) - - def load_module(self): - """Load all the modules in the list in the order they appear.""" - self.kmod.load_module() - - def cleanup_module(self): - """Unload the modules in the list in reverse order.""" - if self.safe_to_clean(): - self.kmod.cleanup_module() - - def safe_to_clean(self): - return 1 - - def safe_to_clean_modules(self): - return self.safe_to_clean() - -class Network(Module): - def __init__(self,db,nid_uuid=0): - Module.__init__(self, 'NETWORK', db) - self.net_type = self.db.get_val('nettype') - self.nid = self.db.get_val('nid', '*') - self.cluster_id = self.db.get_val('clusterid', "0") - self.port = self.db.get_val_int('port', 0) - self.nid_uuid = nid_uuid - self.add_portals_module('libcfs', 'libcfs') - self.add_portals_module('lnet', 'lnet') - # Add the socklnd for developers without modprobe.conf (umls) - self.add_portals_module('klnds/socklnd', 'ksocklnd') - - def prepare(self): - if is_network_prepared(): - return - self.info(self.net_type, self.nid) - if self.net_type == 'tcp': - sys_tweak_socknal() - if self.net_type == 
'elan': - sys_optimize_elan() - - def safe_to_clean(self): - if PLATFORM == 'LINUX': - return not is_network_prepared() - elif PLATFORM == 'DARWIN': - # XXX always assume it's safe to clean - return 1 - return 1 - - def cleanup(self): - self.info(self.net_type, self.nid) - -# This is only needed to load the modules; the LDLM device -# is now created automatically. -class LDLM(Module): - def __init__(self,db): - Module.__init__(self, 'LDLM', db) - self.add_lustre_module('lvfs', 'lvfs') - self.add_lustre_module('obdclass', 'obdclass') - self.add_lustre_module('ptlrpc', 'ptlrpc') - self.add_lustre_module('ptlrpc/gss', 'ptlrpc_gss') - - def prepare(self): - return - - def cleanup(self): - return - -class LOV(Module): - def __init__(self, db, uuid, fs_name, name_override = None, config_only = None): - Module.__init__(self, 'LOV', db) - if name_override != None: - self.name = "lov_%s" % name_override - self.add_lustre_module('lov', 'lov') - self.mds_uuid = self.db.get_first_ref('mds') - self.stripe_sz = self.db.get_val_int('stripesize', 1048576) - self.stripe_off = self.db.get_val_int('stripeoffset', 0) - self.pattern = self.db.get_val_int('stripepattern', 0) - self.devlist = [] - self.stripe_cnt = self.db.get_val_int('stripecount', 1) - self.osclist = [] - self.desc_uuid = self.uuid - self.uuid = generate_client_uuid(self.name) - self.fs_name = fs_name - # settings below here won't be seen by the MDSDEV code! 
- if config_only: - self.config_only = 1 - return - self.config_only = None - mds = self.db.lookup(self.mds_uuid) - self.mds_name = mds.getName() - self.devlist = self.db.get_lov_tgts('lov_tgt') - for (obd_uuid, index, gen, active) in self.devlist: - if obd_uuid == '': - continue - obd = self.db.lookup(obd_uuid) - osc = get_osc(obd, self.uuid, fs_name) - if osc: - self.osclist.append((osc, index, gen, active)) - else: - panic('osc not found:', obd_uuid) - if self.osclist == []: - debug("get_lov_tgts failed, using get_refs"); - index = 0 - self.devlist = self.db.get_refs('obd') - for obd_uuid in self.devlist: - obd = self.db.lookup(obd_uuid) - osc = get_osc(obd, self.uuid, fs_name) - if osc: - self.osclist.append((osc, index, 1, 1)) - else: - panic('osc not found:', obd_uuid) - index = index + 1 - if self.osclist == []: - panic('No OSCs configured for LOV') - debug('dbg LOV __init__:', self.osclist, self.devlist, self.stripe_cnt) - - def prepare(self): - debug('dbg LOV prepare') - if is_prepared(self.name): - return - debug('dbg LOV prepare:', self.osclist, self.devlist) - self.info(self.mds_uuid, self.stripe_cnt, self.stripe_sz, - self.stripe_off, self.pattern, self.devlist, - self.mds_name) - lctl.lov_setup(self.name, self.uuid, - self.desc_uuid, self.mds_name, self.stripe_cnt, - self.stripe_sz, self.stripe_off, self.pattern) - if self.osclist == []: - panic('No OSCs configured for LOV?') - for (osc, index, gen, active) in self.osclist: - target_uuid = osc.target_uuid - try: - # Only ignore connect failures with --force, which - # isn't implemented here yet. 
- osc.active = active - osc.prepare(ignore_connect_failure=0) - except CommandError, e: - print "Error preparing OSC %s\n" % osc.uuid - raise e - lctl.lov_add_obd(self.name, self.uuid, target_uuid, index, gen) - - def cleanup(self): - if is_prepared(self.name): - Module.cleanup(self) - for (osc, index, gen, active) in self.osclist: - osc.cleanup() - if self.config_only: - panic("Can't clean up config_only LOV ", self.name) - - def load_module(self): - if self.config_only: - panic("Can't load modules for config_only LOV ", self.name) - for (osc, index, gen, active) in self.osclist: - osc.load_module() - break - Module.load_module(self) - - def cleanup_module(self): - if self.config_only: - panic("Can't cleanup modules for config_only LOV ", self.name) - Module.cleanup_module(self) - for (osc, index, gen, active) in self.osclist: - if active: - osc.cleanup_module() - break - -class MDSDEV(Module): - def __init__(self,db): - Module.__init__(self, 'MDSDEV', db) - self.devpath = self.db.get_val('devpath','') - self.size = self.db.get_val_int('devsize', 0) - self.journal_size = self.db.get_val_int('journalsize', 0) - - self.fstype = self.db.get_val('fstype', '') - if sys_get_branch() == '2.4' and self.fstype == 'ldiskfs': - self.fstype = 'ext3' - elif sys_get_branch() == '2.6' and self.fstype == 'ext3': - self.fstype = 'ldiskfs' - - self.nspath = self.db.get_val('nspath', '') - self.mkfsoptions = '-i 4096 ' + self.db.get_val('mkfsoptions', '') - self.mountfsoptions = self.db.get_val('mountfsoptions', '') - if config.quota: - self.quota = config.quota - else: - self.quota = self.db.get_val('quota', '') - # overwrite the orignal MDSDEV name and uuid with the MDS name and uuid - target_uuid = self.db.get_first_ref('target') - mds = self.db.lookup(target_uuid) - self.name = mds.getName() - self.filesystem_uuids = mds.get_refs('filesystem') - # FIXME: if fstype not set, then determine based on kernel version - self.format = self.db.get_val('autoformat', "no") - if 
mds.get_val('failover', '1') != '0': - self.failover_mds = 'f' - else: - self.failover_mds = 'n' - active_uuid = get_active_target(mds) - if not active_uuid: - panic("No target device found:", target_uuid) - if active_uuid == self.uuid: - self.active = 1 - else: - self.active = 0 - if self.active and config.group and config.group != mds.get_val('group', mds.get_val('name')): - self.active = 0 - - self.inode_size = self.db.get_val_int('inodesize', 0) - debug('original inode_size ', self.inode_size) - if self.inode_size == 0: - # find the LOV for this MDS - lovconfig_uuid = mds.get_first_ref('lovconfig') - if not lovconfig_uuid: - panic("No LOV config found for MDS ", mds.name) - lovconfig = mds.lookup(lovconfig_uuid) - lov_uuid = lovconfig.get_first_ref('lov') - if not lov_uuid: - panic("No LOV found for lovconfig ", lovconfig.name) - lov = LOV(self.db.lookup(lov_uuid), lov_uuid, 'FS_name', config_only = 1) - - # default stripe count controls default inode_size - if (lov.stripe_cnt > 0): - stripe_count = lov.stripe_cnt - else: - stripe_count = 1 - if stripe_count > 77: - self.inode_size = 512 - elif stripe_count > 34: - self.inode_size = 2048 - elif stripe_count > 13: - self.inode_size = 1024 - #elif stripe_count < 3: - # self.inode_size = 256 - else: - self.inode_size = 512 - debug('stripe_count ', stripe_count,' inode_size ',self.inode_size) - - self.target_dev_uuid = self.uuid - self.uuid = target_uuid - - # loading modules - if self.quota: - self.add_lustre_module('quota', 'lquota') - self.add_lustre_module('mdc', 'mdc') - self.add_lustre_module('osc', 'osc') - self.add_lustre_module('lov', 'lov') - self.add_lustre_module('mds', 'mds') - if self.fstype == 'ldiskfs': - self.add_lustre_module('ldiskfs', 'ldiskfs') - if self.fstype: - self.add_lustre_module('lvfs', 'fsfilt_%s' % (self.fstype)) - - def load_module(self): - if self.active: - Module.load_module(self) - - def prepare(self): - if is_prepared(self.name): - return - if not self.active: - debug(self.uuid, 
"not active") - return - if config.reformat: - # run write_conf automatically, if --reformat used - self.write_conf() - self.info(self.devpath, self.fstype, self.size, self.format) - # never reformat here - blkdev = block_dev(self.devpath, self.size, self.fstype, 0, - self.format, self.journal_size, self.inode_size, - self.mkfsoptions) - if not is_prepared('MDT'): - lctl.newdev("mdt", 'MDT', 'MDT_UUID', setup ="") - try: - mountfsoptions = def_mount_options(self.fstype, 'mds', blkdev) - - if config.mountfsoptions: - if mountfsoptions: - mountfsoptions = mountfsoptions + ',' + config.mountfsoptions - else: - mountfsoptions = config.mountfsoptions - if self.mountfsoptions: - mountfsoptions = mountfsoptions + ',' + self.mountfsoptions - else: - if self.mountfsoptions: - if mountfsoptions: - mountfsoptions = mountfsoptions + ',' + self.mountfsoptions - else: - mountfsoptions = self.mountfsoptions - - print 'MDS mount options: ' + mountfsoptions - - lctl.newdev("mds", self.name, self.uuid, - setup ="%s %s %s %s %s" %(blkdev, self.fstype, self.name, - mountfsoptions, self.quota)) - self.group_upcall = self.db.get_val('group_upcall','') - sys_set_group_upcall(self.name, self.group_upcall) - - except CommandError, e: - if e.rc == 2: - panic("MDS failed to start. Check the syslog for details." 
+ - " (May need to run lconf --write-conf)") - else: - raise e - - def write_conf(self): - if is_prepared(self.name): - return - self.info(self.devpath, self.fstype, self.format) - blkdev = block_dev(self.devpath, self.size, self.fstype, - config.reformat, self.format, self.journal_size, - self.inode_size, self.mkfsoptions) - lctl.newdev("mds", self.name, self.uuid, - setup ="%s %s" %(blkdev, self.fstype)) - - # record logs for the MDS lov - for uuid in self.filesystem_uuids: - log("recording clients for filesystem:", uuid) - fs = self.db.lookup(uuid) - obd_uuid = fs.get_first_ref('obd') - client_uuid = generate_client_uuid(self.name) - client = VOSC(self.db.lookup(obd_uuid), client_uuid, self.name, - self.name) - config.record = 1 - lctl.clear_log(self.name, self.name) - lctl.record(self.name, self.name) - client.prepare() - lctl.mount_option(self.name, client.get_name(), "") - lctl.end_record() - config.record = 0 - - # record logs for each client - if config.ldapurl: - config_options = "--ldapurl " + config.ldapurl + " --config " + config.config - else: - config_options = CONFIG_FILE - - for node_db in self.db.lookup_class('node'): - client_name = node_db.getName() - for prof_uuid in node_db.get_refs('profile'): - prof_db = node_db.lookup(prof_uuid) - # refactor this into a funtion to test "clientness" of a node. 
- for ref_class, ref_uuid in prof_db.get_all_refs(): - if ref_class in ('mountpoint','echoclient'): - thing = self.db.lookup(ref_uuid); - fs_uuid = thing.get_first_ref('filesystem') - if not fs_uuid in self.filesystem_uuids: - continue; - - log("Recording log", client_name, "on", self.name) - old_noexec = config.noexec - config.noexec = 0 - noexec_opt = ('', '-n') - ret, out = run (sys.argv[0], - noexec_opt[old_noexec == 1], - " -v --record --nomod --old_conf", - "--record_log", client_name, - "--record_device", self.name, - "--node", client_name, - config_options) - if ret: - lctl.clear_log(self.name, client_name) - print out - self.cleanup() - panic("Record client log %s on %s failed" %( - client_name, self.name)) - if config.verbose: - for s in out: log("record> ", string.strip(s)) - config.noexec = old_noexec - try: - lctl.cleanup(self.name, self.uuid, config.force, config.failover) - except CommandError, e: - log(self.module_name, "cleanup failed: ", self.name) - e.dump() - cleanup_error(e.rc) - Module.cleanup(self) - clean_loop(self.devpath) - - #change the mtime of LLOG to match the XML creation time - if toplustreDB.get_mtime(): - mtime = toplustreDB.get_mtime() - debug("changing mtime of LOGS to %s" %mtime) - ret, mktemp = runcmd("mktemp /tmp/lustre-cmd.XXXXXXXX") - if ret: - log(self.module_name, "create mtime LOGS cmdfile failed: ", self.name) - else: - mtimecmdfile = string.split(mktemp[0])[0] - fd = os.open(mtimecmdfile, os.O_RDWR | os.O_CREAT) - os.write(fd, "\n\n\n\n\n%s\n\n" %mtime) - os.close(fd) - cmd = "debugfs -w -R \"mi /LOGS\" <%s %s" %(mtimecmdfile, self.devpath) - ret, outs = runcmd(cmd) - os.remove(mtimecmdfile) - if ret: - print "Can not change mtime of LOGS by debugfs." 
- - def mds_remaining(self): - out = lctl.device_list() - for s in out: - if string.split(s)[2] in ('mds',): - if string.split(s)[1] in ('ST',): - return 0 - return 1 - - def safe_to_clean(self): - return self.active - - def safe_to_clean_modules(self): - return not self.mds_remaining() - - def cleanup(self): - if not self.active: - debug(self.uuid, "not active") - return - self.info() - if is_prepared(self.name): - try: - lctl.cleanup(self.name, self.uuid, config.force, - config.failover) - except CommandError, e: - log(self.module_name, "cleanup failed: ", self.name) - e.dump() - cleanup_error(e.rc) - Module.cleanup(self) - if not self.mds_remaining() and is_prepared('MDT'): - try: - lctl.cleanup("MDT", "MDT_UUID", config.force, - config.failover) - except CommandError, e: - print "cleanup failed: ", self.name - e.dump() - cleanup_error(e.rc) - clean_loop(self.devpath) - -class OSD(Module): - def __init__(self, db): - Module.__init__(self, 'OSD', db) - self.osdtype = self.db.get_val('osdtype') - self.devpath = self.db.get_val('devpath', '') - self.size = self.db.get_val_int('devsize', 0) - self.journal_size = self.db.get_val_int('journalsize', 0) - - # now as we store fids in EA on OST we need to make inode bigger - self.inode_size = self.db.get_val_int('inodesize', 0) - if self.inode_size == 0: - self.inode_size = 256 - self.mkfsoptions = self.db.get_val('mkfsoptions', '') - # Allocate fewer inodes on large OST devices. Most filesystems - # can be much more aggressive than this, but by default we can't. 
- if self.size > 1000000: - self.mkfsoptions = '-i 16384 ' + self.mkfsoptions - self.mountfsoptions = self.db.get_val('mountfsoptions', '') - if config.quota: - self.quota = config.quota - else: - self.quota = self.db.get_val('quota', '') - - self.fstype = self.db.get_val('fstype', '') - if sys_get_branch() == '2.4' and self.fstype == 'ldiskfs': - self.fstype = 'ext3' - elif sys_get_branch() == '2.6' and self.fstype == 'ext3': - self.fstype = 'ldiskfs' - - self.nspath = self.db.get_val('nspath', '') - target_uuid = self.db.get_first_ref('target') - ost = self.db.lookup(target_uuid) - self.name = ost.getName() - self.format = self.db.get_val('autoformat', 'yes') - if ost.get_val('failover', '1') != '0': - self.failover_ost = 'f' - else: - self.failover_ost = 'n' - - active_uuid = get_active_target(ost) - if not active_uuid: - panic("No target device found:", target_uuid) - if active_uuid == self.uuid: - self.active = 1 - else: - self.active = 0 - if self.active and config.group and config.group != ost.get_val('group', ost.get_val('name')): - self.active = 0 - - self.target_dev_uuid = self.uuid - self.uuid = target_uuid - # modules - if self.quota: - self.add_lustre_module('quota', 'lquota') - self.add_lustre_module('ost', 'ost') - # FIXME: should we default to ext3 here? - if self.fstype == 'ldiskfs': - self.add_lustre_module('ldiskfs', 'ldiskfs') - if self.fstype: - self.add_lustre_module('lvfs' , 'fsfilt_%s' % (self.fstype)) - self.add_lustre_module(self.osdtype, self.osdtype) - - def load_module(self): - if self.active: - Module.load_module(self) - - # need to check /proc/mounts and /etc/mtab before - # formatting anything. - # FIXME: check if device is already formatted. 
- def prepare(self): - if is_prepared(self.name): - return - if not self.active: - debug(self.uuid, "not active") - return - self.info(self.osdtype, self.devpath, self.size, self.fstype, - self.format, self.journal_size, self.inode_size) - if self.osdtype == 'obdecho': - blkdev = '' - else: - blkdev = block_dev(self.devpath, self.size, self.fstype, - config.reformat, self.format, self.journal_size, - self.inode_size, self.mkfsoptions) - - mountfsoptions = def_mount_options(self.fstype, 'ost', blkdev) - - if config.mountfsoptions: - if mountfsoptions: - mountfsoptions = mountfsoptions + ',' + config.mountfsoptions - else: - mountfsoptions = config.mountfsoptions - if self.mountfsoptions: - mountfsoptions = mountfsoptions + ',' + self.mountfsoptions - else: - if self.mountfsoptions: - if mountfsoptions: - mountfsoptions = mountfsoptions + ',' + self.mountfsoptions - else: - mountfsoptions = self.mountfsoptions - - print 'OST mount options: ' + mountfsoptions - - lctl.newdev(self.osdtype, self.name, self.uuid, - setup ="%s %s %s %s %s" %(blkdev, self.fstype, - self.failover_ost, mountfsoptions, - self.quota)) - if not is_prepared('OSS'): - lctl.newdev("ost", 'OSS', 'OSS_UUID', setup ="") - - def osd_remaining(self): - out = lctl.device_list() - for s in out: - if string.split(s)[2] in ('obdfilter', 'obdecho'): - return 1 - - def safe_to_clean(self): - return self.active - - def safe_to_clean_modules(self): - return not self.osd_remaining() - - def cleanup(self): - if not self.active: - debug(self.uuid, "not active") - return - if is_prepared(self.name): - self.info() - try: - lctl.cleanup(self.name, self.uuid, config.force, - config.failover) - except CommandError, e: - log(self.module_name, "cleanup failed: ", self.name) - e.dump() - cleanup_error(e.rc) - if not self.osd_remaining() and is_prepared('OSS'): - try: - lctl.cleanup("OSS", "OSS_UUID", config.force, - config.failover) - except CommandError, e: - print "cleanup failed: ", self.name - e.dump() - 
cleanup_error(e.rc) - if not self.osdtype == 'obdecho': - clean_loop(self.devpath) - -# Generic client module, used by OSC and MDC -class Client(Module): - def __init__(self, tgtdb, uuid, module, fs_name, self_name=None, - module_dir=None): - self.target_name = tgtdb.getName() - self.target_uuid = tgtdb.getUUID() - self.db = tgtdb - self.backup_targets = [] - - self.tgt_dev_uuid = get_active_target(tgtdb) - if not self.tgt_dev_uuid: - panic("No target device found for target:", self.target_name) - - self.kmod = kmod(config.lustre, config.portals) - self._server = None - self._connected = 0 - - self.module = module - self.module_name = string.upper(module) - if not self_name: - self.name = '%s_%s_%s_%s' % (self.module_name, socket.gethostname(), - self.target_name, fs_name) - else: - self.name = self_name - self.uuid = uuid - self.lookup_server(self.tgt_dev_uuid) - self.lookup_backup_targets() - self.fs_name = fs_name - if not module_dir: - module_dir = module - self.add_lustre_module(module_dir, module) - - def lookup_server(self, srv_uuid): - """ Lookup a server's network information """ - self._server_nets = get_ost_net(self.db, srv_uuid) - if len(self._server_nets) == 0: - panic("Unable to find a server for:", srv_uuid) - - def get_servers(self): - return self._server_nets - - def lookup_backup_targets(self): - """ Lookup alternative network information """ - prof_list = toplustreDB.get_refs('profile') - for prof_uuid in prof_list: - prof_db = toplustreDB.lookup(prof_uuid) - if not prof_db: - panic("profile:", prof_uuid, "not found.") - for ref_class, ref_uuid in prof_db.get_all_refs(): - if ref_class in ('osd', 'mdsdev'): - devdb = toplustreDB.lookup(ref_uuid) - uuid = devdb.get_first_ref('target') - if self.target_uuid == uuid and self.tgt_dev_uuid != ref_uuid: - debug("add backup target", ref_uuid) - self.backup_targets.append(ref_uuid) - - def prepare(self, ignore_connect_failure = 0): - self.info(self.target_uuid) - if is_prepared(self.name): - 
self.cleanup() - try: - srv_list = self.get_servers() - debug('dbg CLIENT __prepare__:', self.target_uuid, srv_list) - for srv in srv_list: - lctl.connect(srv) - if len(srv_list) == 0: - panic("no servers for ", self.target_uuid) - except CommandError, e: - if not ignore_connect_failure: - raise e - - if srv_list[0]: - srv = srv_list[0] - if self.target_uuid in config.inactive and self.permits_inactive(): - debug("%s inactive" % self.target_uuid) - inactive_p = "inactive" - else: - debug("%s active" % self.target_uuid) - inactive_p = "" - lctl.newdev(self.module, self.name, self.uuid, - setup ="%s %s %s" % (self.target_uuid, srv.nid_uuid, - inactive_p)) - else: - panic("Unable to create OSC for ", self.target_uuid) - - for tgt_dev_uuid in self.backup_targets: - this_nets = get_ost_net(toplustreDB, tgt_dev_uuid) - if len(this_nets) == 0: - panic ("Unable to find a backup server for:", tgt_dev_uuid) - else: - for srv in this_nets: - lctl.connect(srv) - if srv: - lctl.add_conn(self.name, srv.nid_uuid); - - - def cleanup(self): - if is_prepared(self.name): - Module.cleanup(self) - srv_list = self.get_servers() - for srv in srv_list: - lctl.disconnect(srv) - for tgt_dev_uuid in self.backup_targets: - this_nets = get_ost_net(toplustreDB, tgt_dev_uuid) - if len(this_nets) == 0: - panic ("Unable to find a backup server for:", tgt_dev_uuid) - else: - for srv in this_nets: - lctl.disconnect(srv) - -class MDC(Client): - def __init__(self, db, uuid, fs_name): - Client.__init__(self, db, uuid, 'mdc', fs_name) - - def permits_inactive(self): - return 0 - -class OSC(Client): - def __init__(self, db, uuid, fs_name): - Client.__init__(self, db, uuid, 'osc', fs_name) - - def permits_inactive(self): - return 1 - -class COBD(Module): - def __init__(self, db): - Module.__init__(self, 'COBD', db) - self.real_uuid = self.db.get_first_ref('realobd') - self.cache_uuid = self.db.get_first_ref('cacheobd') - self.add_lustre_module('cobd' , 'cobd') - - # need to check /proc/mounts and 
/etc/mtab before - # formatting anything. - # FIXME: check if device is already formatted. - def prepare(self): - if is_prepared(self.name): - return - self.info(self.real_uuid, self.cache_uuid) - lctl.newdev("cobd", self.name, self.uuid, - setup ="%s %s" %(self.real_uuid, self.cache_uuid)) - - -# virtual interface for OSC and LOV -class VOSC(Module): - def __init__(self, db, uuid, fs_name, name_override = None, quota = None): - Module.__init__(self, 'VOSC', db) - if quota: - self.add_lustre_module('quota', 'lquota') - if db.get_class() == 'lov': - self.osc = LOV(db, uuid, fs_name, name_override) - else: - self.osc = get_osc(db, uuid, fs_name) - def get_uuid(self): - return self.osc.uuid - def get_name(self): - return self.osc.name - def prepare(self): - self.osc.prepare() - def cleanup(self): - self.osc.cleanup() - def load_module(self): - Module.load_module(self) - self.osc.load_module() - def cleanup_module(self): - self.osc.cleanup_module() - Module.cleanup_module(self) - - -class ECHO_CLIENT(Module): - def __init__(self,db): - Module.__init__(self, 'ECHO_CLIENT', db) - self.add_lustre_module('obdecho', 'obdecho') - self.obd_uuid = self.db.get_first_ref('obd') - obd = self.db.lookup(self.obd_uuid) - self.uuid = generate_client_uuid(self.name) - self.osc = VOSC(obd, self.uuid, self.name) - - def prepare(self): - if is_prepared(self.name): - return - self.osc.prepare() # XXX This is so cheating. 
-p - self.info(self.obd_uuid) - - lctl.newdev("echo_client", self.name, self.uuid, - setup = self.osc.get_name()) - - def cleanup(self): - if is_prepared(self.name): - Module.cleanup(self) - self.osc.cleanup() - - def load_module(self): - self.osc.load_module() - Module.load_module(self) - - def cleanup_module(self): - Module.cleanup_module(self) - self.osc.cleanup_module() - - -def generate_client_uuid(name): - client_uuid = '%05x_%.19s_%05x%05x' % (int(random.random() * 1048576), - name, - int(random.random() * 1048576), - int(random.random() * 1048576)) - return client_uuid[:36] - - -def my_rstrip(s, chars): - """my_rstrip(s, chars) -> strips any instances of the characters - found in chars from the right side of string s""" - # XXX required because python versions pre 2.2.3 don't allow - #string.rstrip() to take alternate char lists - import string - ns=s - try: - ns = string.rstrip(s, '/') - except TypeError, e: - for i in range(len(s) - 1, 0, -1): - if s[i] in chars: - continue - else: - ns = s[0:i+1] - break - return ns - - -class Mountpoint(Module): - def __init__(self,db): - Module.__init__(self, 'MTPT', db) - self.path = my_rstrip(self.db.get_val('path'), '/') - self.clientoptions = self.db.get_val('clientoptions', '') - self.fs_uuid = self.db.get_first_ref('filesystem') - fs = self.db.lookup(self.fs_uuid) - self.mds_uuid = fs.get_first_ref('mds') - mds_db = self.db.lookup(self.mds_uuid) - if config.quota: - quota = config.quota - else: - quota = mds_db.get_val('quota', config.quota) - self.obd_uuid = fs.get_first_ref('obd') - obd = self.db.lookup(self.obd_uuid) - client_uuid = generate_client_uuid(self.name) - self.vosc = VOSC(obd, client_uuid, self.name, quota=quota) - self.mdc = get_mdc(db, client_uuid, self.name, self.mds_uuid) - - self.add_lustre_module('mdc', 'mdc') - self.add_lustre_module('llite', 'llite') - - def prepare(self): - if fs_is_mounted(self.path): - log(self.path, "already mounted.") - return - self.vosc.prepare() - self.mdc.prepare() 
- mdc_name = self.mdc.name - - self.info(self.path, self.mds_uuid, self.obd_uuid) - if config.record or config.lctl_dump: - lctl.mount_option(local_node_name, self.vosc.get_name(), mdc_name) - return - - if config.clientoptions: - if self.clientoptions: - self.clientoptions = self.clientoptions + ',' + config.clientoptions - else: - self.clientoptions = config.clientoptions - if self.clientoptions: - self.clientoptions = ',' + self.clientoptions - # Linux kernel will deal with async and not pass it to ll_fill_super, - # so replace it with Lustre async - self.clientoptions = string.replace(self.clientoptions, "async", "lasync") - - cmd = "mount -t lustre_lite -o osc=%s,mdc=%s%s %s %s" % \ - (self.vosc.get_name(), mdc_name, self.clientoptions, config.config, self.path) - run("mkdir", self.path) - ret, val = run(cmd) - if ret: - self.mdc.cleanup() - self.vosc.cleanup() - panic("mount failed:", self.path, ":", string.join(val)) - - def cleanup(self): - self.info(self.path, self.mds_uuid,self.obd_uuid) - - if config.record or config.lctl_dump: - lctl.del_mount_option(local_node_name) - else: - if fs_is_mounted(self.path): - if config.force: - (rc, out) = run("umount", "-f", self.path) - else: - (rc, out) = run("umount", self.path) - if rc: - raise CommandError('umount', out, rc) - - if fs_is_mounted(self.path): - panic("fs is still mounted:", self.path) - - self.mdc.cleanup() - self.vosc.cleanup() - - def load_module(self): - self.vosc.load_module() - Module.load_module(self) - - def cleanup_module(self): - Module.cleanup_module(self) - self.vosc.cleanup_module() - - -# ============================================================ -# misc query functions - -def get_ost_net(self, osd_uuid): - srv_list = [] - if not osd_uuid: - return srv_list - osd = self.lookup(osd_uuid) - node_uuid = osd.get_first_ref('node') - node = self.lookup(node_uuid) - if not node: - panic("unable to find node for osd_uuid:", osd_uuid, - " node_ref:", node_uuid) - for net_uuid in 
node.get_networks(): - db = node.lookup(net_uuid) - net = Network(db, node_uuid) - srv_list.append(net) - return srv_list - - -# the order of iniitailization is based on level. -def getServiceLevel(self): - type = self.get_class() - ret=0; - if type in ('network',): - ret = 5 - elif type in ('ldlm',): - ret = 20 - elif type in ('osd', 'cobd'): - ret = 30 - elif type in ('mdsdev',): - ret = 40 - elif type in ('mountpoint', 'echoclient'): - ret = 70 - else: - panic("Unknown type: ", type) - - if ret < config.minlevel or ret > config.maxlevel: - ret = 0 - return ret - -# -# return list of services in a profile. list is a list of tuples -# [(level, db_object),] -def getServices(self): - list = [] - for ref_class, ref_uuid in self.get_all_refs(): - servdb = self.lookup(ref_uuid) - if servdb: - level = getServiceLevel(servdb) - if level > 0: - list.append((level, servdb)) - else: - panic('service not found: ' + ref_uuid) - - list.sort() - return list - - -############################################################ -# MDC UUID hack - -# FIXME: clean this mess up! -# -# OSC is no longer in the xml, so we have to fake it. -# this is getting ugly and begging for another refactoring -def get_osc(ost_db, uuid, fs_name): - osc = OSC(ost_db, uuid, fs_name) - return osc - -def get_mdc(db, uuid, fs_name, mds_uuid): - mds_db = db.lookup(mds_uuid); - if not mds_db: - panic("no mds:", mds_uuid) - mdc = MDC(mds_db, uuid, fs_name) - return mdc - -def get_active_target(db): - target_uuid = db.getUUID() - target_name = db.getName() - node_name = get_select(target_name) - if node_name: - tgt_dev_uuid = db.get_node_tgt_dev(node_name, target_uuid) - else: - tgt_dev_uuid = db.get_first_ref('active') - return tgt_dev_uuid - -def get_server_by_nid_uuid(db, nid_uuid): - for n in db.lookup_class("network"): - net = Network(n) - if net.nid_uuid == nid_uuid: - return net - - -############################################################ -# lconf level logic -# Start a service. 
-def newService(db): - type = db.get_class() - debug('Service:', type, db.getName(), db.getUUID()) - n = None - if type == 'ldlm': - n = LDLM(db) - elif type == 'lov': - n = LOV(db, "YOU_SHOULD_NEVER_SEE_THIS_UUID") - elif type == 'network': - n = Network(db) - elif type == 'osd': - n = OSD(db) - elif type == 'cobd': - n = COBD(db) - elif type == 'mdsdev': - n = MDSDEV(db) - elif type == 'mountpoint': - n = Mountpoint(db) - elif type == 'echoclient': - n = ECHO_CLIENT(db) - else: - panic("unknown service type:", type) - return n - -# -# Prepare the system to run lustre using a particular profile -# in a the configuration. -# * load & the modules -# * setup networking for the current node -# * make sure partitions are in place and prepared -# * initialize devices with lctl -# Levels is important, and needs to be enforced. -def for_each_profile(db, prof_list, operation): - for prof_uuid in prof_list: - prof_db = db.lookup(prof_uuid) - if not prof_db: - panic("profile:", prof_uuid, "not found.") - services = getServices(prof_db) - operation(services) - -def doWriteconf(services): - if config.nosetup: - return - have_mds = 0 - for s in services: - if s[1].get_class() == 'mdsdev': - n = newService(s[1]) - n.write_conf() - have_mds = 1 - if have_mds == 0: - panic("Cannot find mds device, please run --write_conf on the mds node.") - - -def doSetup(services): - if config.nosetup: - return - for s in services: - n = newService(s[1]) - n.prepare() - -def doModules(services): - if config.nomod: - return - for s in services: - n = newService(s[1]) - n.load_module() - -def doCleanup(services): - if config.nosetup: - return - services.reverse() - for s in services: - n = newService(s[1]) - if n.safe_to_clean(): - n.cleanup() - -def doUnloadModules(services): - if config.nomod: - return - services.reverse() - for s in services: - n = newService(s[1]) - if n.safe_to_clean_modules(): - n.cleanup_module() - -def doMakeServiceScript(services): - if config.nosetup: - return - try: - 
os.makedirs(config.service_scripts) - except OSError, e: - if e[0] != errno.EEXIST: - panic("Couldn't create scripts dir " + config.service_scripts + ": " + e[1]) - - for s in services: - if s[1].get_class() != 'osd' and s[1].get_class() != 'mdsdev': - continue - - target_uuid = s[1].get_first_ref('target') - target = toplustreDB.lookup(target_uuid) - target_symlink = config.service_scripts + "/" + target.getName() - if config.force: - try: - try: - os.unlink(target_symlink) - if config.verbose: - print "Removed " + target_symlink - except OSError, e: - if e[0] != errno.EISDIR: - raise e - os.rmdir(target_symlink) - if config.verbose: - print "Removed " + target_symlink - except OSError, e: - if e[0] != errno.ENOENT: - panic("Error removing " + target_symlink + ": " + e[1]) - - try: - os.symlink("/etc/init.d/lustre", target_symlink) - if config.verbose: - print "Created service link " + target_symlink + " to /etc/init.d/lustre" - - except OSError, e: - if e[0] == errno.EEXIST: - extra_error = " (use --force option to remove existing files)" - else: - extra_error = "" - panic("Error creating " + target_symlink + ": " + e[1] + extra_error) - -# Check mtime of config logs -def doCheckMtime(lustreDB, hosts): - for h in hosts: - node_db = lustreDB.lookup_name(h, 'node') - if node_db: - break - if not node_db: - return - - mdsdb = 0 - prof_list = node_db.get_refs('profile') - for prof_uuid in prof_list: - prof_db = node_db.lookup(prof_uuid) - if prof_db: - services = getServices(prof_db) - for s in services: - if s[1].get_class() == 'mdsdev': - mdsdb = s[1] - break - - if mdsdb and lustreDB.get_mtime(): - debug("Checking XML modification time") - devpath = mdsdb.get_val('devpath','') - xmtime = string.atol(lustreDB.get_mtime()) - cmd = "debugfs -c -R 'stat /LOGS' %s 2>&1 | grep mtime" %devpath - ret, kmtimes = runcmd(cmd) - if ret: - log("Can not get mtime info of MDS LOGS directory") - else: - kmtime = string.atoi(string.split(kmtimes[0])[1], 0) - if xmtime > kmtime: - 
debug('xmtime ', xmtime, '> kmtime', kmtime) - if config.old_conf: - log("Warning: MDS startup logs are older than config %s." - " Please run --write_conf on stopped MDS to update." - %CONFIG_FILE) - else: - panic("Error: MDS startup logs are older than config %s." - " Please run --write_conf on stopped MDS to update." - " Use '--old_conf' to start anyways." %CONFIG_FILE) - return - -# -# Load profile for -def doHost(lustreDB, hosts): - global local_node_name, tgt_select - node_db = None - for h in hosts: - node_db = lustreDB.lookup_name(h, 'node') - if node_db: - if config.service: - tgt_select[config.service] = h - config.group = config.service - break - if not node_db: - panic('No host entry found.') - - local_node_name = node_db.get_val('name', 0) - lustre_upcall = node_db.get_val('lustreUpcall', '') - portals_upcall = node_db.get_val('portalsUpcall', '') - timeout = node_db.get_val_int('timeout', 0) - ptldebug = node_db.get_val('ptldebug', '') - subsystem = node_db.get_val('subsystem', '') - - # Two step process: (1) load modules, (2) setup lustre - # if not cleaning, load modules first. 
- prof_list = node_db.get_refs('profile') - - if config.make_service_scripts: - for_each_profile(node_db, prof_list, doMakeServiceScript) - return - - elif config.write_conf: - for_each_profile(node_db, prof_list, doModules) - for_each_profile(node_db, prof_list, doWriteconf) - for_each_profile(node_db, prof_list, doUnloadModules) - lustreDB.close() - - elif config.recover: - if not (config.tgt_uuid and config.client_uuid and config.conn_uuid): - raise Lustre.LconfError( "--recovery requires --tgt_uuid " + - "--client_uuid --conn_uuid ") - doRecovery(lustreDB, lctl, config.tgt_uuid, config.client_uuid, - config.conn_uuid) - elif config.cleanup: - if not mod_loaded('lnet'): - return - - # ugly hack, only need to run lctl commands for --dump - if config.lctl_dump or config.record: - for_each_profile(node_db, prof_list, doCleanup) - return - - sys_set_ptldebug(ptldebug) - sys_set_subsystem(subsystem) - sys_set_lustre_upcall(lustre_upcall) - sys_set_portals_upcall(portals_upcall) - - for_each_profile(node_db, prof_list, doCleanup) - for_each_profile(node_db, prof_list, doUnloadModules) - lustreDB.close() - - else: - # ugly hack, only need to run lctl commands for --dump - if config.lctl_dump or config.record: - sys_set_timeout(timeout) - sys_set_lustre_upcall(lustre_upcall) - for_each_profile(node_db, prof_list, doSetup) - return - - if PLATFORM == 'LINUX': - sys_set_netmem_max('/proc/sys/net/core/rmem_max', MAXTCPBUF) - sys_set_netmem_max('/proc/sys/net/core/wmem_max', MAXTCPBUF) - - for_each_profile(node_db, prof_list, doModules) - - if PLATFORM == 'LINUX': - # XXX need to be fixed for Darwin - sys_set_debug_path() - sys_set_ptldebug(ptldebug) - sys_set_subsystem(subsystem) - script = config.gdb_script - run(lctl.lctl, ' modules >', script) - if config.gdb: - log ("The GDB module script is in", script) - # pause, so user has time to break and - # load the script - time.sleep(5) - sys_set_timeout(timeout) - sys_set_lustre_upcall(lustre_upcall) - 
sys_set_portals_upcall(portals_upcall) - - for_each_profile(node_db, prof_list, doSetup) - lustreDB.close() - -def add_clumanager_node(node_db, nodes, services): - new_services = [] - node_name = node_db.getUUID() - nodes[node_name] = [] - - for prof_uuid in node_db.get_refs('profile'): - prof_db = toplustreDB.lookup(prof_uuid) - for ref_class, ref_uuid in prof_db.get_all_refs(): - if ref_class not in ('osd', 'mdsdev'): - continue - devdb = toplustreDB.lookup(ref_uuid) - tgt_uuid = devdb.get_first_ref('target') - - nodes[node_name].append(ref_uuid) - - if not services.has_key(tgt_uuid): - if config.verbose: - print "New service: " + tgt_uuid + " (originally found on " + node_name + ")" - new_services.append(tgt_uuid) - services[tgt_uuid] = [] - services[tgt_uuid].append(ref_uuid) - - return new_services - -def add_clumanager_services(new_services, nodes, dev_list): - new_nodes = [] - for devdb in dev_list: - tgt_uuid = devdb.get_first_ref('target') - if tgt_uuid in new_services: - node_uuid = devdb.get_first_ref('node') - - if not (nodes.has_key(node_uuid) or node_uuid in new_nodes): - if config.verbose: - print "New node: " + node_uuid + " for service " + tgt_uuid - new_nodes.append(node_uuid) - - return new_nodes - -def doClumanager(lustreDB, hosts): - nodes = {} - services = {} - - dev_list = [] - - for dev_uuid in toplustreDB.get_refs('osd') + toplustreDB.get_refs('mdsdev'): - dev_list.append(lustreDB.lookup(dev_uuid)) - - node_db = None - for h in hosts: - node_db = lustreDB.lookup_name(h, 'node') - if node_db: - our_host = h - new_services = add_clumanager_node(node_db, nodes, services) - break - - if not node_db: - panic('No host entry found.') - - while 1: - if len(new_services) == 0: - break - - new_nodes = add_clumanager_services(new_services, nodes, dev_list) - if len(new_nodes) == 0: - break - - if len(new_nodes) + len(nodes.keys()) > 8: - panic("CluManager only supports 8 nodes per failover \"cluster.\"") - - new_services = [] - for node_uuid in 
new_nodes: - node_db = lustreDB.lookup(node_uuid) - if not node_db: - panic("No node entry for " + node_uuid + " was found.") - - new_services.append(add_clumanager_node(node_db, nodes, services)) - - nodenames = [] - for node in nodes.keys(): - nodedb = lustreDB.lookup(node) - nodenames.append(nodedb.getName()) - nodenames.sort() - - print """ - - - - - - - - - """ % (string.join(nodenames), config.rawprimary, config.rawsecondary) - - - i = 0 - for node in nodenames: - print " " % (i, node) - i = i + 1 - - print " \n " - - servicekeys = services.keys() - servicekeys.sort() - - i = 0 - for service in servicekeys: - svcdb = lustreDB.lookup(service) - print " " % (i, svcdb.getName()) - i = i + 1 - - j = 0 - active_uuid = get_active_target(svcdb) - for svc_uuid in [active_uuid] + services[service]: - if svc_uuid == active_uuid and j > 0: - continue - svcdb = lustreDB.lookup(svc_uuid) - - svc_node_uuid = svcdb.get_first_ref('node') - svc_nodedb = lustreDB.lookup(svc_node_uuid) - - print " " % (j, svc_nodedb.getName()) - j = j + 1 - - print " " - - print " \n " - - i = 0 - for service in servicekeys: - svcdb = lustreDB.lookup(service) - active_uuid = get_active_target(svcdb) - activedb = lustreDB.lookup(active_uuid) - - svc_node_uuid = activedb.get_first_ref('node') - svc_nodedb = lustreDB.lookup(svc_node_uuid) - - print " " \ - % ( svcdb.getName(), i, svcdb.getName(), config.service_scripts, svcdb.getName()) - print " \n " - i = i + 1 - - print " \n" - -def doRecovery(lustreDB, lctl, tgt_uuid, client_uuid, nid_uuid): - tgt = lustreDB.lookup(tgt_uuid) - if not tgt: - raise Lustre.LconfError("doRecovery: "+ tgt_uuid +" not found.") - new_uuid = get_active_target(tgt) - if not new_uuid: - raise Lustre.LconfError("doRecovery: no active target found for: " + - tgt_uuid) - srv_list = find_local_servers(get_ost_net(lustreDB, new_uuid)) - if not srv_list[0]: - raise Lustre.LconfError("Unable to find a connection to:" + new_uuid) - - oldsrv = get_server_by_nid_uuid(lustreDB, 
nid_uuid) - lustreDB.close() - - for srv in srv_list: - if oldsrv.net_type != srv.net_type: - continue - - log("Reconnecting", tgt_uuid, "to", srv.nid_uuid) - - lctl.recover(client_uuid, srv.nid_uuid) - - -def setupModulePath(cmd, portals_dir = PORTALS_DIR): - base = os.path.dirname(cmd) - if development_mode(): - if not config.lustre: - debug('using objdir module paths') - config.lustre = (os.path.join(base, "..")) - # normalize the portals dir, using command line arg if set - if config.portals: - portals_dir = config.portals - dir = os.path.join(config.lustre, portals_dir) - config.portals = dir - debug('config.portals', config.portals) - elif config.lustre and config.portals: - # production mode - # if --lustre and --portals, normalize portals - # can ignore POTRALS_DIR here, since it is probly useless here - config.portals = os.path.join(config.lustre, config.portals) - debug('config.portals B', config.portals) - -def sysctl(path, val): - debug("+ sysctl", path, val) - if config.noexec: - return - try: - fp = open(os.path.join('/proc/sys', path), 'w') - fp.write(str(val)) - fp.close() - except IOError, e: - panic(str(e)) - - -def sys_set_debug_path(): - sysctl('lnet/debug_path', config.debug_path) - -def validate_upcall(upcall): - import os - if upcall in ('DEFAULT','NONE'): - pass - elif os.path.exists(upcall): - if not os.access(upcall, os.X_OK): - print "WARNING upcall script not executable: %s" % upcall - else: - print "WARNING invalid upcall script specified: %s" % upcall - -def sys_set_lustre_upcall(upcall): - # the command line overrides the value in the node config - if config.lustre_upcall: - upcall = config.lustre_upcall - elif config.upcall: - upcall = config.upcall - if upcall: - validate_upcall(upcall) - lctl.set_lustre_upcall(upcall) - -def sys_set_portals_upcall(upcall): - # the command line overrides the value in the node config - if config.portals_upcall: - upcall = config.portals_upcall - elif config.upcall: - upcall = config.upcall - if 
upcall: - validate_upcall(upcall) - sysctl('lnet/upcall', upcall) - -def sys_set_group_upcall(mds, upcall): - if config.noexec: - return - # the command line overrides the value in the MDS config - if config.group_upcall: - upcall = config.group_upcall - if upcall: - validate_upcall(upcall) - debug("setting MDS", mds, "upcall to:", upcall) - path = "/proc/fs/lustre/mds/" + mds + "/group_upcall" - fp = open(path, 'w') - fp.write(upcall) - fp.close() - -def sys_set_timeout(timeout): - # the command overrides the value in the node config - if config.timeout and config.timeout > 0: - timeout = config.timeout - if timeout != None and timeout > 0: - lctl.set_timeout(timeout) - -def sys_tweak_socknal (): - if config.single_socket: - sysctl("socknal/typed", 0) - -def sys_optimize_elan (): - procfiles = ["/proc/elan/config/eventint_punt_loops", - "/proc/qsnet/elan3/config/eventint_punt_loops", - "/proc/qsnet/elan4/config/elan4_mainint_punt_loops"] - for p in procfiles: - if os.access(p, os.W_OK): - run ("echo 1 > " + p) - -def sys_set_ptldebug(ptldebug): - if config.ptldebug: - ptldebug = config.ptldebug - if ptldebug: - try: - val = eval(ptldebug, ptldebug_names) - val = "0x%x" % (val) - sysctl('lnet/debug', val) - except NameError, e: - panic(str(e)) - -def sys_set_subsystem(subsystem): - if config.subsystem: - subsystem = config.subsystem - if subsystem: - try: - val = eval(subsystem, subsystem_names) - val = "0x%x" % (val) - sysctl('lnet/subsystem_debug', val) - except NameError, e: - panic(str(e)) - -def sys_set_netmem_max(path, max): - debug("setting", path, "to at least", max) - if config.noexec: - return - fp = open(path) - str = fp.readline() - fp.close() - cur = int(str) - if max > cur: - fp = open(path, 'w') - fp.write('%d\n' %(max)) - fp.close() - - -# Add dir to the global PATH, if not already there. 
-def add_to_path(new_dir): - syspath = string.split(os.environ['PATH'], ':') - if new_dir in syspath: - return - os.environ['PATH'] = os.environ['PATH'] + ':' + new_dir - -def default_debug_path(): - path = '/tmp/lustre-log' - if os.path.isdir('/r'): - return '/r' + path - else: - return path - -def default_gdb_script(): - script = '/tmp/ogdb' - if os.path.isdir('/r'): - return '/r' + script - else: - return script - -DEFAULT_PATH = ('/sbin', '/usr/sbin', '/bin', '/usr/bin') -# ensure basic elements are in the system path -def sanitise_path(): - for dir in DEFAULT_PATH: - add_to_path(dir) - -# global hack for the --select handling -tgt_select = {} -def init_select(args): - # args = [service=nodeA,service2=nodeB service3=nodeC] - # --service is analagous to: - # --group --select = - # this is handled in doHost() - global tgt_select - for arg in args: - list = string.split(arg, ',') - for entry in list: - srv, node = string.split(entry, '=') - tgt_select[srv] = node - -def get_select(srv): - if tgt_select.has_key(srv): - return tgt_select[srv] - return None - - -FLAG = Lustre.Options.FLAG -PARAM = Lustre.Options.PARAM -INTPARAM = Lustre.Options.INTPARAM -PARAMLIST = Lustre.Options.PARAMLIST -lconf_options = [ - ('verbose,v', "Print system commands as they are run"), - ('ldapurl',"LDAP server URL, eg. ldap://localhost", PARAM), - ('config', "Cluster config name used for LDAP query", PARAM), - ('select', "service=nodeA,service2=nodeB ", PARAMLIST), - ('service', "shorthand for --group --select =", PARAM), - ('node', "Load config for ", PARAM), - ('cleanup,d', "Cleans up config. (Shutdown)"), - ('force,f', "Forced unmounting and/or obd detach during cleanup", - FLAG, 0), - ('single_socket', "socknal option: only use one socket instead of bundle", - FLAG, 0), - ('failover',"""Used to shut down without saving state. - This will allow this node to "give up" a service to a - another node for failover purposes. 
This will not - be a clean shutdown.""", - FLAG, 0), - ('abort_recovery',"""Used to start a service when you know recovery - will not succeed. This will skip the recovery - timeout period."""), - ('gdb', """Prints message after creating gdb module script - and sleeps for 5 seconds."""), - ('noexec,n', """Prints the commands and steps that will be run for a - config without executing them. This can used to check if a - config file is doing what it should be doing"""), - ('nomod', "Skip load/unload module step."), - ('nosetup', "Skip device setup/cleanup step."), - ('reformat', "Reformat all devices (without question)"), - ('mkfsoptions', "Additional options for the mk*fs command line", PARAM), - ('mountfsoptions', "Additional options for mount fs command line", PARAM), - ('clientoptions', "Additional options for Lustre", PARAM), - ('dump', "Dump the kernel debug log to file before portals is unloaded", - PARAM), - ('write_conf', "Save all the client config information on mds."), - ('old_conf', "Start up service even though config logs appear outdated."), - ('record', "Write config information on mds."), - ('record_log', "Name of config record log.", PARAM), - ('record_device', "MDS device name that will record the config commands", - PARAM), - ('minlevel', "Minimum level of services to configure/cleanup", - INTPARAM, 0), - ('maxlevel', """Maximum level of services to configure/cleanup - Levels are aproximatly like: - 10 - network - 20 - device, ldlm - 30 - osd, mdd - 40 - mds, ost - 70 - mountpoint, echo_client, osc, mdc, lov""", - INTPARAM, 100), - ('lustre', """Base directory of lustre sources. This parameter will - cause lconf to load modules from a source tree.""", PARAM), - ('portals', """Portals source directory. If this is a relative path, - then it is assumed to be relative to lustre. 
""", PARAM), - ('timeout', "Set recovery timeout", INTPARAM), - ('upcall', "Set both portals and lustre upcall script", PARAM), - ('lustre_upcall', "Set lustre upcall script", PARAM), - ('portals_upcall', "Set portals upcall script", PARAM), - ('group_upcall', "Set supplementary group upcall program", PARAM), - ('lctl_dump', "Save lctl ioctls to the dumpfile argument", PARAM), - ('ptldebug', "Set the portals debug level", PARAM), - ('subsystem', "Set the portals debug subsystem", PARAM), - ('gdb_script', "Fullname of gdb debug script", PARAM, default_gdb_script()), - ('debug_path', "Path to save debug dumps", PARAM, default_debug_path()), - ('allow_unprivileged_port', "Allow connections from unprivileged ports"), - ('clumanager', "Generate CluManager config file for this node's cluster"), - ('rawprimary', "For clumanager, device of the primary quorum", PARAM, "/dev/raw/raw1"), - ('rawsecondary', "For clumanager, device of the secondary quorum", PARAM, "/dev/raw/raw2"), - ('service_scripts', "For clumanager, directory containing per-service scripts", PARAM, "/etc/lustre/services"), - ('make_service_scripts', "Create per-service symlinks for use with clumanager"), -# Client recovery options - ('recover', "Recover a device"), - ('group,g', "The group of devices to configure or cleanup", PARAM), - ('tgt_uuid', "The failed target (required for recovery)", PARAM), - ('client_uuid', "The failed client (required for recovery)", PARAM), - ('conn_uuid', "The failed connection (required for recovery)", PARAM), - - ('inactive', """The name of an inactive service, to be ignored during - mounting (currently OST-only). 
Can be repeated.""", - PARAMLIST), - ('user_xattr', """Enable user_xattr support on MDS""", FLAG, 0), - ('acl', """Enable ACL support on MDS""", FLAG, 0), - ('quota', "Enable quota support for client file system", PARAM), - ] - -def main(): - global lctl, config, toplustreDB, CONFIG_FILE - - # in the upcall this is set to SIG_IGN - signal.signal(signal.SIGCHLD, signal.SIG_DFL) - - cl = Lustre.Options("lconf", "config.xml", lconf_options) - try: - config, args = cl.parse(sys.argv[1:]) - except Lustre.OptionError, e: - print e - sys.exit(1) - - setupModulePath(sys.argv[0]) - - host = socket.gethostname() - - # the PRNG is normally seeded with time(), which is not so good for starting - # time-synchronized clusters - input = open('/dev/urandom', 'r') - if not input: - print 'Unable to open /dev/urandom!' - sys.exit(1) - seed = input.read(32) - input.close() - random.seed(seed) - - sanitise_path() - - init_select(config.select) - - if len(args) > 0: - # allow config to be fetched via HTTP, but only with python2 - if sys.version[0] != '1' and args[0].startswith('http://'): - import urllib2 - try: - config_file = urllib2.urlopen(args[0]) - except (urllib2.URLError, socket.error), err: - if hasattr(err, 'args'): - err = err.args[1] - print "Could not access '%s': %s" %(args[0], err) - sys.exit(1) - elif not os.access(args[0], os.R_OK): - print 'File not found or readable:', args[0] - sys.exit(1) - else: - # regular file - config_file = open(args[0], 'r') - try: - dom = xml.dom.minidom.parse(config_file) - except Exception: - panic("%s does not appear to be a config file." % (args[0])) - sys.exit(1) # make sure to die here, even in debug mode. - config_file.close() - CONFIG_FILE = args[0] - lustreDB = Lustre.LustreDB_XML(dom.documentElement, dom.documentElement) - if not config.config: - config.config = os.path.basename(args[0])# use full path? 
- if config.config[-4:] == '.xml': - config.config = config.config[:-4] - elif config.ldapurl: - if not config.config: - panic("--ldapurl requires --config name") - dn = "config=%s,fs=lustre" % (config.config) - lustreDB = Lustre.LustreDB_LDAP('', {}, base=dn, url = config.ldapurl) - elif config.ptldebug or config.subsystem: - sys_set_ptldebug(None) - sys_set_subsystem(None) - sys.exit(0) - else: - print 'Missing config file or ldap URL.' - print 'see lconf --help for command summary' - sys.exit(1) - - if config.reformat and config.cleanup: - panic("Options \"reformat\" and \"cleanup\" are incompatible. "+ - "Please specify only one.") - - toplustreDB = lustreDB - - ver = lustreDB.get_version() - if not ver: - panic("No version found in config data, please recreate.") - if ver != Lustre.CONFIG_VERSION: - panic("Config version", ver, "does not match lconf version", - Lustre.CONFIG_VERSION) - - node_list = [] - if config.node: - node_list.append(config.node) - else: - if len(host) > 0: - node_list.append(host) -# node_list.append('localhost') - - debug("configuring for host: ", node_list) - - if len(host) > 0: - config.debug_path = config.debug_path + '-' + host - config.gdb_script = config.gdb_script + '-' + host - - lctl = LCTLInterface('lctl') - - if config.lctl_dump: - lctl.use_save_file(config.lctl_dump) - - if not (config.reformat or config.write_conf or config.cleanup): - doCheckMtime(lustreDB, node_list) - - if config.record: - if not (config.record_device and config.record_log): - panic("When recording, both --record_log and --record_device must be specified.") - lctl.clear_log(config.record_device, config.record_log) - lctl.record(config.record_device, config.record_log) - - if config.clumanager: - doClumanager(lustreDB, node_list) - else: - doHost(lustreDB, node_list) - - if config.record: - lctl.end_record() - -if __name__ == "__main__": - try: - main() - except Lustre.LconfError, e: - print e -# traceback.print_exc(file=sys.stdout) - sys.exit(1) - except 
CommandError, e: - e.dump() - rc = e.rc - if rc == 0: - rc = 1 - sys.exit(rc) - - if first_cleanup_error: - sys.exit(first_cleanup_error) diff --git a/lustre/utils/lmc b/lustre/utils/lmc deleted file mode 100755 index 6c1b781..0000000 --- a/lustre/utils/lmc +++ /dev/null @@ -1,1256 +0,0 @@ -#!/usr/bin/env python -# GPL HEADER START -# -# DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. -# -# This program is free software; you can redistribute it and/or modify -# it under the terms of the GNU General Public License version 2 only, -# as published by the Free Software Foundation. -# -# This program is distributed in the hope that it will be useful, but -# WITHOUT ANY WARRANTY; without even the implied warranty of -# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU -# General Public License version 2 for more details (a copy is included -# in the LICENSE file that accompanied this code). -# -# You should have received a copy of the GNU General Public License -# version 2 along with this program; If not, see -# http://www.sun.com/software/products/lustre/docs/GPLv2.pdf -# copy of GPLv2]. -# -# Please contact Sun Microsystems, Inc., 4150 Network Circle, Santa Clara, -# CA 95054 USA or visit www.sun.com if you need additional information or -# have any questions. -# -# GPL HEADER END -# - -# -# Copyright (c) 2002, 2010, Oracle and/or its affiliates. All rights reserved. -# Use is subject to license terms. -# - -# -# This file is part of Lustre, http://www.lustre.org/ -# Lustre is a trademark of Sun Microsystems, Inc. -# -# Author: Robert Read - -""" -lmc - lustre configuration data manager - - See the man page, or the Lustre Operations Manual, for documentation on lmc. 
- -""" - -import sys, os, getopt, string, exceptions, re -import xml.dom.minidom - -def printDoc(doc, stream=sys.stdout): - try: - from xml.dom.ext import PrettyPrint - PrettyPrint(doc, stream) - except ImportError: - stream.write(doc.toxml()) - stream.write("\n") - - -PYMOD_DIR = ["/usr/lib/lustre/python", "/usr/lib64/lustre/python"] - -def development_mode(): - base = os.path.dirname(sys.argv[0]) - if os.access(base+"/Makefile.am", os.R_OK): - return 1 - return 0 - -if not development_mode(): - sys.path.extend(PYMOD_DIR) - -import Lustre - -DEFAULT_PORT = 988 -DEFAULT_STRIPE_SZ = 1048576 -DEFAULT_STRIPE_CNT = 1 -DEFAULT_STRIPE_PATTERN = 0 -UUID_MAX_LENGTH = 31 - -def reference(): - print """usage: lmc --add object [object parameters] - -Object creation command summary: - ---add node - --node node_name - --timeout num - --upcall path - --lustre_upcall path - --group_upcall path - --portals_upcall path - --ptldebug debug_level - --subsystem subsystem_name - ---add net - --node node_name - --nid nid - --cluster_id - --nettype tcp|elan|gm|openib|iib|vib|ra|ptl|lnet - --hostaddr ip[/netmask] - --port port - --tcpbuf size - --irq_affinity 0|1 - --router - ---add mds - --node node_name - --mds mds_name - --dev path - --fstype ldiskfs|ext3 - --size size - --nspath - --group_upcall upcall - --journal_size size - --inode_size size - --mdsuuid uuid - --mkfsoptions options - --mountfsoptions options - --quota quotaon=u|g|ug,iunit=,bunit=,itune=,btune= - ---add lov - --lov lov_name - --mds mds_name - --stripe_sz num - --stripe_cnt num - --stripe_pattern num - ---add ost - --node node_name - --ost ost_name - --failout - --failover - --lov lov_name - --dev path - --size size - --fstype ldiskfs|ext3 - --journal_size size - --inode_size size - --osdtype obdecho|obdfilter - --ostuuid uuid - --mkfsoptions options - --mountfsoptions options - --quota quotaon=u|g|ug,iunit=,bunit=,itune=,btune= - ---add mtpt - Mountpoint - --node node_name - --path /mnt/point - --mds mds_name - --ost 
ost_name OR --lov lov_name - --clientoptions options - ---add route - --node nodename - --router - --gw nid - --gateway_cluster_id nid - --target_cluster_id nid - --lo nid - --hi nid - ---add echo_client - --node nodename -""" - -PARAM = Lustre.Options.PARAM -PARAMLIST = Lustre.Options.PARAMLIST -lmc_options = [ - # lmc input/output options - ('reference', "Print short reference for commands."), - ('verbose,v', "Print system commands as they are run."), - ('merge,m', "Append to the specified config file.", PARAM), - ('output,o', "Write XML configuration into given output file. Overwrite existing content.", PARAM), - ('input,i', "", PARAM), - ('batch', "Used to execute lmc commands in batch mode.", PARAM), - - # commands - ('add', "", PARAM), - - # node options - ('node', "Add a new node in the cluster configuration.", PARAM), - ('timeout', "Set timeout to initiate recovery.", PARAM), - ('upcall', "Set both lustre and portals upcall scripts.", PARAM), - ('lustre_upcall', "Set location of lustre upcall script.", PARAM), - ('group_upcall', "Set location of extended group upcall script.", PARAM), - ('portals_upcall', "Set location of portals upcall script.", PARAM), - ('ptldebug', "Set the portals debug level", PARAM), - ('subsystem', "Specify which Lustre subsystems have debug output recorded in the log", PARAM), - - # network - ('nettype', "Specify the network type. 
This can be tcp/elan/gm/openib/iib/vib/ra/ptl/lnet.", PARAM), - ('nid', "Give the network ID, e.g ElanID/IP Address as used by portals.", PARAM), - ('port', "Optional argument to specify the TCP port number.", PARAM, DEFAULT_PORT), - ('hostaddr', "Optional argument to specify the host address.", PARAMLIST), - ('cluster_id', "Specify the cluster ID", PARAM, "0"), - ('nonet', "Skip the remote host networking check"), - - # routes - ('route', "Add a new route for the cluster.", PARAM), - ('router', "Optional flag to mark a node as router."), - ('gw', "Specify the nid of the gateway for a route.", PARAM), - ('gateway_cluster_id', "", PARAM, "0"), - ('target_cluster_id', "", PARAM, "0"), - ('lo', "For a range route, this is the low value nid.", PARAM), - ('hi', "For a range route, this is a hi value nid.", PARAM,""), - - # servers: mds and ost - ('mds', "Specify MDS name.", PARAM), - ('ost', "Specify the OST name.", PARAM,""), - ('osdtype', "This could obdfilter or obdecho.", PARAM, "obdfilter"), - ('failout', "Disable failover support on OST"), - ('failover', "Enable failover support on OST"), - ('group', "", PARAM), - ('dev', "Path of the device on local system.", PARAM,""), - ('size', "Specify the size of the device if needed.", PARAM,"0"), - ('group_upcall', "Set location of supplementary group upcall.", PARAM,""), - ('journal_size', "Specify new journal size for underlying ext3 file system.", PARAM,"0"), - ('inode_size', "Specify new inode size for underlying ext3 file system.", PARAM,"0"), - ('fstype', "Optional argument to specify the filesystem type.", PARAM, "ext3"), - ('mkfsoptions', "Optional argument to mkfs.", PARAM, ""), - ('mountfsoptions', "Optional argument to mount fs.", PARAM, ""), - ('ostuuid', "Optional argument to specify OST UUID", PARAM,""), - ('mdsuuid', "Optional argument to specify MDS UUID", PARAM,""), - ('nspath', "Local mount point of server namespace.", PARAM,""), - ('format', ""), - ('quota', """ - quotaon: enable quota, only u|g|ug is 
supported now. - iunit: the unit for slave to acquire/release inode quota from/to master. - Int type (>0), default value in Lustre is 5120 inodes. - bunit: the unit for slave to acquire/release block quota from/to master. - Mbytes (>0), default value in Lustre is 128(Mbytes). - itune: used to tune the threthold. When inode quota usage reach the threthold, - slave should acquire/release inode quota from/to master. - Int type (100 > btune > 0), default value in Lustre is 50 (percentge). - inode threthold = iunit * itune / 100. - btune: used to tune the threthold. When block quota usage reach the threthold, - slave should acquire/release block quota from/to master. - Int type (100 > btune > 0), default value in Lustre is 50 (percentage). - block threthold = bunit * btune / 100.""", PARAM,""), - # clients: mountpoint and echo - ('echo_client', "", PARAM), - ('path', "Specify the mountpoint for Lustre.", PARAM), - ('filesystem', "Lustre filesystem name", PARAM,""), - ('clientoptions', "Specify the options for Lustre, such as async.", PARAM, ""), - - # lov - ('lov', "Specify LOV name.", PARAM,""), - ('stripe_sz', "Specify the stripe size in bytes.", PARAM, DEFAULT_STRIPE_SZ), - ('stripe_cnt', "Specify the number of OSTs each file should be striped on.", PARAM, DEFAULT_STRIPE_CNT), - ('stripe_pattern', "Specify the stripe pattern. RAID 0 is the only one currently supported.", PARAM, 0), - - # cobd - ('real_obd', "Specify the real device for the cache obd system.", PARAM), - ('cache_obd', "Specify the cache device for the cache obd system.", PARAM), - ] - -def error(*args): - msg = string.join(map(str,args)) - raise OptionError("Error: " + msg) - -def panic(cmd, msg): - print "! 
" + cmd - print msg - sys.exit(1) - -def warning(*args): - msg = string.join(map(str,args)) - sys.stderr.write("WARNING: %s\n" % (msg)) - -def info(*args): - msg = string.join(map(str,args)) - sys.stderr.write("INFO: %s\n" % (msg)) - -# -# manage names and uuids -# need to initialize this by walking tree to ensure -# no duplicate names or uuids are created. -# this are just place holders for now. -# consider changing this to be like OBD-dev-host -def new_name(base): - ctr = 2 - ret = base - while names.has_key(ret): - ret = "%s_%d" % (base, ctr) - ctr = 1 + ctr - names[ret] = 1 - return ret - -def new_uuid(name): - ctr = 2 - ret = "%s_UUID" % (name) - if len(ret) > UUID_MAX_LENGTH: - ret = ret[-UUID_MAX_LENGTH:] - while uuids.has_key(ret): - ret = "%s_UUID_%d" % (name, ctr) - ctr = 1 + ctr - if len(ret) > UUID_MAX_LENGTH: - ret = ret[-UUID_MAX_LENGTH:] - uuids[ret] = 1 - return ret - - -ldlm_name = 'ldlm' -ldlm_uuid = 'ldlm_UUID' - -def new_lustre(dom): - """Create a new empty lustre document""" - # adding ldlm here is a bit of a hack, but one is enough. 
- str = """ - - """ % (Lustre.CONFIG_VERSION, ldlm_name, ldlm_uuid) - return dom.parseString(str) - - -names = {} -uuids = {} - -def init_names(doc): - """initialize auto-name generation tables""" - global names, uuids - # get all elements that contain a name attribute - for n in doc.childNodes: - if n.nodeType == n.ELEMENT_NODE: - if getName(n): - names[getName(n)] = 1 - uuids[getUUID(n)] = 1 - init_names(n) - -def get_format_flag(options): - if options.format: - return 'yes' - return 'no' - -############################################################ -# Build config objects using DOM -# -class GenConfig: - doc = None - dom = None - def __init__(self, doc): - self.doc = doc - - def ref(self, type, uuid): - """ generate <[type]_ref uuidref="[uuid]"/> """ - tag = "%s_ref" % (type) - ref = self.doc.createElement(tag) - ref.setAttribute("uuidref", uuid) - return ref - - def newService(self, tag, name, uuid): - """ create a new service elmement, which requires name and uuid attributes """ - new = self.doc.createElement(tag) - new.setAttribute("uuid", uuid); - new.setAttribute("name", name); - return new - - def addText(self, node, str): - txt = self.doc.createTextNode(str) - node.appendChild(txt) - - def addElement(self, node, tag, str=None): - """ create a new element and add it as a child to node. 
If str is passed, - a text node is created for the new element""" - new = self.doc.createElement(tag) - if str: - self.addText(new, str) - node.appendChild(new) - return new - - def recordtime(self, timestr): - lustre = self.doc.getElementsByTagName("lustre") - lustre[0].setAttribute("mtime", timestr) - - def network(self, name, uuid, nid, cluster_id, net, hostaddr="", - port=0): - """create node""" - network = self.newService("network", name, uuid) - network.setAttribute("nettype", net); - self.addElement(network, "nid", nid) - self.addElement(network, "clusterid", cluster_id) - for host in hostaddr: - self.addElement(network, "hostaddr", host) - if port: - self.addElement(network, "port", "%d" %(port)) - - return network - - def routetbl(self, name, uuid): - """create node""" - rtbl = self.newService("routetbl", name, uuid) - return rtbl - - def route(self, gw_net_type, gw, gw_cluster_id, tgt_cluster_id, lo, hi): - """ create one entry for the route table """ - ref = self.doc.createElement('route') - ref.setAttribute("type", gw_net_type) - ref.setAttribute("gw", gw) - ref.setAttribute("gwclusterid", gw_cluster_id) - ref.setAttribute("tgtclusterid", tgt_cluster_id) - ref.setAttribute("lo", lo) - if hi: - ref.setAttribute("hi", hi) - return ref - - def profile(self, name, uuid): - """ create a host """ - profile = self.newService("profile", name, uuid) - return profile - - def node(self, name, uuid, prof_uuid): - """ create a host """ - node = self.newService("node", name, uuid) - node.appendChild(self.ref("profile", prof_uuid)) - return node - - def ldlm(self, name, uuid): - """ create a ldlm """ - ldlm = self.newService("ldlm", name, uuid) - return ldlm - - def osd(self, name, uuid, fstype, osdtype, devname, format, ost_uuid, - node_uuid, dev_size=0, journal_size=0, inode_size=0, nspath="", - mkfsoptions="", mountfsoptions="", quota=""): - osd = self.newService("osd", name, uuid) - osd.setAttribute('osdtype', osdtype) - osd.appendChild(self.ref("target", 
ost_uuid)) - osd.appendChild(self.ref("node", node_uuid)) - if fstype: - self.addElement(osd, "fstype", fstype) - if devname: - dev = self.addElement(osd, "devpath", devname) - self.addElement(osd, "autoformat", format) - if dev_size: - self.addElement(osd, "devsize", "%s" % (dev_size)) - if journal_size: - self.addElement(osd, "journalsize", "%s" % (journal_size)) - if inode_size: - self.addElement(osd, "inodesize", "%s" % (inode_size)) - if mkfsoptions: - self.addElement(osd, "mkfsoptions", mkfsoptions) - if mountfsoptions: - self.addElement(osd, "mountfsoptions", mountfsoptions) - if quota: - self.addElement(osd, "quota", quota) - if nspath: - self.addElement(osd, "nspath", nspath) - return osd - - def cobd(self, name, uuid, real_uuid, cache_uuid): - cobd = self.newService("cobd", name, uuid) - cobd.appendChild(self.ref("realobd",real_uuid)) - cobd.appendChild(self.ref("cacheobd",cache_uuid)) - return cobd - - def ost(self, name, uuid, osd_uuid, group=""): - ost = self.newService("ost", name, uuid) - ost.appendChild(self.ref("active", osd_uuid)) - if group: - self.addElement(ost, "group", group) - return ost - - def oss(self, name, uuid): - oss = self.newService("oss", name, uuid) - return oss - - def lov(self, name, uuid, mds_uuid, stripe_sz, stripe_cnt, pattern): - lov = self.newService("lov", name, uuid) - lov.appendChild(self.ref("mds", mds_uuid)) - lov.setAttribute("stripesize", str(stripe_sz)) - lov.setAttribute("stripecount", str(stripe_cnt)) - lov.setAttribute("stripepattern", str(pattern)) - return lov - - def lovconfig(self, name, uuid, lov_uuid): - lovconfig = self.newService("lovconfig", name, uuid) - lovconfig.appendChild(self.ref("lov", lov_uuid)) - return lovconfig - - def mds(self, name, uuid, mdd_uuid, group=""): - mds = self.newService("mds", name, uuid) - mds.appendChild(self.ref("active",mdd_uuid)) - if group: - self.addElement(mds, "group", group) - return mds - - def mdsdev(self, name, uuid, fstype, devname, format, node_uuid, - mds_uuid, 
dev_size=0, journal_size=0, inode_size=256, - nspath="", mkfsoptions="", mountfsoptions="", quota="", group_upcall=""): - mdd = self.newService("mdsdev", name, uuid) - self.addElement(mdd, "fstype", fstype) - dev = self.addElement(mdd, "devpath", devname) - self.addElement(mdd, "autoformat", format) - if dev_size: - self.addElement(mdd, "devsize", "%s" % (dev_size)) - if journal_size: - self.addElement(mdd, "journalsize", "%s" % (journal_size)) - if inode_size: - self.addElement(mdd, "inodesize", "%s" % (inode_size)) - if nspath: - self.addElement(mdd, "nspath", nspath) - if mkfsoptions: - self.addElement(mdd, "mkfsoptions", mkfsoptions) - if mountfsoptions: - self.addElement(mdd, "mountfsoptions", mountfsoptions) - if quota: - self.addElement(mdd, "quota", quota) - if group_upcall: - self.addElement(mdd, "group_upcall", group_upcall) - - mdd.appendChild(self.ref("node", node_uuid)) - mdd.appendChild(self.ref("target", mds_uuid)) - return mdd - - def mountpoint(self, name, uuid, fs_uuid, path, clientoptions): - mtpt = self.newService("mountpoint", name, uuid) - mtpt.appendChild(self.ref("filesystem", fs_uuid)) - self.addElement(mtpt, "path", path) - if clientoptions: - self.addElement(mtpt, "clientoptions", clientoptions) - return mtpt - - def filesystem(self, name, uuid, mds_uuid, obd_uuid): - fs = self.newService("filesystem", name, uuid) - fs.appendChild(self.ref("mds", mds_uuid)) - fs.appendChild(self.ref("obd", obd_uuid)) - return fs - - def echo_client(self, name, uuid, osc_uuid): - ec = self.newService("echoclient", name, uuid) - ec.appendChild(self.ref("obd", osc_uuid)) - return ec - -############################################################ -# Utilities to query a DOM tree -# Using this functions we can treat use config information -# directly as a database. 
def getName(n):
    """Return the value of the 'name' attribute of DOM element n."""
    return n.getAttribute('name')


def getUUID(node):
    """Return the value of the 'uuid' attribute of DOM element node."""
    return node.getAttribute('uuid')


def findByName(lustre, name, tag=""):
    """Depth-first search below `lustre` for an element named `name`.

    lustre -- DOM node whose descendants are searched.
    name   -- value the element's 'name' attribute must equal.
    tag    -- if non-empty, only elements with this tag name are
              considered; elements with any other tag are skipped
              together with their subtree.

    Returns the first matching element in document order, or None.
    """
    for child in lustre.childNodes:
        if child.nodeType != child.ELEMENT_NODE:
            continue
        if tag and child.nodeName != tag:
            continue
        if getName(child) == name:
            return child
        # Keep the tag filter while descending; without it a nested
        # element of a different tag could be returned to a caller
        # that explicitly asked for `tag`.
        found = findByName(child, name, tag)
        if found is not None:
            return found
    return None


def lookup(node, uuid):
    """Depth-first search below `node` for the element whose 'uuid'
    attribute equals `uuid`.

    Returns the first matching element in document order, or None.
    """
    for child in node.childNodes:
        if child.nodeType != child.ELEMENT_NODE:
            continue
        if getUUID(child) == uuid:
            return child
        found = lookup(child, uuid)
        if found is not None:
            return found
    return None


def name2uuid(lustre, name, tag="", fatal=1):
    """Translate an element name into that element's uuid.

    The element is located with findByName(lustre, name, tag).  When
    nothing is found and `fatal` is false, "" is returned.  When nothing
    is found and `fatal` is true, the failure is reported through
    error(), which is defined elsewhere in this file.
    """
    ret = findByName(lustre, name, tag)
    if ret is None:
        if not fatal:
            return ""
        # NOTE(review): error() presumably does not return (raises or
        # exits) -- confirm; if it did return, getUUID(None) below
        # would fail with an AttributeError.
        error('name2uuid:', '"'+name+'"', tag, 'element not found.')
    return getUUID(ret)


def lookup_filesystem(lustre, mds_uuid, ost_uuid):
    """Return the uuid of the 'filesystem' element directly below
    `lustre` that references both `mds_uuid` and `ost_uuid`.

    References are tested with ref_exists(), defined elsewhere in this
    file.  Returns None when no such filesystem element exists.
    """
    for child in lustre.childNodes:
        if child.nodeType != child.ELEMENT_NODE:
            continue
        if child.nodeName != 'filesystem':
            continue
        if ref_exists(child, mds_uuid) and ref_exists(child, ost_uuid):
            return getUUID(child)
    return None

# XXX: assumes only one network element per node.
will fix this -# as soon as support for routers is added -def get_net_uuid(lustre, node_name): - """ get a network uuid for a node_name """ - node = findByName(lustre, node_name, "node") - if not node: - error ('get_net_uuid:', '"'+node_name+'"', "node element not found.") - net = node.getElementsByTagName('network') - if net: - return getUUID(net[0]) - return None - - -def lov_add_obd(gen, lov, osc_uuid): - lov.appendChild(gen.ref("obd", osc_uuid)) - -def ref_exists(profile, uuid): - elist = profile.childNodes - for e in elist: - if e.nodeType == e.ELEMENT_NODE: - ref = e.getAttribute('uuidref') - if ref == uuid: - return 1 - return 0 - -# ensure that uuid is not already in the profile -# return true if uuid is added -def node_add_profile(gen, node, ref, uuid): - refname = "%s_ref" % "profile" - ret = node.getElementsByTagName(refname) - if not ret: - error('node has no profile ref:', node) - prof_uuid = ret[0].getAttribute('uuidref') - profile = lookup(node.parentNode, prof_uuid) - if not profile: - error("no profile found:", prof_uuid) - if ref_exists(profile, uuid): - return 0 - profile.appendChild(gen.ref(ref, uuid)) - return 1 - -def get_attr(dom_node, attr, default=""): - v = dom_node.getAttribute(attr) - if v: - return v - return default - -############################################################ -# Top level commands -# -def runcmd(cmd): - f = os.popen(cmd) - ret = f.close() - if ret: - ret = ret >> 8 - else: - ret = 0 - return ret - -def set_node_options(gen, node, options): - if options.router: - node.setAttribute('router', '1') - if options.timeout: - gen.addElement(node, "timeout", get_option(options, 'timeout')) - if options.upcall: - default_upcall = get_option(options, 'upcall') - else: - default_upcall = '' - if default_upcall or options.lustre_upcall: - if options.lustre_upcall: - gen.addElement(node, 'lustreUpcall', options.lustre_upcall) - else: - gen.addElement(node, 'lustreUpcall', default_upcall) - if options.group_upcall: - 
gen.addElement(node, 'groupUpcall', options.group_upcall) - if default_upcall or options.portals_upcall: - if options.portals_upcall: - gen.addElement(node, 'portalsUpcall', options.portals_upcall) - else: - gen.addElement(node, 'portalsUpcall', default_upcall) - if options.ptldebug: - gen.addElement(node, "ptldebug", get_option(options, 'ptldebug')) - if options.subsystem: - gen.addElement(node, "subsystem", get_option(options, 'subsystem')) - return node - -def do_add_node(gen, lustre, options, node_name): - uuid = new_uuid(node_name) - prof_name = new_name("PROFILE_" + node_name) - prof_uuid = new_uuid(prof_name) - profile = gen.profile(prof_name, prof_uuid) - node = gen.node(node_name, uuid, prof_uuid) - lustre.appendChild(node) - lustre.appendChild(profile) - - node_add_profile(gen, node, 'ldlm', ldlm_uuid) - set_node_options(gen, node, options) - return node - - -def add_node(gen, lustre, options): - """ create a node with a network config """ - - node_name = get_option(options, 'node') - ret = findByName(lustre, node_name, "node") - if ret: - print "Node:", node_name, "exists." - return - do_add_node(gen, lustre, options, node_name) - - -def add_net(gen, lustre, options): - """ create a node with a network config """ - - node_name = get_option(options, 'node') - nid = get_option(options, 'nid') - cluster_id = get_option(options, 'cluster_id') - hostaddr = get_option(options, 'hostaddr') - net_type = get_option(options, 'nettype') - - if net_type in ('lnet','tcp','openib','ra'): - port = get_option_int(options, 'port') - elif net_type in ('elan','gm','iib','vib','lo','ptl'): - port = 0 - else: - print "Unknown net_type: ", net_type - sys.exit(2) - - real_net_type = net_type - if net_type == 'lnet' and string.find(nid,'@') > 0: - real_net_type = string.split(nid,'@')[1] - - # testing network - if options.nonet: - if options.verbose: - print "Skipping the remote host networking test." 
- elif (real_net_type == 'tcp') and (nid != '*'): - if options.verbose: - print "Testing network on", node_name - target = string.split(nid,'@')[0] - if target != '*' and target != '\\*': - out = runcmd("ping -c 1 -w 5 %s" %target) - if out != 0: - print "Could not connect to %s, please check network." % node_name - - ret = findByName(lustre, node_name, "node") - if not ret: - node = do_add_node(gen, lustre, options, node_name) - else: - node = ret - set_node_options(gen, node, options) - - net_name = new_name('NET_'+ node_name +'_'+ net_type) - net_uuid = new_uuid(net_name) - node.appendChild(gen.network(net_name, net_uuid, nid, cluster_id, net_type, - hostaddr, port)) - node_add_profile(gen, node, "network", net_uuid) - - -def add_route(gen, lustre, options): - """ create a node with a network config """ - - node_name = get_option(options, 'node') - gw_net_type = get_option(options, 'nettype') - gw = get_option(options, 'gw') - gw_cluster_id = get_option(options, 'gateway_cluster_id') - tgt_cluster_id = get_option(options, 'target_cluster_id') - lo = get_option(options, 'lo') - hi = get_option(options, 'hi') - if not hi: - hi = lo - - node = findByName(lustre, node_name, "node") - if not node: - error (node_name, " not found.") - - rlist = node.getElementsByTagName('routetbl') - if len(rlist) > 0: - rtbl = rlist[0] - else: - rtbl_name = new_name("RTBL_" + node_name) - rtbl_uuid = new_uuid(rtbl_name) - rtbl = gen.routetbl(rtbl_name, rtbl_uuid) - node.appendChild(rtbl) - node_add_profile(gen, node, "routetbl", rtbl_uuid) - rtbl.appendChild(gen.route(gw_net_type, gw, gw_cluster_id, tgt_cluster_id, - lo, hi)) - - -def add_mds(gen, lustre, options): - node_name = get_option(options, 'node') - mds_name = get_option(options, 'mds') - mdd_name = new_name("MDD_" + mds_name +"_" + node_name) - mdd_uuid = new_uuid(mdd_name) - - mds_uuid = name2uuid(lustre, mds_name, 'mds', fatal=0) - if not mds_uuid: - mds_uuid = get_option(options, 'mdsuuid') - if mds_uuid: - if 
lookup(lustre, mds_uuid): - error("Duplicate MDS UUID:", mds_uuid) - else: - mds_uuid = new_uuid(mds_name) - - mds = gen.mds(mds_name, mds_uuid, mdd_uuid, options.group) - lustre.appendChild(mds) - else: - mds = lookup(lustre, mds_uuid) - if options.failover: - mds.setAttribute('failover', "1") - if options.failout: - mds.setAttribute('failover,',"0") - - devname = get_option(options, 'dev') - size = get_option(options, 'size') - fstype = get_option(options, 'fstype') - journal_size = get_option(options, 'journal_size') - inode_size = get_option(options, 'inode_size') - nspath = get_option(options, 'nspath') - mkfsoptions = get_option(options, 'mkfsoptions') - mountfsoptions = get_option(options, 'mountfsoptions') - quota = get_option(options, 'quota') - group_upcall = get_option(options, 'group_upcall') - - node_uuid = name2uuid(lustre, node_name, 'node') - - node = findByName(lustre, node_name, "node") - node_add_profile(gen, node, "mdsdev", mdd_uuid) - net_uuid = get_net_uuid(lustre, node_name) - if not net_uuid: - error("NODE: ", node_name, "not found") - - mdd = gen.mdsdev(mdd_name, mdd_uuid, fstype, devname, - get_format_flag(options), node_uuid, mds_uuid, - size, journal_size, inode_size, nspath, mkfsoptions, - mountfsoptions, quota, group_upcall) - lustre.appendChild(mdd) - - -def add_ost(gen, lustre, options): - node_name = get_option(options, 'node') - lovname = get_option(options, 'lov') - osdtype = get_option(options, 'osdtype') - - node_uuid = name2uuid(lustre, node_name, 'node') - - if osdtype == 'obdecho': - fstype = '' - devname = '' - size = 0 - fstype = '' - journal_size = '' - inode_size = '' - mkfsoptions = '' - mountfsoptions = '' - quota = '' - else: - devname = get_option(options, 'dev') # can be unset for bluearcs - size = get_option(options, 'size') - fstype = get_option(options, 'fstype') - journal_size = get_option(options, 'journal_size') - inode_size = get_option(options, 'inode_size') - mkfsoptions = get_option(options, 'mkfsoptions') 
- mountfsoptions = get_option(options, 'mountfsoptions') - quota = get_option(options, 'quota') - - nspath = get_option(options, 'nspath') - - ostname = get_option(options, 'ost') - if not ostname: - ostname = new_name('OST_'+ node_name) - - osdname = new_name("OSD_" + ostname + "_" + node_name) - osd_uuid = new_uuid(osdname) - - ost_uuid = name2uuid(lustre, ostname, 'ost', fatal=0) - if not ost_uuid: - ost_uuid = get_option(options, 'ostuuid') - if ost_uuid: - if lookup(lustre, ost_uuid): - error("Duplicate OST UUID:", ost_uuid) - else: - ost_uuid = new_uuid(ostname) - - ost = gen.ost(ostname, ost_uuid, osd_uuid, options.group) - lustre.appendChild(ost) - if lovname: - lov = findByName(lustre, lovname, "lov") - if not lov: - error('add_ost:', '"'+lovname+'"', "lov element not found.") - lov_add_obd(gen, lov, ost_uuid) - else: - ost = lookup(lustre, ost_uuid) - - if options.failover: - ost.setAttribute('failover', "1") - if options.failout: - ost.setAttribute('failover', "0") - - - osd = gen.osd(osdname, osd_uuid, fstype, osdtype, devname, - get_format_flag(options), ost_uuid, node_uuid, size, - journal_size, inode_size, nspath, mkfsoptions, - mountfsoptions, quota) - - node = findByName(lustre, node_name, "node") - -## if node_add_profile(gen, node, 'oss', oss_uuid): -## ossname = 'OSS' -## oss_uuid = new_uuid(ossname) -## oss = gen.oss(ossname, oss_uuid) -## lustre.appendChild(oss) - - node_add_profile(gen, node, 'osd', osd_uuid) - lustre.appendChild(osd) - - -def add_cobd(gen, lustre, options): - node_name = get_option(options, 'node') - name = new_name('COBD_' + node_name) - uuid = new_uuid(name) - - real_name = get_option(options, 'real_obd') - cache_name = get_option(options, 'cache_obd') - - real_uuid = name2uuid(lustre, real_name, tag='obd') - cache_uuid = name2uuid(lustre, cache_name, tag='obd') - - node = findByName(lustre, node_name, "node") - node_add_profile(gen, node, "cobd", uuid) - cobd = gen.cobd(name, uuid, real_uuid, cache_uuid) - 
lustre.appendChild(cobd) - - -def add_echo_client(gen, lustre, options): - """ add an echo client to the profile for this node. """ - node_name = get_option(options, 'node') - lov_name = get_option(options, 'ost') - - node = findByName(lustre, node_name, 'node') - - echoname = new_name('ECHO_'+ node_name) - echo_uuid = new_uuid(echoname) - node_add_profile(gen, node, 'echoclient', echo_uuid) - - lov_uuid = name2uuid(lustre, lov_name, tag='lov', fatal=0) - if not lov_uuid: - lov_uuid = name2uuid(lustre, lov_name, tag='ost', fatal=1) - - echo = gen.echo_client(echoname, echo_uuid, lov_uuid) - lustre.appendChild(echo) - - -def add_lov(gen, lustre, options): - """ create a lov """ - - lov_orig = get_option(options, 'lov') - name = new_name(lov_orig) - if name != lov_orig: - warning("name:", lov_orig, "already used. using:", name) - - mds_name = get_option(options, 'mds') - stripe_sz = get_option_int(options, 'stripe_sz') - stripe_cnt = get_option_int(options, 'stripe_cnt') - if stripe_cnt == 0: - info("default stripe count (0) - will use %d stripe(s) per file" \ - % DEFAULT_STRIPE_CNT) - pattern = get_option_int(options, 'stripe_pattern') - uuid = new_uuid(name) - - ret = findByName(lustre, name, "lov") - if ret: - error("LOV: ", name, " already exists.") - - mds_uuid = name2uuid(lustre, mds_name, 'mds') - lov = gen.lov(name, uuid, mds_uuid, stripe_sz, stripe_cnt, pattern) - lustre.appendChild(lov) - - # add an lovconfig entry to the active mdsdev profile - lovconfig_name = new_name('LVCFG_' + name) - lovconfig_uuid = new_uuid(lovconfig_name) - mds = findByName(lustre, mds_name, "mds") - mds.appendChild(gen.ref("lovconfig", lovconfig_uuid)) - lovconfig = gen.lovconfig(lovconfig_name, lovconfig_uuid, uuid) - lustre.appendChild(lovconfig) - -def add_default_lov(gen, lustre, mds_name, lov_name): - """ create a default lov """ - - stripe_sz = DEFAULT_STRIPE_SZ - stripe_cnt = DEFAULT_STRIPE_CNT - pattern = DEFAULT_STRIPE_PATTERN - uuid = new_uuid(lov_name) - - ret = 
findByName(lustre, lov_name, "lov") - if ret: - error("LOV: ", lov_name, " already exists.") - - mds_uuid = name2uuid(lustre, mds_name, 'mds') - lov = gen.lov(lov_name, uuid, mds_uuid, stripe_sz, stripe_cnt, pattern) - lustre.appendChild(lov) - - # add an lovconfig entry to the active mdsdev profile - lovconfig_name = new_name('LVCFG_' + lov_name) - lovconfig_uuid = new_uuid(lovconfig_name) - mds = findByName(lustre, mds_name) - mds.appendChild(gen.ref("lovconfig", lovconfig_uuid)) - lovconfig = gen.lovconfig(lovconfig_name, lovconfig_uuid, uuid) - lustre.appendChild(lovconfig) - -def new_filesystem(gen, lustre, mds_uuid, obd_uuid): - fs_name = new_name("FS_fsname") - fs_uuid = new_uuid(fs_name) - mds = lookup(lustre, mds_uuid) - mds.appendChild(gen.ref("filesystem", fs_uuid)) - fs = gen.filesystem(fs_name, fs_uuid, mds_uuid, obd_uuid) - lustre.appendChild(fs) - return fs_uuid - -def get_fs_uuid(gen, lustre, mds_name, obd_name): - mds_uuid = name2uuid(lustre, mds_name, tag='mds') - obd_uuid = name2uuid(lustre, obd_name, tag='lov', fatal=0) - fs_uuid = lookup_filesystem(lustre, mds_uuid, obd_uuid) - if not fs_uuid: - fs_uuid = new_filesystem(gen, lustre, mds_uuid, obd_uuid) - return fs_uuid - -def add_mtpt(gen, lustre, options): - """ create mtpt on a node """ - node_name = get_option(options, 'node') - - path = get_option(options, 'path') - clientoptions = get_option(options, "clientoptions") - fs_name = get_option(options, 'filesystem') - - lov_name = get_option(options, 'lov') - ost_name = get_option(options, 'ost') - mds_name = get_option(options, 'mds') - if lov_name == '': - if ost_name == '': - error("--add mtpt requires --lov lov_name or --ost ost_name") - else: - warning("use default value for lov, due no --lov lov_name provided") - lov_name = new_name("lov_default") - add_default_lov(gen, lustre, mds_name, lov_name) - ost_uuid = name2uuid(lustre, ost_name, 'ost', fatal=0) - if not ost_uuid: - error('add_mtpt:', '"'+ost_name+'"', "ost element not found.") - 
lov = findByName(lustre, lov_name, "lov") - lov_add_obd(gen, lov, ost_uuid) - - if fs_name == '': - fs_uuid = get_fs_uuid(gen, lustre, mds_name, lov_name) - else: - fs_uuid = name2uuid(lustre, fs_name, tag='filesystem') - - name = new_name('MNT_'+ node_name) - - ret = findByName(lustre, name, "mountpoint") - if ret: - # this can't happen, because new_name creates unique names - error("MOUNTPOINT: ", name, " already exists.") - - uuid = new_uuid(name) - mtpt = gen.mountpoint(name, uuid, fs_uuid, path, clientoptions) - node = findByName(lustre, node_name, "node") - if not node: - error('node:', node_name, "not found.") - node_add_profile(gen, node, "mountpoint", uuid) - lustre.appendChild(mtpt) - -############################################################ -# Command line processing -# -class OptionError (exceptions.Exception): - def __init__(self, args): - self.args = args - -def get_option(options, tag): - """Look for tag in options hash and return the value if set. If not - set, then if return default it is set, otherwise exception.""" - if options.__getattr__(tag) != None: - return options.__getattr__(tag) - else: - raise OptionError("--add %s requires --%s " % (options.add, tag)) - -def get_option_int(options, tag): - """Return an integer option. 
Raise exception if the value is not an int""" - val = get_option(options, tag) - try: - n = int(val) - except ValueError: - raise OptionError("--%s (value must be integer)" % (tag)) - return n - -# simple class for profiling -import time -class chrono: - def __init__(self): - self._start = 0 - def start(self): - self._stop = 0 - self._start = time.time() - def stop(self, msg=''): - self._stop = time.time() - if msg: - self.display(msg) - def dur(self): - return self._stop - self._start - def display(self, msg): - d = self.dur() - str = '%s: %g secs' % (msg, d) - print str - -################################################################# -# function cmdlinesplit used to split cmd line from batch file -# -def cmdlinesplit(cmdline): - - double_quote = re.compile(r'"(([^"\\]|\\.)*)"') - single_quote = re.compile(r"'(.*?)'") - escaped = re.compile(r'\\(.)') - esc_quote = re.compile(r'\\([\\"])') - outside = re.compile(r"""([^\s\\'"]+)""") - - arg_list = [] - i = 0; arg = None - while i < len(cmdline): - c = cmdline[i] - if c == '"': - match = double_quote.match(cmdline, i) - if not match: - print "Unmatched double quote:", cmdline - sys.exit(1) - i = match.end() - if arg is None: arg = esc_quote.sub(r'\1', match.group(1)) - else: arg = arg + esc_quote.sub(r'\1', match.group(1)) - - elif c == "'": - match = single_quote.match(cmdline, i) - if not match: - print "Unmatched single quote:", cmdline - sys.exit(1) - i = match.end() - if arg is None: arg = match.group(1) - else: arg = arg + match.group(1) - - elif c == "\\": - match = escaped.match(cmdline, i) - if not match: - print "Unmatched backslash", cmdline - sys.exit(1) - i = match.end() - if arg is None: arg = match.group(1) - else: arg = arg + match.group(1) - - elif c in string.whitespace: - if arg != None: - arg_list.append(str(arg)) - arg = None - while i < len(cmdline) and cmdline[i] in string.whitespace: - i = i + 1 - else: - match = outside.match(cmdline, i) - assert match - i = match.end() - if arg is None: 
arg = match.group() - else: arg = arg + match.group() - - if arg != None: arg_list.append(str(arg)) - - return arg_list - -############################################################ -# Main -# - -def add(devtype, gen, lustre, options): - if devtype == 'net': - add_net(gen, lustre, options) - elif devtype == 'mtpt': - add_mtpt(gen, lustre, options) - elif devtype == 'mds': - add_mds(gen, lustre, options) - elif devtype == 'ost': - add_ost(gen, lustre, options) - elif devtype == 'lov': - add_lov(gen, lustre, options) - elif devtype == 'route': - add_route(gen, lustre, options) - elif devtype == 'node': - add_node(gen, lustre, options) - elif devtype == 'echo_client': - add_echo_client(gen, lustre, options) - elif devtype == 'cobd': - add_cobd(gen, lustre, options) - else: - error("unknown device type:", devtype) - -def do_command(gen, lustre, options, args): - if options.add: - add(options.add, gen, lustre, options) - else: - error("Missing command") - -def main(): - cl = Lustre.Options("lmc", "", lmc_options) - try: - options, args = cl.parse(sys.argv[1:]) - except Lustre.OptionError, e: - panic("lmc", e) - - if len(args) > 0: - panic(string.join(sys.argv), "Unexpected extra arguments on command line: " + string.join(args)) - - if options.reference: - reference() - sys.exit(0) - - outFile = '-' - - if options.merge: - outFile = options.merge - if os.access(outFile, os.R_OK): - doc = xml.dom.minidom.parse(outFile) - else: - doc = new_lustre(xml.dom.minidom) - elif options.input: - doc = xml.dom.minidom.parse(options.input) - else: - doc = new_lustre(xml.dom.minidom) - - if options.output: - outFile = options.output - - lustre = doc.documentElement - init_names(lustre) - if lustre.tagName != "lustre": - print "Existing config not valid." 
- sys.exit(1) - - gen = GenConfig(doc) - - if options.batch: - fp = open(options.batch) - batchCommands = fp.readlines() - fp.close() - for cmd in batchCommands: - try: - options, args = cl.parse(cmdlinesplit(cmd)) - if options.merge or options.input or options.output: - print "The batchfile should not contain --merge, --input or --output." - sys.exit(1) - do_command(gen, lustre, options, args) - except OptionError, e: - panic(cmd, e) - except Lustre.OptionError, e: - panic(cmd, e) - else: - try: - do_command(gen, lustre, options, args) - except OptionError, e: - panic(string.join(sys.argv),e) - except Lustre.OptionError, e: - panic("lmc", e) - - #record timestamp - timestr = string.split(str(time.time()), '.') - gen.recordtime(timestr[0]) - - if outFile == '-': - printDoc(doc) - else: - printDoc(doc, open(outFile,"w")) - -if __name__ == "__main__": - main() diff --git a/lustre/utils/mds-failover-sample b/lustre/utils/mds-failover-sample deleted file mode 100755 index f6269f4..0000000 --- a/lustre/utils/mds-failover-sample +++ /dev/null @@ -1,20 +0,0 @@ -#!/bin/sh - -MDS=NET_mds_tcp_UUID -MDSHOST=mds - -/r/src/lustre/utils/lctl <