# Based in part on the XML obdctl modifications done by Brian Behlendorf
import sys, getopt
-import string, os, stat, popen2
+import string, os, stat, popen2, socket, time
import re, exceptions
import xml.dom.minidom
# Global parameters
-TCP_ACCEPTOR = 'acceptor'
-options = {}
+TCP_ACCEPTOR = ''
#
# Maximum number of devices to search for.
print """usage: lconf config.xml
config.xml Lustre configuration in xml format.
+--get <url> URL to fetch a config file
+--node <nodename> Load config for <nodename>
+-d | --cleanup Cleans up config. (Shutdown)
-v | --verbose Print system commands as they are run
--d | --debug Print system commands, but does not run them
---host <hostname> Load config for <hostname>
---cleanup Cleans up config. (Shutdown)
-h | --help Print this help
+--gdb Prints message after creating gdb module script
+ and sleeps for 5 seconds.
+-n | --noexec Prints the commands and steps that will be run for a
+ config without executing them. This can used to check if a
+ config file is doing what it should be doing. (Implies -v)
+--nomod Skip load/unload module step.
+--nosetup Skip device setup/cleanup step.
"""
TODO = """
--ldap server LDAP server with lustre config database
+--makeldiff Translate xml source to LDIFF
--reformat Reformat all devices (will confirm)
+This are perhaps not needed:
--lustre="src dir" Base directory of lustre sources. Used to search
for modules.
--portals=src Portals source
---makeldiff Translate xml source to LDIFF
---iam myname ??
"""
sys.exit()
+# ============================================================
+# Config parameters, encapsulated in a class
+class Config:
+    """Run-time configuration for lconf: boolean flags plus string parameters.
+
+    Every accessor doubles as a setter: called with a true argument it stores
+    that value, and it always returns the current setting.  Because the
+    setters test truthiness, passing 0 or None never clears a stored value.
+    """
+    def __init__(self):
+        # flags
+        self._noexec = 0
+        self._verbose = 0
+        self._reformat = 0
+        self._cleanup = 0
+        self._gdb = 0
+        self._nomod = 0
+        self._nosetup = 0
+        # parameters
+        self._modules = None
+        self._node = None
+        self._url = None
+        self._gdb_script = '/tmp/ogdb'
+        self._debug_path = '/tmp/lustre-log'
+        self._src_dir = None
+
+    def verbose(self, flag = None):
+        """Set (if flag is true) and return the verbose flag."""
+        if flag: self._verbose = flag
+        return self._verbose
+
+    def noexec(self, flag = None):
+        """Set (if flag is true) and return the noexec flag."""
+        if flag: self._noexec = flag
+        return self._noexec
+
+    def reformat(self, flag = None):
+        """Set (if flag is true) and return the reformat flag."""
+        if flag: self._reformat = flag
+        return self._reformat
+
+    def cleanup(self, flag = None):
+        """Set (if flag is true) and return the cleanup flag."""
+        if flag: self._cleanup = flag
+        return self._cleanup
+
+    def gdb(self, flag = None):
+        """Set (if flag is true) and return the gdb flag."""
+        if flag: self._gdb = flag
+        return self._gdb
+
+    def nomod(self, flag = None):
+        """Set (if flag is true) and return the nomod flag."""
+        if flag: self._nomod = flag
+        return self._nomod
+
+    def nosetup(self, flag = None):
+        """Set (if flag is true) and return the nosetup flag."""
+        if flag: self._nosetup = flag
+        return self._nosetup
+
+    def node(self, val = None):
+        """Set (if val is true) and return the node name."""
+        if val: self._node = val
+        return self._node
+
+    def url(self, val = None):
+        """Set (if val is true) and return the config URL."""
+        if val: self._url = val
+        return self._url
+
+    def gdb_script(self):
+        """Return the gdb script path, prefixed with '/r' when /r is a directory."""
+        if os.path.isdir('/r'):
+            return '/r' + self._gdb_script
+        else:
+            return self._gdb_script
+
+    def debug_path(self):
+        """Return the debug log path, prefixed with '/r' when /r is a directory."""
+        if os.path.isdir('/r'):
+            return '/r' + self._debug_path
+        else:
+            return self._debug_path
+
+    def src_dir(self, val = None):
+        """Set (if val is true) and return the source directory."""
+        # This accessor used to read and write self._url, so setting the
+        # source directory overwrote the config URL and _src_dir stayed None.
+        if val: self._src_dir = val
+        return self._src_dir
+
+config = Config()
+
# ============================================================
# debugging and error funcs
def fixme(msg = "this feature"):
- raise RuntimeError, msg + ' not implmemented yet.'
+ raise LconfError, msg + ' not implmemented yet.'
def panic(*args):
msg = string.join(map(str,args))
- print msg
- raise RuntimeError, msg
+ if not config.noexec():
+ raise LconfError(msg)
def log(*args):
msg = string.join(map(str,args))
print string.strip(s)
def debug(*args):
- msg = string.join(map(str,args))
- if isverbose(): print msg
-
-def isverbose():
- return options.has_key('verbose') and options['verbose'] == 1
-
-def isnotouch():
- return options.has_key('debug') and options['debug'] == 1
+ if config.verbose():
+ msg = string.join(map(str,args))
+ print msg
# ============================================================
# locally defined exceptions
class CommandError (exceptions.Exception):
- def __init__(self, args=None):
+ def __init__(self, cmd_name, cmd_err, rc=None):
+ self.cmd_name = cmd_name
+ self.cmd_err = cmd_err
+ self.rc = rc
+
+ def dump(self):
+ import types
+ if type(self.cmd_err) == types.StringType:
+ if self.rc:
+ print "! %s (%d): %s" % (self.cmd_name, self.rc, self.cmd_err)
+ else:
+ print "! %s: %s" % (self.cmd_name, self.cmd_err)
+ elif type(self.cmd_err) == types.ListType:
+ if self.rc:
+ print "! %s (error %d):" % (self.cmd_name, self.rc)
+ else:
+ print "! %s:" % (self.cmd_name)
+ for s in self.cmd_err:
+ print "> %s" %(string.strip(s))
+ else:
+ print self.cmd_err
+
+class LconfError (exceptions.Exception):
+ def __init__(self, args):
self.args = args
+
# ============================================================
# handle lctl interface
class LCTLInterface:
"""
Initialize close by finding the lctl binary.
"""
- syspath = string.split(os.environ['PATH'], ':')
- syspath.insert(0, "../utils");
- self.lctlcmd = None
- for d in syspath:
- lctl = os.path.join(d,cmd)
- if os.access(lctl, os.X_OK):
- self.lctl = lctl
- break
+ self.lctl = find_prog(cmd)
if not self.lctl:
- raise RuntimeError, "unable to find lctl binary."
+ if config.noexec():
+ debug('! lctl not found')
+ self.lctl = 'lctl'
+ else:
+ raise CommandError('lctl', "unable to find lctl binary.")
def run(self, cmds):
"""
create complex command line options
"""
debug("+", self.lctl, cmds)
- if isnotouch(): return ([], 0)
+ if config.noexec(): return (0, [])
p = popen2.Popen3(self.lctl, 1)
p.tochild.write(cmds + "\n")
p.tochild.close()
out = p.fromchild.readlines()
- ret = p.poll()
err = p.childerr.readlines()
+ ret = p.wait()
if ret or len(err):
- log (self.lctl, "error:", ret)
- logall(err)
- raise CommandError, err
+ raise CommandError(self.lctl, err, ret)
return ret, out
-
-
- # create a new device with lctl
+
+
def network(self, net, nid):
- cmds = """
+ """ initialize the network and add "self" """
+ # Idea: "mynid" could be used for all network types to add "self," and then
+ # this special case would be gone and the "self" hack would be hidden.
+ if net == 'tcp':
+ cmds = """
network %s
mynid %s
+ add_uuid self %s
+ quit""" % (net, nid, nid)
+ else:
+ cmds = """
+ network %s
+ add_uuid self %s
quit""" % (net, nid)
+
self.run(cmds)
# create a new connection
- def connect(self, net, nid, port, servuuid):
+ def connect(self, net, nid, port, servuuid, send_buf, read_buf):
+ # XXX: buf size params not used yet
cmds = """
network %s
connect %s %d
quit""" % (net, nid, port, servuuid, nid)
self.run(cmds)
- # create a new device with lctl
+ # create a new connection
+ def add_route(self, net, to, via):
+ cmds = """
+ """
+ #self.run(cmds)
+
+ # disconnect one connection
def disconnect(self, net, nid, port, servuuid):
cmds = """
network %s
disconnect %s
- quit""" % (net, nid)
+ del_uuid %s
+ quit""" % (net, nid, servuuid)
+ self.run(cmds)
+
+ # disconnect all connections
+ def disconnectAll(self, net):
+ cmds = """
+ network %s
+ disconnect
+ del_uuid self
+ quit""" % (net)
self.run(cmds)
# create a new device with lctl
- def newdev(self, attach, setup):
+ def newdev(self, attach, setup = ""):
cmds = """
newdev
attach %s
self.run(cmds)
# create an lov
- def lovconfig(self, uuid, mdcuuid, stripe_cnt, stripe_sz, pattern, devlist):
+ def lovconfig(self, uuid, mdsuuid, stripe_cnt, stripe_sz, stripe_off, pattern, devlist):
cmds = """
device $%s
probe
- lovconfig %s %d %d %s %s
- quit""" % (mdcuuid, uuid, stripe_cnt, stripe_sz, pattern, devlist)
+ lovconfig %s %d %d %d %s %s
+ quit""" % (mdsuuid, uuid, stripe_cnt, stripe_sz, stripe_off, pattern, devlist)
self.run(cmds)
# ============================================================
def run(*args):
cmd = string.join(map(str,args))
debug ("+", cmd)
- if isnotouch(): return ([], 0)
+ if config.noexec(): return (0, [])
f = os.popen(cmd + ' 2>&1')
out = f.readlines()
ret = f.close()
ret = 0
return (ret, out)
+# Run a command in the background.
+def run_daemon(*args):
+    """Start a shell command and return its exit code.
+
+    The arguments are stringified and joined with spaces.  The command is
+    opened with os.popen (stderr merged into stdout) and the pipe is closed
+    without reading any output.  In noexec mode the command is only logged
+    through debug() and 0 is returned.
+    """
+    command = string.join(map(str,args))
+    debug ("+", command)
+    if config.noexec():
+        return 0
+    pipe = os.popen(command + ' 2>&1')
+    status = pipe.close()
+    if not status:
+        # close() gives None (or 0) when the command succeeded
+        return 0
+    # the exit code lives in the high byte of the wait status
+    return status >> 8
+
+# Determine full path to use for an external command
+# searches dirname(argv[0]) first, then PATH
+def find_prog(cmd):
+    """Return the path of the first executable named cmd, or '' if none.
+
+    Directories are tried in this order: '../../portals/linux/utils/'
+    relative to dirname(argv[0]), dirname(argv[0]) itself, then each entry
+    of the PATH environment variable.
+    """
+    script_dir = os.path.dirname(sys.argv[0])
+    candidates = [os.path.join(script_dir, '../../portals/linux/utils/'),
+                  script_dir]
+    candidates = candidates + string.split(os.environ['PATH'], ':')
+    for directory in candidates:
+        candidate = os.path.join(directory, cmd)
+        if os.access(candidate, os.X_OK):
+            return candidate
+    return ''
+
+# Recursively look for file starting at base dir
+def do_find_file(base, mod):
+    """Search base and, depth first, its subdirectories for a readable mod.
+
+    Returns the full path of the first match, or None when nothing is found.
+    os.listdir errors (OSError) propagate to the caller.
+    """
+    candidate = os.path.join(base, mod)
+    if os.access(candidate, os.R_OK):
+        return candidate
+    for entry in os.listdir(base):
+        subdir = os.path.join(base, entry)
+        if not os.path.isdir(subdir):
+            continue
+        found = do_find_file(subdir, mod)
+        if found:
+            return found
+    return None
+
+def find_module(src_dir, modname):
+    """Locate '<modname>.o' under src_dir and return its path, or None.
+
+    The 'lustre' subtree of src_dir is searched first, then 'portals/linux'.
+    A subtree that cannot be listed (OSError) is skipped.
+    """
+    object_file = '%s.o' % (modname)
+    for subtree in ("/lustre", "/portals/linux"):
+        try:
+            found = do_find_file(src_dir + subtree, object_file)
+        except OSError:
+            continue
+        if found:
+            return found
+    return None
# is the path a block device?
def is_block(path):
# build fs according to type
# fixme: dangerous
def mkfs(fstype, dev):
- if(fstype == 'ext3'):
- mkfs = 'mkfs.ext2 -j -b 4096'
- elif (fstype == 'extN'):
+ if(fstype in ('ext3', 'extN')):
mkfs = 'mkfs.ext2 -j -b 4096'
else:
print 'unsupported fs type: ', fstype
force = '-F'
else:
force = ''
- run (mkfs, force, dev)
+ (ret, out) = run (mkfs, force, dev)
+ if ret:
+ panic("Unable to build fs:", dev)
+ # enable hash tree indexing on fs
+ if fstype == 'extN':
+ htree = 'echo "feature FEATURE_C5" | debugfs -w'
+ (ret, out) = run (htree, dev)
+ if ret:
+ panic("Unable to enable htree:", dev)
# some systems use /dev/loopN, some /dev/loop/N
def loop_base():
log('unable to clean loop device:', dev, 'for file:', file)
logall(out)
+# determine if dev is formatted as a <fstype> filesystem
+def need_format(fstype, dev):
+    """Report whether dev still needs to be formatted as fstype.
+
+    Stub: the check is not implemented and the answer is always 0 (false).
+    """
+    # FIXME don't know how to implement this
+    return 0
+
# initialize a block device if needed
def block_dev(dev, size, fstype, format):
- if isnotouch(): return dev
+ if config.noexec(): return dev
if not is_block(dev):
dev = init_loop(dev, size, fstype)
- if (format == 'yes'):
+ if config.reformat() or (need_format(fstype, dev) and format == 'yes'):
mkfs(fstype, dev)
- return dev
-# ============================================================
-# Functions to prepare the various objects
+# else:
+# panic("device:", dev,
+# "not prepared, and autoformat is not set.\n",
+# "Rerun with --reformat option to format ALL filesystems")
+
+ return dev
-def prepare_ldlm(node):
- (name, uuid) = getNodeAttr(node)
- print 'LDLM:', name, uuid
- lctl.newdev(attach="ldlm %s %s" % (name, uuid),
- setup ="")
-
-def prepare_lov(node):
- (name, uuid, mdcuuid, stripe_cnt, strip_sz, pattern, devlist, mdsname) = getLOVInfo(node)
- print 'LOV:', name, uuid, mdcuuid, stripe_cnt, strip_sz, pattern, devlist, mdsname
- lctl.lovconfig(uuid, mdsname, stripe_cnt, strip_sz, pattern, devlist)
- lctl.newdev(attach="lov %s %s" % (name, uuid),
- setup ="%s" % (mdcuuid))
-
-def prepare_network(node):
- (name, uuid, type, nid, port) = getNetworkInfo(node)
- print 'NETWORK:', name, uuid, type, nid, port
- if type == 'tcp':
- run(TCP_ACCEPTOR, port)
- lctl.network(type, nid)
-
-
-# need to check /proc/mounts and /etc/mtab before
-# formatting anything.
-# FIXME: check if device is already formatted.
-def prepare_obd(obd):
- (name, uuid, obdtype, dev, size, fstype, format) = getOBDInfo(obd)
- print 'OBD:', name, uuid, obdtype, dev, size, fstype, format
- dev = block_dev(dev, size, fstype, format)
- lctl.newdev(attach="%s %s %s" % (obdtype, name, uuid),
- setup ="%s %s" %(dev, fstype))
-
+def get_local_address(net_type):
+    """Return the local address for the network type, or "" if unknown.
+
+    tcp:  the address that the local host name resolves to.
+    elan: the NodeId value from /proc/elan/device0/position; an IOError
+          while reading the file is logged and "" is returned.
+    gm:   not supported; fixme() is called.
+    """
+    if net_type == 'tcp':
+        # host `hostname`
+        return socket.gethostbyname(socket.gethostname())
+    if net_type == 'gm':
+        fixme("automatic local address for GM")
+        return ""
+    if net_type != 'elan':
+        return ""
+    # awk '/NodeId/ { print $2 }' '/proc/elan/device0/position'
+    address = ""
+    try:
+        position = open('/proc/elan/device0/position', 'r')
+        entries = position.readlines()
+        position.close()
+        for entry in entries:
+            fields = string.split(entry)
+            if fields[0] == 'NodeId':
+                address = fields[1]
+                break
+    except IOError, e:
+        log(e)
+    return address
+
+
-def prepare_ost(ost):
- name, uuid, obd = getOSTInfo(ost)
- print 'OST:', name, uuid, obd
- lctl.newdev(attach="ost %s %s" % (name, uuid),
- setup ="$%s" % (obd))
-
-def prepare_mds(node):
- (name, uuid, dev, size, fstype, format) = getMDSInfo(node)
- print 'MDS:', name, uuid, dev, size, fstype
- # setup network for mds, too
- dev = block_dev(dev, size, fstype, format)
- lctl.newdev(attach="mds %s %s" % (name, uuid),
- setup ="%s %s" %(dev, fstype))
-
-def prepare_osc(node):
- (name, uuid, obduuid, ostuuid) = getOSCInfo(node)
- print 'OSC:', name, uuid, obduuid, ostuuid
- net = lookup(node.parentNode, ostuuid)
- srvname, srvuuid, net, server, port = getNetworkInfo(net)
- lctl.connect(net, server, port, ostuuid)
- lctl.newdev(attach="osc %s %s" % (name, uuid),
- setup ="%s %s" %(obduuid, ostuuid))
-
-def prepare_mdc(node):
- (name, uuid, mdsuuid, netuuid) = getMDCInfo(node)
- print 'MDC:', name, uuid, mdsuuid, netuuid
- net = lookup(node.parentNode, netuuid)
- srvname, srvuuid, net, server, port = getNetworkInfo(net)
- lctl.connect(net, server, port, netuuid)
- lctl.newdev(attach="mdc %s %s" % (name, uuid),
- setup ="%s %s" %(mdsuuid, netuuid))
-
-def prepare_mountpoint(node):
- name, uuid, oscuuid, mdcuuid, mtpt = getMTPTInfo(node)
- print 'MTPT:', name, uuid, oscuuid, mdcuuid, mtpt
- cmd = "mount -t lustre_lite -o ost=%s,mds=%s none %s" % \
- (oscuuid, mdcuuid, mtpt)
- run("mkdir", mtpt)
- run(cmd)
# ============================================================
-# Functions to cleanup the various objects
-
-def cleanup_ldlm(node):
- (name, uuid) = getNodeAttr(node)
- print 'LDLM:', name, uuid
- try:
- lctl.cleanup(name, uuid)
- except CommandError:
- print "cleanup failed: ", name
-
-def cleanup_lov(node):
- (name, uuid, mdcuuid, stripe_cnt, strip_sz, pattern, devlist, mdsname) = getLOVInfo(node)
- print 'LOV:', name, uuid, mdcuuid, stripe_cnt, strip_sz, pattern, devlist, mdsname
- try:
- lctl.cleanup(name, uuid)
- except CommandError:
- print "cleanup failed: ", name
-
-def cleanup_network(node):
- (name, uuid, type, nid, port) = getNetworkInfo(node)
- print 'NETWORK:', name, uuid, type, nid, port
- #lctl.network(type, nid)
-
-# need to check /proc/mounts and /etc/mtab before
-# formatting anything.
-# FIXME: check if device is already formatted.
-def cleanup_obd(obd):
- (name, uuid, obdtype, dev, size, fstype, format) = getOBDInfo(obd)
- print "OBD: ", name, obdtype, dev, size, fstype, format
- try:
- lctl.cleanup(name, uuid)
- except CommandError:
- print "cleanup failed: ", name
- clean_loop(dev)
-
-def cleanup_ost(ost):
- name, uuid, obd = getOSTInfo(ost)
- print "OST: ", name, uuid, obd
- try:
- lctl.cleanup(name, uuid)
- except CommandError:
- print "cleanup failed: ", name
-
-def cleanup_mds(node):
- (name, uuid, dev, size, fstype, format) = getMDSInfo(node)
- print "MDS: ", name, dev, size, fstype
- try:
- lctl.cleanup(name, uuid)
- except CommandError:
- print "cleanup failed: ", name
- clean_loop(dev)
+# Classes to prepare and cleanup the various objects
+#
+class Module:
+    """ Base class for the rest of the modules. The default cleanup method is
+    defined here, as well as some utility funcs.
+    """
+    def __init__(self, tag_name, node):
+        """Remember the DOM node and its 'name' and 'uuid' attributes, and
+        start with an empty list of kernel modules."""
+        self.dom_node = node
+        self.tag_name = tag_name
+        self.name = node.getAttribute('name')
+        self.uuid = node.getAttribute('uuid')
+        self.kmodule_list = []
+
+    def info(self, *args):
+        """Print the tag name, name and uuid, followed by the given args."""
+        msg = string.join(map(str,args))
+        print self.tag_name + ":", self.name, self.uuid, msg
+
+    def cleanup(self):
+        """ default cleanup, used for most modules """
+        self.info()
+        try:
+            lctl.cleanup(self.name, self.uuid)
+        except CommandError, e:
+            # best effort: report the failure and carry on
+            print "cleanup failed: ", self.name
+
+    def add_module(self, modname):
+        """Append a module to list of modules to load."""
+        self.kmodule_list.append(modname)
+
+    def mod_loaded(self, modname):
+        """Check if a module is already loaded. Look in /proc/modules for it.
+
+        Returns the (possibly empty) list of matching names, so the result
+        is true exactly when the module is listed."""
+        fp = open('/proc/modules')
+        lines = fp.readlines()
+        fp.close()
+        # please forgive my tired fingers for this one
+        ret = filter(lambda word, mod=modname: word == mod,
+                     map(lambda line: string.split(line)[0], lines))
+        return ret
+
+    def load_module(self):
+        """Load all the modules in the list in the order they appear.
+
+        Modules are inserted with insmod from the source tree when
+        config.src_dir() is set, otherwise with modprobe.  Raises
+        CommandError when the load command returns a non-zero status."""
+        for mod in self.kmodule_list:
+            # (rc, out) = run ('/sbin/lsmod | grep -s', mod)
+            if self.mod_loaded(mod) and not config.noexec():
+                continue
+            log ('loading module:', mod)
+            if config.src_dir():
+                module = find_module(config.src_dir(), mod)
+                if not module:
+                    panic('module not found:', mod)
+                (rc, out) = run('/sbin/insmod', module)
+                if rc:
+                    raise CommandError('insmod', out, rc)
+            else:
+                (rc, out) = run('/sbin/modprobe', mod)
+                if rc:
+                    raise CommandError('modprobe', out, rc)
+
+    def cleanup_module(self):
+        """Unload the modules in the list in reverse order."""
+        # Reverse a copy: list.reverse() works in place, so reversing
+        # self.kmodule_list itself would flip the order seen by any later
+        # load_module() or cleanup_module() call on this object.
+        rev = self.kmodule_list[:]
+        rev.reverse()
+        for mod in rev:
+            if not self.mod_loaded(mod):
+                continue
+            log('unloading module:', mod)
+            if config.noexec():
+                continue
+            (rc, out) = run('/sbin/rmmod', mod)
+            if rc:
+                log('! unable to unload module:', mod)
+                logall(out)
-def cleanup_mdc(node):
- (name, uuid, mdsuuid, netuuid) = getMDCInfo(node)
- print 'MDC:', name, uuid, mdsuuid, netuuid
- net = lookup(node.parentNode, netuuid)
- srvname, srvuuid, net, server, port = getNetworkInfo(net)
- try:
- lctl.disconnect(net, server, port, netuuid)
- lctl.cleanup(name, uuid)
- except CommandError:
- print "cleanup failed: ", name
-
-
-def cleanup_osc(node):
- (name, uuid, obduuid, ostuuid) = getOSCInfo(node)
- print 'OSC:', name, uuid, obduuid, ostuuid
- net = lookup(node.parentNode, ostuuid)
- srvname, srvuuid, net, server, port = getNetworkInfo(net)
- try:
- lctl.disconnect(net, server, port, ostuuid)
- lctl.cleanup(name, uuid)
- except CommandError:
- print "cleanup failed: ", name
-
-def cleanup_mountpoint(node):
- name, uuid, oscuuid, mdcuuid, mtpt = getMTPTInfo(node)
- print 'MTPT:', name, uuid, oscuuid, mdcuuid, mtpt
- run("umount", mtpt)
-
+class Network(Module):
+ """Network service: sets up the local network with lctl, attaches the
+ RPCDEV ptlrpc device and, for tcp, starts the acceptor."""
+ def __init__(self,node):
+ # Read type, nid, port and buffer sizes from the XML node and register
+ # the kernel modules this network type needs, in load order.
+ Module.__init__(self, 'NETWORK', node)
+ self.net_type = node.getAttribute('type')
+ # '*' is also the default when <server> is absent or empty
+ self.nid = getText(node, 'server', '*')
+ self.port = int(getText(node, 'port', 0))
+ self.send_buf = int(getText(node, 'send_buf', 0))
+ self.read_buf = int(getText(node, 'read_buf', 0))
+ # '*' means: work out the local address for this network type
+ if self.nid == '*':
+ self.nid = get_local_address(self.net_type)
+ if not self.nid:
+ panic("unable to set nid for", self.net_type)
+
+ self.add_module('portals')
+ if self.net_type == 'tcp':
+ self.add_module('ksocknal')
+ if self.net_type == 'elan':
+ self.add_module('kqswnal')
+ if self.net_type == 'gm':
+ self.add_module('kgmnal')
+ self.add_module('obdclass')
+ self.add_module('ptlrpc')
+
+ def prepare(self):
+ # For tcp, start the acceptor on the configured port first; a non-zero
+ # status from run_daemon is raised as CommandError.
+ self.info(self.net_type, self.nid, self.port)
+ if self.net_type == 'tcp':
+ ret = run_daemon(TCP_ACCEPTOR, self.port)
+ if ret:
+ raise CommandError(TCP_ACCEPTOR, 'failed', ret)
+ lctl.network(self.net_type, self.nid)
+ lctl.newdev(attach = "ptlrpc RPCDEV")
+
+ def cleanup(self):
+ # Best effort: each lctl step is tried on its own and a failure is
+ # only reported, so the remaining steps still run.
+ self.info(self.net_type, self.nid, self.port)
+ try:
+ lctl.cleanup("RPCDEV", "")
+ except CommandError, e:
+ print "cleanup failed: ", self.name
+ try:
+ lctl.disconnectAll(self.net_type)
+ except CommandError, e:
+ print "cleanup failed: ", self.name
+ if self.net_type == 'tcp':
+ # yikes, this is ugly! need to save pid in /var/something
+ run("killall acceptor")
+
+class LDLM(Module):
+    """LDLM service; registers the 'ldlm' kernel module."""
+    def __init__(self,node):
+        Module.__init__(self, 'LDLM', node)
+        self.add_module('ldlm')
+
+    def prepare(self):
+        """Announce the service and attach an ldlm device with empty setup."""
+        self.info()
+        attach_args = "ldlm %s %s" % (self.name, self.uuid)
+        lctl.newdev(attach=attach_args, setup="")
+
+class LOV(Module):
+ """LOV service: striping parameters plus the list of osc uuids read from
+ the <devices> element of the XML node."""
+ def __init__(self,node):
+ Module.__init__(self, 'LOV', node)
+ # striping parameters are attributes of the first <devices> child
+ devs = node.getElementsByTagName('devices')[0]
+ self.stripe_sz = int(devs.getAttribute('stripesize'))
+ self.stripe_off = int(devs.getAttribute('stripeoffset'))
+ self.pattern = int(devs.getAttribute('pattern'))
+ # resolve the referenced mds to get its name
+ mdsref = node.getElementsByTagName('mds_ref')[0]
+ self.mdsuuid = mdsref.getAttribute('uuidref')
+ mds= lookup(node.parentNode, self.mdsuuid)
+ self.mdsname = getName(mds)
+ # devlist is a space-separated string of osc uuids; stripe_cnt counts them
+ devlist = ""
+ stripe_cnt = 0
+ for child in devs.childNodes:
+ if child.nodeName == 'osc_ref':
+ devlist = devlist + child.getAttribute('uuidref') + " "
+ stripe_cnt = stripe_cnt + 1
+ self.devlist = devlist
+ self.stripe_cnt = stripe_cnt
+ self.add_module('osc')
+ self.add_module('lov')
+
+ def prepare(self):
+ # Only issues lovconfig; no device is attached here.
+ self.info(self.mdsuuid, self.stripe_cnt, self.stripe_sz, self.stripe_off, self.pattern,
+ self.devlist, self.mdsname)
+ lctl.lovconfig(self.uuid, self.mdsname, self.stripe_cnt,
+ self.stripe_sz, self.stripe_off, self.pattern,
+ self.devlist)
+
+ def cleanup(self):
+ # deliberately a no-op; overrides the default Module.cleanup
+ pass
+
+class MDS(Module):
+    """MDS service backed by a device that is prepared through block_dev."""
+    def __init__(self,node):
+        Module.__init__(self, 'MDS', node)
+        self.devname, self.size = getDevice(node)
+        self.fstype = getText(node, 'fstype')
+        self.format = getText(node, 'autoformat', "no")
+        # load order: extN (only for that fstype), mds, mds_<fstype>
+        kmods = ['mds', 'mds_%s' % (self.fstype)]
+        if self.fstype == 'extN':
+            kmods.insert(0, 'extN')
+        for kmod in kmods:
+            self.add_module(kmod)
+
+    def prepare(self):
+        """Prepare the backing device, then attach and set up the mds device."""
+        self.info(self.devname, self.fstype, self.format)
+        blkdev = block_dev(self.devname, self.size, self.fstype, self.format)
+        attach_args = "mds %s %s" % (self.name, self.uuid)
+        setup_args = "%s %s" %(blkdev, self.fstype)
+        lctl.newdev(attach=attach_args, setup=setup_args)
+
+    def cleanup(self):
+        """Run the default cleanup, then call clean_loop on the device name."""
+        Module.cleanup(self)
+        clean_loop(self.devname)
+
+class MDC(Module):
+    """MDC service: connects to the network of the referenced mds and
+    attaches an mdc device."""
+    def __init__(self,node):
+        Module.__init__(self, 'MDC', node)
+        ref = node.getElementsByTagName('mds_ref')[0]
+        self.mds_uuid = ref.getAttribute('uuidref')
+        self.add_module('mdc')
+
+    def prepare(self):
+        """Connect to the mds network, then attach and set up the mdc device."""
+        self.info(self.mds_uuid)
+        mds = lookup(self.dom_node.parentNode, self.mds_uuid)
+        if mds == None:
+            # The attribute is mds_uuid; the old spelling 'mdsuuid' raised
+            # AttributeError here instead of reporting the missing mds.
+            panic(self.mds_uuid, "not found.")
+        net = get_ost_net(self.dom_node.parentNode, self.mds_uuid)
+        srv = Network(net)
+        lctl.connect(srv.net_type, srv.nid, srv.port, srv.uuid, srv.send_buf, srv.read_buf)
+        lctl.newdev(attach="mdc %s %s" % (self.name, self.uuid),
+                    setup ="%s %s" %(self.mds_uuid, srv.uuid))
+
+    def cleanup(self):
+        """Disconnect from the mds network and clean up the device.
+
+        Both steps are best effort: a CommandError is reported and the next
+        step still runs."""
+        self.info(self.mds_uuid)
+        net = get_ost_net(self.dom_node.parentNode, self.mds_uuid)
+        srv = Network(net)
+        try:
+            lctl.disconnect(srv.net_type, srv.nid, srv.port, srv.uuid)
+        except CommandError:
+            print "disconnect failed: ", self.name
+        try:
+            lctl.cleanup(self.name, self.uuid)
+        except CommandError:
+            print "cleanup failed: ", self.name
+
+class OBD(Module):
+    """OBD service; the obd type doubles as the kernel module name."""
+    def __init__(self, node):
+        Module.__init__(self, 'OBD', node)
+        self.obdtype = node.getAttribute('type')
+        self.devname, self.size = getDevice(node)
+        self.fstype = getText(node, 'fstype')
+        self.format = getText(node, 'autoformat', 'yes')
+        # load order: extN (only for that fstype), then the obd type module
+        kmods = [self.obdtype]
+        if self.fstype == 'extN':
+            kmods.insert(0, 'extN')
+        for kmod in kmods:
+            self.add_module(kmod)
+
+    # need to check /proc/mounts and /etc/mtab before
+    # formatting anything.
+    # FIXME: check if device is already formatted.
+    def prepare(self):
+        """Prepare the backing device (skipped for obdecho, which gets an
+        empty device string), then attach and set up the obd device."""
+        self.info(self.obdtype, self.devname, self.size, self.fstype, self.format)
+        if self.obdtype != 'obdecho':
+            blkdev = block_dev(self.devname, self.size, self.fstype, self.format)
+        else:
+            blkdev = ''
+        attach_args = "%s %s %s" % (self.obdtype, self.name, self.uuid)
+        setup_args = "%s %s" %(blkdev, self.fstype)
+        lctl.newdev(attach=attach_args, setup=setup_args)
+
+    def cleanup(self):
+        """Run the default cleanup; call clean_loop unless this is obdecho."""
+        Module.cleanup(self)
+        if self.obdtype != 'obdecho':
+            clean_loop(self.devname)
+
+class OST(Module):
+    """OST service that is set up with the uuid of the obd it references."""
+    def __init__(self,node):
+        Module.__init__(self, 'OST', node)
+        obd_ref = node.getElementsByTagName('obd_ref')[0]
+        self.obd_uuid = obd_ref.getAttribute('uuidref')
+        self.add_module('ost')
+
+    def prepare(self):
+        """Attach the ost device, passing the obd uuid as its setup argument."""
+        self.info(self.obd_uuid)
+        attach_args = "ost %s %s" % (self.name, self.uuid)
+        setup_args = "%s" % (self.obd_uuid)
+        lctl.newdev(attach=attach_args, setup=setup_args)
+
+class OSC(Module):
+    """OSC service: connects to the network of the referenced ost and
+    attaches an osc device."""
+    def __init__(self,node):
+        Module.__init__(self, 'OSC', node)
+        ref = node.getElementsByTagName('obd_ref')[0]
+        self.obd_uuid = ref.getAttribute('uuidref')
+        ref = node.getElementsByTagName('ost_ref')[0]
+        self.ost_uuid = ref.getAttribute('uuidref')
+        self.add_module('osc')
+
+    def prepare(self):
+        """Connect to the ost network, then attach and set up the osc device."""
+        self.info(self.obd_uuid, self.ost_uuid)
+        net = get_ost_net(self.dom_node.parentNode, self.ost_uuid)
+        srv = Network(net)
+        lctl.connect(srv.net_type, srv.nid, srv.port, srv.uuid, srv.send_buf, srv.read_buf)
+        lctl.newdev(attach="osc %s %s" % (self.name, self.uuid),
+                    setup ="%s %s" %(self.obd_uuid, srv.uuid))
+
+    def cleanup(self):
+        """Disconnect from the ost network and clean up the device.
+
+        Both steps are best effort: a CommandError is reported and the next
+        step still runs."""
+        self.info(self.obd_uuid, self.ost_uuid)
+        # get_ost_net returns the network DOM node, not a uuid
+        net = get_ost_net(self.dom_node.parentNode, self.ost_uuid)
+        srv = Network(net)
+        try:
+            lctl.disconnect(srv.net_type, srv.nid, srv.port, srv.uuid)
+        except CommandError:
+            # message completed to match the one printed by MDC.cleanup
+            print "disconnect failed: ", self.name
+        try:
+            lctl.cleanup(self.name, self.uuid)
+        except CommandError:
+            print "cleanup failed: ", self.name
+
+class Mountpoint(Module):
+ """Client mount point: prepares the referenced lov (or single osc) and
+ mounts a lustre_lite filesystem at the configured path."""
+ def __init__(self,node):
+ Module.__init__(self, 'MTPT', node)
+ self.path = getText(node, 'path')
+ ref = node.getElementsByTagName('mdc_ref')[0]
+ self.mdc_uuid = ref.getAttribute('uuidref')
+ # the osc_ref may point at either a lov or a plain osc (see prepare)
+ ref = node.getElementsByTagName('osc_ref')[0]
+ self.lov_uuid = ref.getAttribute('uuidref')
+ self.add_module('osc')
+ self.add_module('llite')
+
+ def prepare(self):
+ # If the reference resolves to a lov: prepare every osc in its devlist
+ # and attach the lov device; otherwise prepare the single osc.
+ l = lookup(self.dom_node.parentNode, self.lov_uuid)
+ if l.nodeName == 'lov':
+ lov = LOV(l)
+ for osc_uuid in string.split(lov.devlist):
+ osc = lookup(self.dom_node.parentNode, osc_uuid)
+ if osc:
+ n = OSC(osc)
+ n.prepare()
+ else:
+ panic('osc not found:', osc_uuid)
+ lctl.newdev(attach="lov %s %s" % (lov.name, lov.uuid),
+ setup ="%s" % (self.mdc_uuid))
+ else:
+ osc = OSC(l)
+ osc.prepare()
+
+ self.info(self.path, self.mdc_uuid,self.lov_uuid)
+ cmd = "mount -t lustre_lite -o osc=%s,mdc=%s none %s" % \
+ (self.lov_uuid, self.mdc_uuid, self.path)
+ # the status of mkdir is ignored; only a failed mount is reported
+ run("mkdir", self.path)
+ ret, val = run(cmd)
+ if ret:
+ panic("mount failed:", self.path)
+ def cleanup(self):
+ # Unmount first, then clean up the oscs (and the lov, if any).
+ self.info(self.path, self.mdc_uuid,self.lov_uuid)
+ run("umount", self.path)
+ l = lookup(self.dom_node.parentNode, self.lov_uuid)
+ if l.nodeName == 'lov':
+ lov = LOV(l)
+ for osc_uuid in string.split(lov.devlist):
+ osc = lookup(self.dom_node.parentNode, osc_uuid)
+ if osc:
+ n = OSC(osc)
+ n.cleanup()
+ else:
+ panic('osc not found:', osc_uuid)
+ lov.cleanup()
+ else:
+ osc = OSC(l)
+ osc.cleanup()
+
# ============================================================
# XML processing and query
+# TODO: Change query funcs to use XPath, which is much cleaner
def getDevice(obd):
- dev = obd.getElementsByTagName('device')[0]
- dev.normalize();
- try:
- size = int(dev.getAttribute('size'))
- except ValueError:
- size = 0
- return dev.firstChild.data, size
-
-
-def getNetworkInfo(node):
- name, uuid = getNodeAttr(node);
- type = node.getAttribute('type')
- nid = getText(node, 'server', "")
- port = int(getText(node, 'port', 0))
- return name, uuid, type, nid, port
-
-# extract device attributes for an obd
-def getNodeAttr(node):
- name = node.getAttribute('name')
- uuid = node.getAttribute('uuid')
- return name, uuid
-
-def getOBDInfo(obd):
- name, uuid = getNodeAttr(obd);
- obdtype = obd.getAttribute('type')
- devname, size = getDevice(obd)
- fstype = getText(obd, 'fstype')
- format = getText(obd, 'autoformat')
- return (name, uuid, obdtype, devname, size, fstype, format)
-
-# extract LOV
-def getLOVInfo(node):
- name, uuid = getNodeAttr(node)
- devs = node.getElementsByTagName('devices')[0]
- stripe_sz = int(devs.getAttribute('stripesize'))
- pattern = int(devs.getAttribute('pattern'))
- mdcref = node.getElementsByTagName('mdc_ref')[0]
- mdcuuid = mdcref.getAttribute('uuidref')
- mdc= lookup(node.parentNode, mdcuuid)
- mdsref = mdc.getElementsByTagName('mds_ref')[0]
- mdsuuid = mdsref.getAttribute('uuidref')
- mds= lookup(node.parentNode, mdsuuid)
- mdsname = getName(mds)
- devlist = ""
- stripe_cnt = 0
- for child in devs.childNodes:
- if child.nodeName == 'osc_ref':
- devlist = devlist + child.getAttribute('uuidref') + " "
- strip_cnt = stripe_cnt + 1
- return (name, uuid, mdcuuid, stripe_cnt, stripe_sz, pattern, devlist, mdsname)
-
-# extract device attributes for an obd
-def getMDSInfo(node):
- name, uuid = getNodeAttr(node)
- devname, size = getDevice(node)
- fstype = getText(node, 'fstype')
- format = getText(node, 'autoformat', "no")
- return (name, uuid, devname, size, fstype, format)
-
-# extract device attributes for an obd
-def getMDCInfo(node):
- name, uuid = getNodeAttr(node)
- ref = node.getElementsByTagName('mds_ref')[0]
- mdsuuid = ref.getAttribute('uuidref')
- ref = node.getElementsByTagName('network_ref')[0]
- netuuid = ref.getAttribute('uuidref')
- return (name, uuid, mdsuuid, netuuid)
-
-
-# extract device attributes for an obd
-def getOSTInfo(node):
- name, uuid = getNodeAttr(node)
- ref = node.getElementsByTagName('obd_ref')[0]
- uuid = ref.getAttribute('uuidref')
- obd = lookup(node.parentNode, uuid)
- if obd:
- obdname = getOBDInfo(obd)[0]
- else:
- obdname = "OBD NOT FOUND"
- return (name, uuid, obdname)
-
-# extract device attributes for an obd
-def getOSCInfo(node):
- name, uuid = getNodeAttr(node)
- ref = node.getElementsByTagName('obd_ref')[0]
- obduuid = ref.getAttribute('uuidref')
- ref = node.getElementsByTagName('network_ref')[0]
- ostuuid = ref.getAttribute('uuidref')
- return (name, uuid, obduuid, ostuuid)
-
-# extract device attributes for an obd
-def getMTPTInfo(node):
- name, uuid = getNodeAttr(node)
- path = getText(node, 'path')
- ref = node.getElementsByTagName('mdc_ref')[0]
- mdcuuid = ref.getAttribute('uuidref')
- ref = node.getElementsByTagName('osc_ref')[0]
- lovuuid = ref.getAttribute('uuidref')
- return (name, uuid, lovuuid, mdcuuid, path)
+ list = obd.getElementsByTagName('device')
+ if len(list) > 0:
+ dev = list[0]
+ dev.normalize();
+ try:
+ size = int(dev.getAttribute('size'))
+ except ValueError:
+ size = 0
+ return dev.firstChild.data, size
+ return '', 0
-
# Get the text content from the first matching child
+# If there is no content (or it is all whitespace), return
+# the default
def getText(node, tag, default=""):
list = node.getElementsByTagName(tag)
if len(list) > 0:
node = list[0]
node.normalize()
- return node.firstChild.data
+ if node.firstChild:
+ txt = string.strip(node.firstChild.data)
+ if txt:
+ return txt
+ return default
+
+def get_ost_net(node, uuid):
+ ost = lookup(node, uuid)
+ list = ost.getElementsByTagName('network_ref')
+ if list:
+ uuid = list[0].getAttribute('uuidref')
else:
- return default
-
-# Recusively search from node for a uuid
+ return None
+ return lookup(node, uuid)
+
def lookup(node, uuid):
for n in node.childNodes:
- # this service_id check is ugly. need some other way to
- # differentiate between definitions and references
if n.nodeType == n.ELEMENT_NODE:
if getUUID(n) == uuid:
return n
n = lookup(n, uuid)
if n: return n
return None
-
+
# Get name attribute of node
def getName(node):
return node.getAttribute('name')
# net,devices,ldlm:1, obd, mdd:2 mds,ost:3 osc,mdc:4 mounts:5
def getServiceLevel(node):
type = getServiceType(node)
- if type in ('network', 'device', 'ldlm'):
+ if type in ('network',):
return 1
- elif type in ('obd', 'mdd'):
+ if type in ('device', 'ldlm'):
return 2
- elif type in ('mds','ost'):
+ elif type in ('obd', 'mdd'):
return 3
- elif type in ('mdc','osc'):
+ elif type in ('mds','ost'):
return 4
- elif type in ('lov',):
+ elif type in ('mdc','osc'):
return 5
- elif type in ('mountpoint',):
+ elif type in ('lov',):
return 6
+ elif type in ('mountpoint',):
+ return 7
return 0
#
if n.nodeType == n.ELEMENT_NODE:
servNode = lookup(lustreNode, getRef(n))
if not servNode:
- panic('service not found: ' + getName(n))
+ print n
+ panic('service not found: ' + getRef(n))
level = getServiceLevel(servNode)
list.append((level, servNode))
list.sort()
# ============================================================
# lconf level logic
# Run one step of setup or teardown for a single service node.
#   module_flag set:    load_module(), or cleanup_module() if clean_flag
#   module_flag not set: prepare(), or cleanup() if clean_flag
# The module step is skipped when config.nomod() is set, the device step
# when config.nosetup() is set.  An unrecognized service type goes to panic().
def startService(node, clean_flag, module_flag):
    kind = getServiceType(node)
    debug('Service:', kind, getName(node), getUUID(node))
    # one wrapper class per known service type
    wrappers = {
        'ldlm': LDLM,
        'lov': LOV,
        'network': Network,
        'obd': OBD,
        'ost': OST,
        'mds': MDS,
        'osc': OSC,
        'mdc': MDC,
        'mountpoint': Mountpoint,
    }
    service = None
    wrapper = wrappers.get(kind)
    if wrapper is None:
        panic ("unknown service type:", kind)
    else:
        service = wrapper(node)

    if module_flag:
        if config.nomod():
            return
        if clean_flag:
            service.cleanup_module()
        else:
            service.load_module()
        return

    if config.nosetup():
        return
    if clean_flag:
        service.cleanup()
    else:
        service.prepare()
#
# Run one pass over every service of a profile.
#   lustreNode  - root element handed to getServices()
#   profileNode - the profile element; a false value is reported via panic()
#   clean_flag  - when set, the services are walked in reverse order so that
#                 shutdown happens in the reverse of startup order
#   module_flag - passed through to startService() to select the module step
#                 or the device step
# Each entry returned by getServices() is indexed with [1] to obtain the
# service node that is handed to startService().
def startProfile(lustreNode, profileNode, clean_flag, module_flag):
    if not profileNode:
        # The old message interpolated a variable named 'profile', which is
        # not defined in this function and raised NameError instead.
        panic("profile not found.")
    services = getServices(lustreNode, profileNode)
    if clean_flag:
        services.reverse()
    for s in services:
        startService(s[1], clean_flag, module_flag)
#
# Load (or clean up) the profiles of the first name in hosts for which
# getByName(lustreNode, 'node', name) returns a node.
# Prints 'No host entry found.' and returns when none of the names match.
# Every <profile> element under that node is processed twice, once per
# value of module_flag (see startService for what each value selects).
-def doHost(lustreNode, hosts, cleanFlag):
+def doHost(lustreNode, hosts, clean_flag):
+ node = None
# stop at the first host name that has a node entry
for h in hosts:
node = getByName(lustreNode, 'node', h)
if node:
break
-
- reflist = node.getElementsByTagName('profile_ref')
- for r in reflist:
- if cleanFlag:
- cleanupProfile(lustreNode, lookup(lustreNode, getRef(r)))
- else:
- startProfile(lustreNode, lookup(lustreNode, getRef(r)))
-#
+ if not node:
+ print 'No host entry found.'
+ return
+
+ # Two step process: (1) load modules, (2) setup lustre
+ # if not cleaning, load modules first.
# module_flag starts as the opposite of clean_flag: the first pass is the
# module step on startup and the device step on cleanup.
+ module_flag = not clean_flag
+ reflist = node.getElementsByTagName('profile')
+ for profile in reflist:
+ startProfile(lustreNode, profile, clean_flag, module_flag)
+
# NOTE(review): indentation is not visible in this view; the debug-path and
# gdb-script steps below presumably all sit under "if not clean_flag" --
# confirm against the real file.
+ if not clean_flag:
+ setDebugPath()
+ script = config.gdb_script()
# redirects the output of the lctl 'modules' command into the script file
+ run(lctl.lctl, ' modules >', script)
+ if config.gdb():
+ # dump /tmp/ogdb and sleep/pause here
+ log ("The GDB module script is in", script)
+ time.sleep(5)
+
# second pass: same profiles, module_flag inverted
+ module_flag = not module_flag
+ for profile in reflist:
+ startProfile(lustreNode, profile, clean_flag, module_flag)
+
# Command line processing
#
# Parse argv with getopt and record every recognized option on the global
# config object.  An option getopt rejects prints "invalid opt" and goes
# through usage(), as does -h/--help.  -n/--noexec also turns on verbose.
# Returns the list of remaining non-option arguments.
def parse_cmdline(argv):
    short_opts = "hdnv"
    long_opts = ["ldap", "reformat", "lustre=", "verbose", "gdb",
                 "portals=", "makeldiff", "cleanup", "noexec",
                 "help", "node=", "get=", "nomod", "nosetup"]
    opts = []
    args = []
    try:
        opts, args = getopt.getopt(argv, short_opts, long_opts)
    except getopt.error:
        print("invalid opt")
        usage()

    for opt, val in opts:
        if opt in ("-h", "--help"):
            usage()
        elif opt in ("-d", "--cleanup"):
            config.cleanup(1)
        elif opt in ("-v", "--verbose"):
            config.verbose(1)
        elif opt in ("-n", "--noexec"):
            config.noexec(1)
            config.verbose(1)
        elif opt == "--portals":
            # NOTE(review): this and --lustre store a plain attribute, while
            # every other option calls a Config accessor -- confirm intended.
            config.portals = val
        elif opt == "--lustre":
            config.lustre = val
        elif opt == "--reformat":
            config.reformat(1)
        elif opt == "--node":
            config.node(val)
        elif opt == "--get":
            config.url(val)
        elif opt == "--gdb":
            config.gdb(1)
        elif opt == "--nomod":
            config.nomod(1)
        elif opt == "--nosetup":
            config.nosetup(1)
    return args
-#
# Download the document at url and return its contents as a string.
# On an I/O failure the reason is printed and usage() is called; if usage()
# returns, the empty string is returned.
def fetch(url):
    import urllib
    data = ""
    try:
        s = urllib.urlopen(url)
        try:
            data = s.read()
        finally:
            # release the connection even when the read fails
            s.close()
    except IOError:
        # Catch only I/O failures (a bare except would also swallow
        # KeyboardInterrupt and SystemExit) and say what went wrong.
        print('unable to fetch ' + url + ': ' + str(sys.exc_info()[1]))
        usage()
    return data
+
# If a readable Makefile sits in the same directory as cmd, set the config
# source directory to two levels above that directory (presumably this means
# lconf is being run from inside a source tree -- confirm).
def setupModulePath(cmd):
    # dirname() is '' when cmd has no directory part; fall back to '.' so the
    # check looks at ./Makefile rather than /Makefile in the filesystem root.
    base = os.path.dirname(cmd) or '.'
    if os.access(base+"/Makefile", os.R_OK):
        config.src_dir(base + "/../../")
+
# Write config.debug_path() to /proc/sys/portals/debug_path.
# Only the debug message is emitted when config.noexec() is set.
# An IOError from opening, writing or closing the file is printed, not raised.
def setDebugPath():
    debug("debug path: ", config.debug_path())
    if config.noexec():
        return
    try:
        fp = open('/proc/sys/portals/debug_path', 'w')
        try:
            fp.write(config.debug_path())
        finally:
            # close even if the write fails so the descriptor is not leaked
            fp.close()
    except IOError:
        print(sys.exc_info()[1])
+
+
# Create the /dev/portals and /dev/obd device nodes with mknod (character
# devices, major 10, minors 240 and 241) when they are not readable.
def makeDevices():
    devices = (
        ('/dev/portals', 'mknod /dev/portals c 10 240'),
        ('/dev/obd', 'mknod /dev/obd c 10 241'),
    )
    for path, command in devices:
        if os.access(path, os.R_OK):
            continue
        run(command)
+
# Initialize or shutdown lustre according to a configuration file
# * prepare the system for lustre
# * configure devices with lctl
# Shutdown does steps in reverse
#
# The configuration comes from the first non-option argument (a local XML
# file) or, failing that, from the --get URL; with neither, usage() is shown.
# The node names to try are the --node value, or else the local hostname
# followed by 'localhost'.
# Sets the globals TCP_ACCEPTOR and lctl before calling doHost().
def main():
    global TCP_ACCEPTOR, lctl
    args = parse_cmdline(sys.argv[1:])
    if len(args) > 0:
        # The config file is only parsed, never written, so read access is
        # all that is required.
        if not os.access(args[0], os.R_OK):
            print('File not found or not readable: ' + args[0])
            sys.exit(1)
        dom = xml.dom.minidom.parse(args[0])
    elif config.url():
        xmldata = fetch(config.url())
        dom = xml.dom.minidom.parseString(xmldata)
    else:
        usage()

    node_list = []
    if config.node():
        node_list.append(config.node())
    else:
        host = socket.gethostname()
        if len(host) > 0:
            node_list.append(host)
        node_list.append('localhost')
    debug("configuring for host: ", node_list)

    TCP_ACCEPTOR = find_prog('acceptor')
    if not TCP_ACCEPTOR:
        if config.noexec():
            # nothing gets executed in noexec mode, so a placeholder name
            # is enough
            TCP_ACCEPTOR = 'acceptor'
            debug('! acceptor not found')
        else:
            panic('acceptor not found')

    lctl = LCTLInterface('lctl')

    setupModulePath(sys.argv[0])
    makeDevices()
    doHost(dom.documentElement, node_list, config.cleanup())
if __name__ == "__main__":
    # Report configuration and command failures, then exit with a non-zero
    # status so that calling scripts can tell the run did not succeed.
    try:
        main()
    except LconfError:
        print(sys.exc_info()[1])
        sys.exit(1)
    except CommandError:
        sys.exc_info()[1].dump()
        sys.exit(1)
+