config.xml Lustre configuration in xml format.
--get <url> URL to fetch a config file
--v | --verbose Print system commands as they are run
--d | --debug Print system commands, but does not run them
--node <nodename> Load config for <nodename>
---cleanup Cleans up config. (Shutdown)
+-d | --cleanup Cleans up config. (Shutdown)
+-v | --verbose Print system commands as they are run
-h | --help Print this help
---gdb Create a gdb script to load the modules. Prints message
- after creating script and sleeps for 5 seconds.
+--gdb Prints message after creating gdb module script
+ and sleeps for 5 seconds.
+-n | --noexec Prints the commands and steps that will be run for a
+ config without executing them. This can be used to check if a
+ config file is doing what it should be doing. (Implies -v)
+--nomod Skip load/unload module step.
+--nosetup Skip device setup/cleanup step.
"""
TODO = """
--ldap server LDAP server with lustre config database
+--makeldiff Translate xml source to LDIFF
--reformat Reformat all devices (will confirm)
+These are perhaps not needed:
--lustre="src dir" Base directory of lustre sources. Used to search
for modules.
--portals=src Portals source
---makeldiff Translate xml source to LDIFF
"""
sys.exit()
# debugging and error funcs
def fixme(msg = "this feature"):
- raise RuntimeError, msg + ' not implmemented yet.'
+ raise LconfError, msg + ' not implmemented yet.'
def panic(*args):
msg = string.join(map(str,args))
- print msg
if not config.noexec():
- raise RuntimeError, msg
+ raise LconfError(msg)
def log(*args):
msg = string.join(map(str,args))
# ============================================================
# locally defined exceptions
class CommandError (exceptions.Exception):
- def __init__(self, args=None):
+ def __init__(self, cmd_name, cmd_err, rc=None):
+ self.cmd_name = cmd_name
+ self.cmd_err = cmd_err
+ self.rc = rc
+
+ def dump(self):
+ import types
+ if type(self.cmd_err) == types.StringType:
+ if self.rc:
+ print "! %s (%d): %s" % (self.cmd_name, self.rc, self.cmd_err)
+ else:
+ print "! %s: %s" % (self.cmd_name, self.cmd_err)
+ elif type(self.cmd_err) == types.ListType:
+ if self.rc:
+ print "! %s (error %d):" % (self.cmd_name, self.rc)
+ else:
+ print "! %s:" % (self.cmd_name)
+ for s in self.cmd_err:
+ print "> %s" %(string.strip(s))
+ else:
+ print self.cmd_err
+
+class LconfError (exceptions.Exception):
+ def __init__(self, args):
self.args = args
+
# ============================================================
# handle lctl interface
class LCTLInterface:
debug('! lctl not found')
self.lctl = 'lctl'
else:
- raise CommandError, "unable to find lctl binary."
+ raise CommandError('lctl', "unable to find lctl binary.")
def run(self, cmds):
"""
p.tochild.write(cmds + "\n")
p.tochild.close()
out = p.fromchild.readlines()
- ret = p.poll()
- for l in out:
- debug('lctl:',string.strip(l))
err = p.childerr.readlines()
+ ret = p.wait()
if ret or len(err):
- log (self.lctl, "error:", ret)
- logall(err)
- raise CommandError, err
+ raise CommandError(self.lctl, err, ret)
return ret, out
+
def network(self, net, nid):
""" initialized network and add "self" """
# Idea: "mynid" could be used for all network types to add "self," and then
def find_module(src_dir, modname):
mod = '%s.o' % (modname)
- search = (src_dir + "/lustre", src_dir + "/portals")
+ search = (src_dir + "/lustre", src_dir + "/portals/linux")
for d in search:
try:
module = do_find_file(d, mod)
log('unable to clean loop device:', dev, 'for file:', file)
logall(out)
+# determine if dev still needs to be formatted as a <fstype> filesystem
+def need_format(fstype, dev):
+ # FIXME don't know how to implement this
+ return 0
+
# initialize a block device if needed
def block_dev(dev, size, fstype, format):
if config.noexec(): return dev
if not is_block(dev):
dev = init_loop(dev, size, fstype)
- if (format == 'yes'):
+ if config.reformat() or (need_format(fstype, dev) and format == 'yes'):
mkfs(fstype, dev)
+
+# else:
+# panic("device:", dev,
+# "not prepared, and autoformat is not set.\n",
+# "Rerun with --reformat option to format ALL filesystems")
+
return dev
+def get_local_address(net_type):
+ """Return the local address for the network type."""
+ local = ""
+ if net_type == 'tcp':
+ # host `hostname`
+ host = socket.gethostname()
+ local = socket.gethostbyname(host)
+ elif net_type == 'elan':
+ # awk '/NodeId/ { print $2 }' '/proc/elan/device0/position'
+ try:
+ fp = open('/proc/elan/device0/position', 'r')
+ lines = fp.readlines()
+ fp.close()
+ for l in lines:
+ a = string.split(l)
+ if a[0] == 'NodeId':
+ local = a[1]
+ break
+ except IOError, e:
+ log(e)
+ elif net_type == 'gm':
+ fixme("automatic local address for GM")
+ return local
+
+
+
# ============================================================
# Classes to prepare and cleanup the various objects
#
self.info()
try:
lctl.cleanup(self.name, self.uuid)
- except CommandError:
+ except CommandError, e:
print "cleanup failed: ", self.name
def add_module(self, modname):
# (rc, out) = run ('/sbin/lsmod | grep -s', mod)
if self.mod_loaded(mod) and not config.noexec():
continue
+ log ('loading module:', mod)
if config.src_dir():
module = find_module(config.src_dir(), mod)
if not module:
panic('module not found:', mod)
(rc, out) = run('/sbin/insmod', module)
if rc:
- raise CommandError("insmod failed:", module)
+ raise CommandError('insmod', out, rc)
else:
(rc, out) = run('/sbin/modprobe', mod)
if rc:
- raise CommandError("modprobe failed:", module)
+ raise CommandError('modprobe', out, rc)
def cleanup_module(self):
"""Unload the modules in the list in reverse order."""
rev = self.kmodule_list
rev.reverse()
for mod in rev:
- debug('rmmod', mod)
+ if not self.mod_loaded(mod):
+ continue
+ log('unloading module:', mod)
if config.noexec():
continue
- run('/sbin/rmmod', mod)
+ (rc, out) = run('/sbin/rmmod', mod)
+ if rc:
+ log('! unable to unload module:', mod)
+ logall(out)
class Network(Module):
def __init__(self,node):
Module.__init__(self, 'NETWORK', node)
self.net_type = node.getAttribute('type')
- self.nid = getText(node, 'server', "")
+ self.nid = getText(node, 'server', '*')
self.port = int(getText(node, 'port', 0))
self.send_buf = int(getText(node, 'send_buf', 0))
self.read_buf = int(getText(node, 'read_buf', 0))
+ if self.nid == '*':
+ self.nid = get_local_address(self.net_type)
+ if not self.nid:
+ panic("unable to set nid for", self.net_type)
self.add_module('portals')
if self.net_type == 'tcp':
if self.net_type == 'tcp':
ret = run_daemon(TCP_ACCEPTOR, self.port)
if ret:
- print "error:", ret
- raise CommandError, "cannot run acceptor"
+ raise CommandError(TCP_ACCEPTOR, 'failed', ret)
lctl.network(self.net_type, self.nid)
lctl.newdev(attach = "ptlrpc RPCDEV")
self.info(self.net_type, self.nid, self.port)
try:
lctl.cleanup("RPCDEV", "")
+ except CommandError, e:
+ print "cleanup failed: ", self.name
+ try:
lctl.disconnectAll(self.net_type)
- except CommandError:
+ except CommandError, e:
print "cleanup failed: ", self.name
if self.net_type == 'tcp':
# yikes, this ugly! need to save pid in /var/something
def __init__(self,node):
Module.__init__(self, 'LDLM', node)
self.add_module('ldlm')
- self.add_module('extN') # yuck, fix dupe handling and move this
def prepare(self):
self.info()
lctl.newdev(attach="ldlm %s %s" % (self.name, self.uuid),
self.devname, self.size = getDevice(node)
self.fstype = getText(node, 'fstype')
self.format = getText(node, 'autoformat', "no")
+ if self.fstype == 'extN':
+ self.add_module('extN')
self.add_module('mds')
self.add_module('mds_%s' % (self.fstype))
srv = Network(net)
try:
lctl.disconnect(srv.net_type, srv.nid, srv.port, srv.uuid)
+ except CommandError:
+ print "disconnect failed: ", self.name
+ try:
lctl.cleanup(self.name, self.uuid)
except CommandError:
print "cleanup failed: ", self.name
self.devname, self.size = getDevice(node)
self.fstype = getText(node, 'fstype')
self.format = getText(node, 'autoformat', 'yes')
+ if self.fstype == 'extN':
+ self.add_module('extN')
self.add_module(self.obdtype)
# need to check /proc/mounts and /etc/mtab before
# FIXME: check if device is already formatted.
def prepare(self):
self.info(self.obdtype, self.devname, self.size, self.fstype, self.format)
- blkdev = block_dev(self.devname, self.size, self.fstype, self.format)
+ if self.obdtype == 'obdecho':
+ blkdev = ''
+ else:
+ blkdev = block_dev(self.devname, self.size, self.fstype, self.format)
lctl.newdev(attach="%s %s %s" % (self.obdtype, self.name, self.uuid),
setup ="%s %s" %(blkdev, self.fstype))
def cleanup(self):
Module.cleanup(self)
- clean_loop(self.devname)
+ if not self.obdtype == 'obdecho':
+ clean_loop(self.devname)
class OST(Module):
def __init__(self,node):
srv = Network(net_uuid)
try:
lctl.disconnect(srv.net_type, srv.nid, srv.port, srv.uuid)
+ except CommandError:
+ print " failed: ", self.name
+ try:
lctl.cleanup(self.name, self.uuid)
except CommandError:
print "cleanup failed: ", self.name
# TODO: Change query funcs to use XPath, which is muc cleaner
def getDevice(obd):
- dev = obd.getElementsByTagName('device')[0]
- dev.normalize();
- try:
- size = int(dev.getAttribute('size'))
- except ValueError:
- size = 0
- return dev.firstChild.data, size
+ list = obd.getElementsByTagName('device')
+ if len(list) > 0:
+ dev = list[0]
+ dev.normalize();
+ try:
+ size = int(dev.getAttribute('size'))
+ except ValueError:
+ size = 0
+ return dev.firstChild.data, size
+ return '', 0
# Get the text content from the first matching child
+# If there is no content (or it is all whitespace), return
+# the default
def getText(node, tag, default=""):
list = node.getElementsByTagName(tag)
if len(list) > 0:
node = list[0]
node.normalize()
- return node.firstChild.data
- else:
- return default
+ if node.firstChild:
+ txt = string.strip(node.firstChild.data)
+ if txt:
+ return txt
+ return default
def get_ost_net(node, uuid):
ost = lookup(node, uuid)
# Command line processing
#
def parse_cmdline(argv):
- short_opts = "hdv"
+ short_opts = "hdnv"
long_opts = ["ldap", "reformat", "lustre=", "verbose", "gdb",
- "portals=", "makeldiff", "cleanup",
- "help", "debug", "node=", "get=", "nomod", "nosetup"]
+ "portals=", "makeldiff", "cleanup", "noexec",
+ "help", "node=", "get=", "nomod", "nosetup"]
opts = []
args = []
try:
opts, args = getopt.getopt(argv, short_opts, long_opts)
- except getopt.GetoptError:
+ except getopt.error:
print "invalid opt"
usage()
for o, a in opts:
if o in ("-h", "--help"):
usage()
- if o == "--cleanup":
+ if o in ("-d","--cleanup"):
config.cleanup(1)
if o in ("-v", "--verbose"):
config.verbose(1)
- if o in ("-d", "--debug"):
+ if o in ("-n", "--noexec"):
config.noexec(1)
config.verbose(1)
if o == "--portals":
debug("debug path: ", config.debug_path())
if config.noexec():
return
- fp = open('/proc/sys/portals/debug_path', 'w')
- fp.write(config.debug_path())
- fp.close()
+ try:
+ fp = open('/proc/sys/portals/debug_path', 'w')
+ fp.write(config.debug_path())
+ fp.close()
+ except IOError, e:
+ print e
def makeDevices():
if len(host) > 0:
node_list.append(host)
node_list.append('localhost')
- print "configuring for host: ", node_list
+ debug("configuring for host: ", node_list)
TCP_ACCEPTOR = find_prog('acceptor')
if not TCP_ACCEPTOR:
if __name__ == "__main__":
try:
main()
- except RuntimeError:
- pass
- except CommandError:
- print 'FIXME: insert exception data here'
+ except LconfError, e:
+ print e
+ except CommandError, e:
+ e.dump()
+