# Global parameters
TCP_ACCEPTOR = ''
MAXTCPBUF = 1048576
+DEFAULT_TCPBUF = 1048576
#
# Maximum number of devices to search for.
# (the /dev/loop* nodes need to be created beforehand)
config.xml Lustre configuration in xml format.
--get <url> URL to fetch a config file
--node <nodename> Load config for <nodename>
--d | --cleanup Cleans up config. (Shutdown)
+-d | --cleanup Cleans up config. (Shutdown)
-v | --verbose Print system commands as they are run
-h | --help Print this help
--gdb Prints message after creating gdb module script
config file is doing what it should be doing. (Implies -v)
--nomod Skip load/unload module step.
--nosetup Skip device setup/cleanup step.
+--reformat Reformat all devices (without question)
"""
TODO = """
---ldap server LDAP server with lustre config database
+--ldap server LDAP server with lustre config database
--makeldiff Translate xml source to LDIFF
---reformat Reformat all devices (will confirm)
This are perhaps not needed:
--lustre="src dir" Base directory of lustre sources. Used to search
for modules.
self._url = None
self._gdb_script = '/tmp/ogdb'
self._debug_path = '/tmp/lustre-log'
+ self._dump_file = None
self._src_dir = None
def verbose(self, flag = None):
return self._debug_path
def src_dir(self, val = None):
- if val: self._url = val
- return self._url
+ if val: self._src_dir = val
+ return self._src_dir
+
+ def dump_file(self, val = None):
+ if val: self._dump_file = val
+ return self._dump_file
config = Config()
quit """ % (net, uuid, tgt, gw, tgt)
self.run(cmds)
+ # delete the uuid and route for a single host
+ def del_route_host(self, net, uuid, gw, tgt):  # gw is accepted but not used in the commands below
+ cmds = """
+ network %s
+ del_uuid %s
+ del_route %s
+ quit """ % (net, uuid, tgt)
+ self.run(cmds)  # hand the assembled command script to self.run
+
# disconnect one connection
def disconnect(self, net, nid, port, servuuid):
cmds = """
quit""" % (net, nid, servuuid)
self.run(cmds)
- # disconnect all connections
+ # disconnect all
def disconnectAll(self, net):
cmds = """
network %s
- disconnect
del_uuid self
+ disconnect
quit""" % (net)
self.run(cmds)
quit""" % (mdsuuid, uuid, stripe_cnt, stripe_sz, stripe_off, pattern, devlist)
self.run(cmds)
+ # dump kernel debug output to a file (debug_kernel)
+ def dump(self, dump_file):  # issues "debug_kernel <dump_file> 1" followed by quit
+ cmds = """
+ debug_kernel %s 1
+ quit""" % (dump_file)
+ self.run(cmds)
+
# ============================================================
# Various system-level functions
# (ideally moved to their own module)
if dev:
print 'WARNING file:', file, 'already mapped to', dev
return dev
- if not os.access(file, os.R_OK | os.W_OK):
+ if config.reformat() or not os.access(file, os.R_OK | os.W_OK):
run("dd if=/dev/zero bs=1k count=0 seek=%d of=%s" %(size, file))
loop = loop_base()
# find next free loop
return dev
-def get_local_address(net_type):
+def if2addr(iface):
+ """lookup IP address for an interface"""
+ rc, out = run("/sbin/ifconfig", iface)
+ if rc or not out:
+ return None
+ addr = string.split(out[1])[1]
+ ip = string.split(addr, ':')[1]
+ return ip
+
+def get_local_address(net_type, wildcard):
"""Return the local address for the network type."""
local = ""
if net_type == 'tcp':
- # host `hostname`
- host = socket.gethostname()
- local = socket.gethostbyname(host)
+ if ':' in wildcard:
+ iface, star = string.split(wildcard, ':')
+ local = if2addr(iface)
+ if not local:
+ panic ("unable to determine ip for:", wildcard)
+ else:
+ host = socket.gethostname()
+ local = socket.gethostbyname(host)
elif net_type == 'elan':
# awk '/NodeId/ { print $2 }' '/proc/elan/device0/position'
try:
def lookup_server(self, srv_uuid):
""" Lookup a server's network information """
net = get_ost_net(self.dom_node.parentNode, srv_uuid)
+ if not net:
+ panic ("Unable to find a server for:", srv_uuid)
self._server = Network(net)
def get_server(self):
"""Unload the modules in the list in reverse order."""
rev = self.kmodule_list
rev.reverse()
- for mod in rev:
+ for dev_dir, mod in rev:
if not self.mod_loaded(mod):
continue
+ # debug hack
+ if mod == 'portals' and config.dump_file():
+ lctl.dump(config.dump_file())
log('unloading module:', mod)
if config.noexec():
continue
self.net_type = get_attr(dom_node,'type')
self.nid = get_text(dom_node, 'server', '*')
self.port = get_text_int(dom_node, 'port', 0)
- self.send_mem = get_text_int(dom_node, 'send_mem', 65536)
- self.recv_mem = get_text_int(dom_node, 'recv_mem', 65536)
- if self.nid == '*':
- self.nid = get_local_address(self.net_type)
+ self.send_mem = get_text_int(dom_node, 'send_mem', DEFAULT_TCPBUF)
+ self.recv_mem = get_text_int(dom_node, 'recv_mem', DEFAULT_TCPBUF)
+ if '*' in self.nid:
+ self.nid = get_local_address(self.net_type, self.nid)
if not self.nid:
- panic("unable to set nid for", self.net_type)
+ panic("unable to set nid for", self.net_type, self.nid)
+ debug("nid:", self.nid)
self.add_module('portals/linux/oslib/', 'portals')
- if node_needs_router():
- self.add_module('portals/linux/router', 'kptlrouter')
+ if node_needs_router():
+ self.add_module('portals/linux/router', 'kptlrouter')
if self.net_type == 'tcp':
self.add_module('portals/linux/socknal', 'ksocknal')
if self.net_type == 'elan':
def prepare(self):
self.info(self.net_type, self.nid, self.port)
if self.net_type == 'tcp':
- ret = run_daemon(TCP_ACCEPTOR, '-s', self.send_mem, '-r', self.recv_mem, self.port)
+ ret, out = run(TCP_ACCEPTOR, '-s', self.send_mem, '-r', self.recv_mem, self.port)
if ret:
- raise CommandError(TCP_ACCEPTOR, 'failed', ret)
+ raise CommandError(TCP_ACCEPTOR, out, ret)
ret = self.dom_node.getElementsByTagName('route_tbl')
for a in ret:
for r in a.getElementsByTagName('route'):
if not srv:
panic("no server for nid", lo)
else:
- try:
+ try:
lctl.disconnect(srv.net_type, srv.nid, srv.port, srv.uuid)
- except CommandError, e:
- print "disconnect failed: ", self.name
- e.dump()
- try:
+ except CommandError, e:
+ print "disconnect failed: ", self.name
+ e.dump()
+ try:
lctl.del_route(self.net_type, self.nid, lo, hi)
- except CommandError, e:
+ except CommandError, e:
print "del_route failed: ", self.name
e.dump()
class LOV(Module):
def __init__(self,dom_node):
Module.__init__(self, 'LOV', dom_node)
- self.stripe_sz = get_attr_int(dom_node, 'stripesize', 65536)
- self.stripe_off = get_attr_int(dom_node, 'stripeoffset', 0)
- self.pattern = get_attr_int(dom_node, 'pattern', 0)
- self.mdsuuid = get_first_ref(dom_node, 'mds')
- mds= lookup(dom_node.parentNode, self.mdsuuid)
- self.mdsname = getName(mds)
- self.devlist = get_all_refs(dom_node, 'osc')
- self.stripe_cnt = len(self.devlist)
+ self.mds_uuid = get_first_ref(dom_node, 'mds')
+ mds= lookup(dom_node.parentNode, self.mds_uuid)
+ self.mds_name = getName(mds)
+ devs = dom_node.getElementsByTagName('devices')
+ if len(devs) > 0:
+ dev_node = devs[0]
+ self.stripe_sz = get_attr_int(dev_node, 'stripesize', 65536)
+ self.stripe_off = get_attr_int(dev_node, 'stripeoffset', 0)
+ self.pattern = get_attr_int(dev_node, 'pattern', 0)
+ self.devlist = get_all_refs(dev_node, 'osc')
+ self.stripe_cnt = get_attr_int(dev_node, 'stripecount', len(self.devlist))
+ self.add_module('lustre/mdc', 'mdc')
+ self.add_module('lustre/lov', 'lov')
def prepare(self):
- self.info(self.mdsuuid, self.stripe_cnt, self.stripe_sz, self.stripe_off, self.pattern,
- self.devlist, self.mdsname)
- lctl.lovconfig(self.uuid, self.mdsname, self.stripe_cnt,
- self.stripe_sz, self.stripe_off, self.pattern,
- string.join(self.devlist))
+ for osc_uuid in self.devlist:
+ osc = lookup(self.dom_node.parentNode, osc_uuid)
+ if osc:
+ n = OSC(osc)
+ n.prepare()
+ else:
+ panic('osc not found:', osc_uuid)
+ mdc_uuid = prepare_mdc(self.dom_node.parentNode, self.mds_uuid)
+ self.info(self.mds_uuid, self.stripe_cnt, self.stripe_sz,
+ self.stripe_off, self.pattern, self.devlist, self.mds_name)
+ lctl.newdev(attach="lov %s %s" % (self.name, self.uuid),
+ setup ="%s" % (mdc_uuid))
+
+ def cleanup(self):
+ for osc_uuid in self.devlist:
+ osc = lookup(self.dom_node.parentNode, osc_uuid)
+ if osc:
+ n = OSC(osc)
+ n.cleanup()
+ else:
+ panic('osc not found:', osc_uuid)
+ Module.cleanup(self)
+ cleanup_mdc(self.dom_node.parentNode, self.mds_uuid)
+ def load_module(self):
+ for osc_uuid in self.devlist:
+ osc = lookup(self.dom_node.parentNode, osc_uuid)
+ if osc:
+ n = OSC(osc)
+ n.load_module()
+ break
+ else:
+ panic('osc not found:', osc_uuid)
+ Module.load_module(self)
+ def cleanup_module(self):
+ Module.cleanup_module(self)
+ for osc_uuid in self.devlist:
+ osc = lookup(self.dom_node.parentNode, osc_uuid)
+ if osc:
+ n = OSC(osc)
+ n.cleanup_module()
+ break
+ else:
+ panic('osc not found:', osc_uuid)
+
+class LOVConfig(Module):
+ def __init__(self,dom_node):
+ Module.__init__(self, 'LOVConfig', dom_node)
+ self.lov_uuid = get_first_ref(dom_node, 'lov')
+ l = lookup(dom_node.parentNode, self.lov_uuid)
+ self.lov = LOV(l)
+
+ def prepare(self):
+ lov = self.lov
+ self.info(lov.mds_uuid, lov.stripe_cnt, lov.stripe_sz, lov.stripe_off, lov.pattern,
+ lov.devlist, lov.mds_name)
+ lctl.lovconfig(lov.uuid, lov.mds_name, lov.stripe_cnt,
+ lov.stripe_sz, lov.stripe_off, lov.pattern,
+ string.join(lov.devlist))
+
+ def cleanup(self):
+ #nothing to do here
+ pass
class MDS(Module):
Module.cleanup(self)
clean_loop(self.devname)
+# Very unusual case, as there is no MDC element in the XML anymore
+# Builds itself from an MDS node
class MDC(Module):
def __init__(self,dom_node):
- Module.__init__(self, 'MDC', dom_node)
- self.mds_uuid = get_first_ref(dom_node, 'mds')
- self.lookup_server(self.mds_uuid)
+ self.mds = MDS(dom_node)
+ self.dom_node = dom_node
+ self.module_name = 'MDC'
+ self.kmodule_list = []
+ self._server = None
+ self._connected = 0
+
+ host = socket.gethostname()
+ self.name = 'MDC_'+host
+ self.uuid = self.name+'_UUID'
+
+ self.lookup_server(self.mds.uuid)
self.add_module('lustre/mdc', 'mdc')
def prepare(self):
- self.info(self.mds_uuid)
+ self.info(self.mds.uuid)
srv = self.get_server()
lctl.connect(srv.net_type, srv.nid, srv.port, srv.uuid, srv.send_mem, srv.recv_mem)
lctl.newdev(attach="mdc %s %s" % (self.name, self.uuid),
- setup ="%s %s" %(self.mds_uuid, srv.uuid))
+ setup ="%s %s" %(self.mds.uuid, srv.uuid))
class OBD(Module):
def __init__(self, dom_node):
lctl.newdev(attach="ost %s %s" % (self.name, self.uuid),
setup ="%s" % (self.obd_uuid))
+
+# virtual interface for OSC and LOV
+class VOSC(Module):
+ def __init__(self,dom_node):
+ Module.__init__(self, 'VOSC', dom_node)
+ if dom_node.nodeName == 'lov':
+ self.osc = LOV(dom_node)
+ else:
+ self.osc = OSC(dom_node)
+ def prepare(self):
+ self.osc.prepare()
+ def cleanup(self):
+ self.osc.cleanup()
+ def load_module(self):
+ self.osc.load_module()
+ def cleanup_module(self):
+ self.osc.cleanup_module()
+
+
class OSC(Module):
def __init__(self,dom_node):
Module.__init__(self, 'OSC', dom_node)
lctl.newdev(attach="osc %s %s" % (self.name, self.uuid),
setup ="%s %s" %(self.obd_uuid, srv.uuid))
+ def cleanup(self):
+ srv = self.get_server()
+ if local_net(srv):
+ Module.cleanup(self)
+ else:
+ self.info(self.obd_uuid, self.ost_uuid)
+ r = find_route(srv)
+ if r:
+ lctl.del_route_host(r[0], srv.uuid, r[1], r[2])
+ Module.cleanup(self)
+
class Mountpoint(Module):
def __init__(self,dom_node):
Module.__init__(self, 'MTPT', dom_node)
self.path = get_text(dom_node, 'path')
- self.mdc_uuid = get_first_ref(dom_node, 'mdc')
+ self.mds_uuid = get_first_ref(dom_node, 'mds')
self.lov_uuid = get_first_ref(dom_node, 'osc')
- self.add_module('lustre/osc', 'osc')
- # should add lov only if needed
- self.add_module('lustre/lov', 'lov')
+ self.add_module('lustre/mdc', 'mdc')
self.add_module('lustre/llite', 'llite')
+ l = lookup(self.dom_node.parentNode, self.lov_uuid)
+ self.osc = VOSC(l)
def prepare(self):
- l = lookup(self.dom_node.parentNode, self.lov_uuid)
- if l.nodeName == 'lov':
- lov = LOV(l)
- for osc_uuid in lov.devlist:
- osc = lookup(self.dom_node.parentNode, osc_uuid)
- if osc:
- n = OSC(osc)
- n.prepare()
- else:
- panic('osc not found:', osc_uuid)
- lctl.newdev(attach="lov %s %s" % (lov.name, lov.uuid),
- setup ="%s" % (self.mdc_uuid))
- else:
- osc = OSC(l)
- osc.prepare()
-
- self.info(self.path, self.mdc_uuid,self.lov_uuid)
+ self.osc.prepare()
+ mdc_uuid = prepare_mdc(self.dom_node.parentNode, self.mds_uuid)
+
+ self.info(self.path, self.mds_uuid,self.lov_uuid)
cmd = "mount -t lustre_lite -o osc=%s,mdc=%s none %s" % \
- (self.lov_uuid, self.mdc_uuid, self.path)
+ (self.lov_uuid, mdc_uuid, self.path)
run("mkdir", self.path)
ret, val = run(cmd)
if ret:
panic("mount failed:", self.path)
+
def cleanup(self):
- self.info(self.path, self.mdc_uuid,self.lov_uuid)
+ self.info(self.path, self.mds_uuid,self.lov_uuid)
(rc, out) = run("umount", self.path)
if rc:
log("umount failed, cleanup will most likely not work.")
l = lookup(self.dom_node.parentNode, self.lov_uuid)
- if l.nodeName == 'lov':
- lov = LOV(l)
- for osc_uuid in lov.devlist:
- osc = lookup(self.dom_node.parentNode, osc_uuid)
- if osc:
- n = OSC(osc)
- n.cleanup()
- else:
- panic('osc not found:', osc_uuid)
- else:
- osc = OSC(l)
- osc.cleanup()
-
+ self.osc.cleanup()
+ cleanup_mdc(self.dom_node.parentNode, self.mds_uuid)
+
+ def load_module(self):
+ self.osc.load_module()
+ Module.load_module(self)
+ def cleanup_module(self):
+ Module.cleanup_module(self)
+ self.osc.cleanup_module()
+
# ============================================================
# XML processing and query
return 40
elif type in ('mdc','osc'):
return 50
- elif type in ('lov',):
+ elif type in ('lov', 'lovconfig'):
return 60
elif type in ('mountpoint',):
return 70
return None
-
+############################################################
+# MDC UUID hack -
+# FIXME: clean this mess up!
+#
+mdc_uuid = None  # uuid of the MDC set up by prepare_mdc(); None until one has been prepared
+def prepare_mdc(dom_node, mds_uuid):  # returns the MDC uuid, building and preparing the MDC on first use
+ global mdc_uuid
+ mds_node = lookup(dom_node, mds_uuid);
+ if not mds_node:
+ panic("no mds:", mds_uuid)
+ if mdc_uuid:  # an MDC was already prepared by an earlier call
+ return mdc_uuid  # reuse it, whichever mds_uuid was passed this time
+ mdc = MDC(mds_node)
+ mdc.prepare()
+ mdc_uuid = mdc.uuid  # remember it for later callers
+ return mdc_uuid
+
+mdc_cleaned = None
+def cleanup_mdc(dom_node, mds_uuid):
+ global mdc_cleaned
+ mds_node = lookup(dom_node, mds_uuid);
+ if not mds_node:
+ panic("no mds:", mds_uuid)
+ if not mdc_cleaned:
+ mdc = MDC(mds_node)
+ mdc.cleanup()
+ mdc_uuid = None
+ mdc_cleaned = 'yes'
+
############################################################
# routing ("rooting")
list = lustre.getElementsByTagName('node')
for node in list:
if get_attr(node, 'router'):
- router_flag = 1
+ router_flag = 1
for (local_type, local_nid) in local_node:
gw = None
netlist = node.getElementsByTagName('network')
n = LDLM(dom_node)
elif type == 'lov':
n = LOV(dom_node)
+ elif type == 'lovconfig':
+ n = LOVConfig(dom_node)
elif type == 'network':
n = Network(dom_node)
elif type == 'obd':
elif type == 'mds':
n = MDS(dom_node)
elif type == 'osc':
- n = OSC(dom_node)
+ n = VOSC(dom_node)
elif type == 'mdc':
n = MDC(dom_node)
elif type == 'mountpoint':
init_node(dom_node)
init_route_config(lustreNode)
else:
- global router_flag
- router_flag = 1
+ global router_flag
+ router_flag = 1
# Two step process: (1) load modules, (2) setup lustre
# if not cleaning, load modules first.
short_opts = "hdnv"
long_opts = ["ldap", "reformat", "lustre=", "verbose", "gdb",
"portals=", "makeldiff", "cleanup", "noexec",
- "help", "node=", "get=", "nomod", "nosetup"]
+ "help", "node=", "get=", "nomod", "nosetup",
+ "dump="]
opts = []
args = []
try:
config.nomod(1)
if o == "--nosetup":
config.nosetup(1)
+ if o == "--dump":
+ config.dump_file(a)
return args
def fetch(url):
debug("configuring for host: ", node_list)
if len(host) > 0:
- config._debug_path = '/tmp/lustre-log-' + host
+ config._debug_path = config._debug_path + '-' + host
+ config._gdb_script = config._gdb_script + '-' + host
TCP_ACCEPTOR = find_prog('acceptor')
if not TCP_ACCEPTOR: