From 34a9e306342a4183c20578a42abfa8134f3ad975 Mon Sep 17 00:00:00 2001 From: rread Date: Thu, 29 Aug 2002 12:01:53 +0000 Subject: [PATCH] * first cut at routing config support, pretty tacky but might just work * print more diagnostics for cleanup errors * various cleanups --- lustre/utils/lconf | 536 ++++++++++++++++++++++++++++++++++------------------- lustre/utils/lmc | 337 +++++++++++++++++++++------------ 2 files changed, 556 insertions(+), 317 deletions(-) diff --git a/lustre/utils/lconf b/lustre/utils/lconf index a652d89..6b758de 100755 --- a/lustre/utils/lconf +++ b/lustre/utils/lconf @@ -254,21 +254,41 @@ class LCTLInterface: self.run(cmds) - # create a new connection - def connect(self, net, nid, port, servuuid, send_buf, read_buf): - # XXX: buf size params not used yet - cmds = """ + # create a new connection + def connect(self, net, nid, port, servuuid, send_mem, recv_mem): + if net == 'tcp': + cmds = """ network %s + add_uuid %s %s + send_mem %d + recv_mem %d connect %s %d + quit""" % (net, servuuid, nid, send_mem, recv_mem, nid, port, ) + else: + cmds = """ + network %s add_uuid %s %s - quit""" % (net, nid, port, servuuid, nid) + connect %s %d + quit""" % (net, servuuid, nid, nid, port, ) + self.run(cmds) - # create a new connection - def add_route(self, net, to, via): + # add a route to a range + def add_route(self, net, gw, lo, hi): + cmds = """ + network %s + add_route %s %s %s + """ % (net, gw, lo, hi) + self.run(cmds) + + # add a route to a host + def add_route_host(self, net, uuid, gw, tgt): cmds = """ - """ - #self.run(cmds) + network %s + add_uuid %s %s + add_route %s %s + """ % (net, uuid, tgt, gw, tgt) + self.run(cmds) # disconnect one connection def disconnect(self, net, nid, port, servuuid): @@ -525,24 +545,43 @@ class Module: """ Base class for the rest of the modules. The default cleanup method is defined here, as well as some utilitiy funcs. 
""" - def __init__(self, tag_name, node): - self.dom_node = node - self.tag_name = tag_name - self.name = node.getAttribute('name') - self.uuid = node.getAttribute('uuid') + def __init__(self, module_name, dom_node): + self.dom_node = dom_node + self.module_name = module_name + self.name = get_attr(dom_node, 'name') + self.uuid = get_attr(dom_node, 'uuid') self.kmodule_list = [] + self._server = None + self._connected = 0 def info(self, *args): msg = string.join(map(str,args)) - print self.tag_name + ":", self.name, self.uuid, msg + print self.module_name + ":", self.name, self.uuid, msg + + + def lookup_server(self, srv_uuid): + """ Lookup a server's network information """ + net = get_ost_net(self.dom_node.parentNode, srv_uuid) + self._server = Network(net) + + def get_server(self): + return self._server def cleanup(self): """ default cleanup, used for most modules """ self.info() + srv = self.get_server() + if srv: + try: + lctl.disconnect(srv.net_type, srv.nid, srv.port, srv.uuid) + except CommandError, e: + log(self.module_name, "disconnect failed: ", self.name) + e.dump() try: lctl.cleanup(self.name, self.uuid) except CommandError, e: - print "cleanup failed: ", self.name + log(self.module_name, "cleanup failed: ", self.name) + e.dump() def add_module(self, modname): """Append a module to list of modules to load.""" @@ -594,13 +633,13 @@ class Module: class Network(Module): - def __init__(self,node): - Module.__init__(self, 'NETWORK', node) - self.net_type = node.getAttribute('type') - self.nid = getText(node, 'server', '*') - self.port = int(getText(node, 'port', 0)) - self.send_buf = int(getText(node, 'send_buf', 0)) - self.read_buf = int(getText(node, 'read_buf', 0)) + def __init__(self,dom_node): + Module.__init__(self, 'NETWORK', dom_node) + self.net_type = get_attr(dom_node,'type') + self.nid = get_text(dom_node, 'server', '*') + self.port = get_text_int(dom_node, 'port', 0) + self.send_buf = get_text_int(dom_node, 'send_buf', 65536) + self.read_buf = 
get_text_int(dom_node, 'read_buf', 65536) if self.nid == '*': self.nid = get_local_address(self.net_type) if not self.nid: @@ -622,6 +661,13 @@ class Network(Module): ret = run_daemon(TCP_ACCEPTOR, self.port) if ret: raise CommandError(TCP_ACCEPTOR, 'failed', ret) + + ret = self.dom_node.getElementsByTagName('route_tbl') + for a in ret: + for r in a.getElementsByTagName('route'): + lctl.add_route(self.net_type, self.nid, get_attr(r, 'lo'), + get_attr(r,'hi', '')) + lctl.network(self.net_type, self.nid) lctl.newdev(attach = "ptlrpc RPCDEV") @@ -631,17 +677,19 @@ class Network(Module): lctl.cleanup("RPCDEV", "") except CommandError, e: print "cleanup failed: ", self.name + e.dump() try: lctl.disconnectAll(self.net_type) except CommandError, e: - print "cleanup failed: ", self.name + print "disconnectAll failed: ", self.name + e.dump() if self.net_type == 'tcp': # yikes, this ugly! need to save pid in /var/something run("killall acceptor") class LDLM(Module): - def __init__(self,node): - Module.__init__(self, 'LDLM', node) + def __init__(self,dom_node): + Module.__init__(self, 'LDLM', dom_node) self.add_module('ldlm') def prepare(self): self.info() @@ -649,24 +697,16 @@ class LDLM(Module): setup ="") class LOV(Module): - def __init__(self,node): - Module.__init__(self, 'LOV', node) - devs = node.getElementsByTagName('devices')[0] - self.stripe_sz = int(devs.getAttribute('stripesize')) - self.stripe_off = int(devs.getAttribute('stripeoffset')) - self.pattern = int(devs.getAttribute('pattern')) - mdsref = node.getElementsByTagName('mds_ref')[0] - self.mdsuuid = mdsref.getAttribute('uuidref') - mds= lookup(node.parentNode, self.mdsuuid) + def __init__(self,dom_node): + Module.__init__(self, 'LOV', dom_node) + self.stripe_sz = get_attr_int(dom_node, 'stripesize', 65536) + self.stripe_off = get_attr_int(dom_node, 'stripeoffset', 0) + self.pattern = get_attr_int(dom_node, 'pattern', 0) + self.mdsuuid = get_first_ref(dom_node, 'mds') + mds= lookup(dom_node.parentNode, 
self.mdsuuid) self.mdsname = getName(mds) - devlist = "" - stripe_cnt = 0 - for child in devs.childNodes: - if child.nodeName == 'osc_ref': - devlist = devlist + child.getAttribute('uuidref') + " " - stripe_cnt = stripe_cnt + 1 - self.devlist = devlist - self.stripe_cnt = stripe_cnt + self.devlist = get_all_refs(dom_node, 'osc') + self.stripe_cnt = len(self.devlist) self.add_module('osc') self.add_module('lov') @@ -675,17 +715,15 @@ class LOV(Module): self.devlist, self.mdsname) lctl.lovconfig(self.uuid, self.mdsname, self.stripe_cnt, self.stripe_sz, self.stripe_off, self.pattern, - self.devlist) + string.join(self.devlist)) - def cleanup(self): - pass class MDS(Module): - def __init__(self,node): - Module.__init__(self, 'MDS', node) - self.devname, self.size = getDevice(node) - self.fstype = getText(node, 'fstype') - self.format = getText(node, 'autoformat', "no") + def __init__(self,dom_node): + Module.__init__(self, 'MDS', dom_node) + self.devname, self.size = get_device(dom_node) + self.fstype = get_text(dom_node, 'fstype') + self.format = get_text(dom_node, 'autoformat', "no") if self.fstype == 'extN': self.add_module('extN') self.add_module('mds') @@ -701,43 +739,26 @@ class MDS(Module): clean_loop(self.devname) class MDC(Module): - def __init__(self,node): - Module.__init__(self, 'MDC', node) - ref = node.getElementsByTagName('mds_ref')[0] - self.mds_uuid = ref.getAttribute('uuidref') + def __init__(self,dom_node): + Module.__init__(self, 'MDC', dom_node) + self.mds_uuid = get_first_ref(dom_node, 'mds') + self.lookup_server(self.mds_uuid) self.add_module('mdc') def prepare(self): self.info(self.mds_uuid) - mds = lookup(self.dom_node.parentNode, self.mds_uuid) - if mds == None: - panic(self.mdsuuid, "not found.") - net = get_ost_net(self.dom_node.parentNode, self.mds_uuid) - srv = Network(net) + srv = self.get_server() lctl.connect(srv.net_type, srv.nid, srv.port, srv.uuid, srv.send_buf, srv.read_buf) lctl.newdev(attach="mdc %s %s" % (self.name, self.uuid), 
setup ="%s %s" %(self.mds_uuid, srv.uuid)) - def cleanup(self): - self.info(self.mds_uuid) - net = get_ost_net(self.dom_node.parentNode, self.mds_uuid) - srv = Network(net) - try: - lctl.disconnect(srv.net_type, srv.nid, srv.port, srv.uuid) - except CommandError: - print "disconnect failed: ", self.name - try: - lctl.cleanup(self.name, self.uuid) - except CommandError: - print "cleanup failed: ", self.name - class OBD(Module): - def __init__(self, node): - Module.__init__(self, 'OBD', node) - self.obdtype = node.getAttribute('type') - self.devname, self.size = getDevice(node) - self.fstype = getText(node, 'fstype') - self.format = getText(node, 'autoformat', 'yes') + def __init__(self, dom_node): + Module.__init__(self, 'OBD', dom_node) + self.obdtype = get_attr(dom_node, 'type') + self.devname, self.size = get_device(dom_node) + self.fstype = get_text(dom_node, 'fstype') + self.format = get_text(dom_node, 'autoformat', 'yes') if self.fstype == 'extN': self.add_module('extN') self.add_module(self.obdtype) @@ -759,10 +780,9 @@ class OBD(Module): clean_loop(self.devname) class OST(Module): - def __init__(self,node): - Module.__init__(self, 'OST', node) - ref = node.getElementsByTagName('obd_ref')[0] - self.obd_uuid = ref.getAttribute('uuidref') + def __init__(self,dom_node): + Module.__init__(self, 'OST', dom_node) + self.obd_uuid = get_first_ref(dom_node, 'obd') self.add_module('ost') def prepare(self): @@ -771,43 +791,32 @@ class OST(Module): setup ="%s" % (self.obd_uuid)) class OSC(Module): - def __init__(self,node): - Module.__init__(self, 'OSC', node) - ref = node.getElementsByTagName('obd_ref')[0] - self.obd_uuid = ref.getAttribute('uuidref') - ref = node.getElementsByTagName('ost_ref')[0] - self.ost_uuid = ref.getAttribute('uuidref') + def __init__(self,dom_node): + Module.__init__(self, 'OSC', dom_node) + self.obd_uuid = get_first_ref(dom_node, 'obd') + self.ost_uuid = get_first_ref(dom_node, 'ost') + self.lookup_server(self.ost_uuid) self.add_module('osc') 
def prepare(self): self.info(self.obd_uuid, self.ost_uuid) - net = get_ost_net(self.dom_node.parentNode, self.ost_uuid) - srv = Network(net) - lctl.connect(srv.net_type, srv.nid, srv.port, srv.uuid, srv.send_buf, srv.read_buf) + srv = self.get_server() + if local_net(srv): + lctl.connect(srv.net_type, srv.nid, srv.port, srv.uuid, srv.send_buf, srv.read_buf) + else: + r = find_route(srv) + lctl.add_route_host(r[0], srv.uuid, r[1], r[2]) + lctl.newdev(attach="osc %s %s" % (self.name, self.uuid), setup ="%s %s" %(self.obd_uuid, srv.uuid)) - def cleanup(self): - self.info(self.obd_uuid, self.ost_uuid) - net_uuid = get_ost_net(self.dom_node.parentNode, self.ost_uuid) - srv = Network(net_uuid) - try: - lctl.disconnect(srv.net_type, srv.nid, srv.port, srv.uuid) - except CommandError: - print " failed: ", self.name - try: - lctl.cleanup(self.name, self.uuid) - except CommandError: - print "cleanup failed: ", self.name class Mountpoint(Module): - def __init__(self,node): - Module.__init__(self, 'MTPT', node) - self.path = getText(node, 'path') - ref = node.getElementsByTagName('mdc_ref')[0] - self.mdc_uuid = ref.getAttribute('uuidref') - ref = node.getElementsByTagName('osc_ref')[0] - self.lov_uuid = ref.getAttribute('uuidref') + def __init__(self,dom_node): + Module.__init__(self, 'MTPT', dom_node) + self.path = get_text(dom_node, 'path') + self.mdc_uuid = get_first_ref(dom_node, 'mdc') + self.lov_uuid = get_first_ref(dom_node, 'osc') self.add_module('osc') self.add_module('llite') @@ -815,7 +824,7 @@ class Mountpoint(Module): l = lookup(self.dom_node.parentNode, self.lov_uuid) if l.nodeName == 'lov': lov = LOV(l) - for osc_uuid in string.split(lov.devlist): + for osc_uuid in lov.devlist: osc = lookup(self.dom_node.parentNode, osc_uuid) if osc: n = OSC(osc) @@ -837,63 +846,119 @@ class Mountpoint(Module): panic("mount failed:", self.path) def cleanup(self): self.info(self.path, self.mdc_uuid,self.lov_uuid) - run("umount", self.path) + (rc, out) = run("umount", self.path) + 
if rc: + log("umount failed, cleanup will most likely not work.") l = lookup(self.dom_node.parentNode, self.lov_uuid) if l.nodeName == 'lov': lov = LOV(l) - for osc_uuid in string.split(lov.devlist): + for osc_uuid in lov.devlist: osc = lookup(self.dom_node.parentNode, osc_uuid) if osc: n = OSC(osc) n.cleanup() else: panic('osc not found:', osc_uuid) - lov.cleanup() else: osc = OSC(l) osc.cleanup() +class Router(Module): + def __init__(self,dom_node): + Module.__init__(self, 'ROUTER', dom_node) + self.add_module('kptlrouter') + + def prepare(self): + self.info() + + # ============================================================ # XML processing and query # TODO: Change query funcs to use XPath, which is muc cleaner -def getDevice(obd): +def get_device(obd): list = obd.getElementsByTagName('device') if len(list) > 0: dev = list[0] dev.normalize(); - try: - size = int(dev.getAttribute('size')) - except ValueError: - size = 0 + size = get_attr_int(dev, 'size', 0) return dev.firstChild.data, size return '', 0 # Get the text content from the first matching child # If there is no content (or it is all whitespace), return # the default -def getText(node, tag, default=""): - list = node.getElementsByTagName(tag) +def get_text(dom_node, tag, default=""): + list = dom_node.getElementsByTagName(tag) if len(list) > 0: - node = list[0] - node.normalize() - if node.firstChild: - txt = string.strip(node.firstChild.data) + dom_node = list[0] + dom_node.normalize() + if dom_node.firstChild: + txt = string.strip(dom_node.firstChild.data) if txt: return txt return default -def get_ost_net(node, uuid): - ost = lookup(node, uuid) - list = ost.getElementsByTagName('network_ref') - if list: - uuid = list[0].getAttribute('uuidref') - else: +def get_text_int(dom_node, tag, default=0): + list = dom_node.getElementsByTagName(tag) + n = default + if len(list) > 0: + dom_node = list[0] + dom_node.normalize() + if dom_node.firstChild: + txt = string.strip(dom_node.firstChild.data) + if txt: + 
try: + n = int(txt) + except ValueError: + panic("text value is not integer:", txt) + return n + +def get_attr(dom_node, attr, default=""): + v = dom_node.getAttribute(attr) + if v: + return v + return default + +def get_attr_int(dom_node, attr, default=0): + n = default + v = dom_node.getAttribute(attr) + if v: + try: + n = int(v) + except ValueError: + panic("attr value is not integer", v) + return n + +def get_first_ref(dom_node, tag): + """ Get the first uuidref of the type TAG. Used when only + one is expected. Returns the uuid.""" + uuid = None + refname = '%s_ref' % tag + list = dom_node.getElementsByTagName(refname) + if len(list) > 0: + uuid = getRef(list[0]) + return uuid + +def get_all_refs(dom_node, tag): + """ Get all the refs of type TAG. Returns list of uuids. """ + uuids = [] + refname = '%s_ref' % tag + list = dom_node.getElementsByTagName(refname) + if len(list) > 0: + for i in list: + uuids.append(getRef(i)) + return uuids + +def get_ost_net(dom_node, uuid): + ost = lookup(dom_node, uuid) + uuid = get_first_ref(ost, 'network') + if not uuid: return None - return lookup(node, uuid) + return lookup(dom_node, uuid) -def lookup(node, uuid): - for n in node.childNodes: +def lookup(dom_node, uuid): + for n in dom_node.childNodes: if n.nodeType == n.ELEMENT_NODE: if getUUID(n) == uuid: return n @@ -902,48 +967,48 @@ def lookup(node, uuid): if n: return n return None -# Get name attribute of node -def getName(node): - return node.getAttribute('name') +# Get name attribute of dom_node +def getName(dom_node): + return dom_node.getAttribute('name') -def getRef(node): - return node.getAttribute('uuidref') +def getRef(dom_node): + return dom_node.getAttribute('uuidref') -# Get name attribute of node -def getUUID(node): - return node.getAttribute('uuid') +# Get uuid attribute of dom_node +def getUUID(dom_node): + return dom_node.getAttribute('uuid') # the tag name is the service type -# fixme: this should do some checks to make sure the node is a service -def 
getServiceType(node): - return node.nodeName +# fixme: this should do some checks to make sure the dom_node is a service +def getServiceType(dom_node): + return dom_node.nodeName # # determine what "level" a particular node is at. -# the order of iniitailization is based on level. objects -# are assigned a level based on type: -# net,devices,ldlm:1, obd, mdd:2 mds,ost:3 osc,mdc:4 mounts:5 -def getServiceLevel(node): - type = getServiceType(node) - if type in ('network',): +# the order of initialization is based on level. +def getServiceLevel(dom_node): + type = getServiceType(dom_node) + if type in ('ptlrouter',): return 1 - if type in ('device', 'ldlm'): - return 2 + if type in ('network',): + return 10 + elif type in ('device', 'ldlm'): + return 20 elif type in ('obd', 'mdd'): - return 3 + return 30 elif type in ('mds','ost'): - return 4 + return 40 elif type in ('mdc','osc'): - return 5 + return 50 elif type in ('lov',): - return 6 + return 60 elif type in ('mountpoint',): - return 7 + return 70 return 0 # # return list of services in a profile. 
list is a list of tuples -# [(level, node),] +# [(level, dom_node),] def getServices(lustreNode, profileNode): list = [] for n in profileNode.childNodes: @@ -957,7 +1022,7 @@ def getServices(lustreNode, profileNode): list.sort() return list -def getByName(lustreNode, tag, name): +def getByName(lustreNode, name, tag): ndList = lustreNode.getElementsByTagName(tag) for nd in ndList: if getName(nd) == name: @@ -965,46 +1030,122 @@ def getByName(lustreNode, tag, name): return None -# ============================================================ + + +############################################################ +# routing ("rooting") +# +routes = [] +local_node = [] + +def init_node(dom_node): + global local_node + netlist = dom_node.getElementsByTagName('network') + for dom_net in netlist: + type = get_attr(dom_net, 'type') + gw = get_text(dom_net, 'server') + local_node.append((type, gw)) + + +def get_routes(type, gw, dom_net): + """ Return the routes as a list of tuples of the form: + [(type, gw, lo, hi),]""" + res = [] + tbl = dom_net.getElementsByTagName('route_tbl') + routes = tbl[0].getElementsByTagName('route') + for r in routes: + lo = get_attr(r, 'lo') + hi = get_attr(r, 'hi', '') + res.append((type, gw, lo, hi)) + return res + + +def init_route_config(lustre): + """ Scan the lustre config looking for routers. Build list of + routes. 
""" + global routes + routes = [] + list = lustre.getElementsByTagName('node') + for node in list: + if get_attr(node, 'router'): + for (local_type, local_nid) in local_node: + gw = None + netlist = node.getElementsByTagName('network') + for dom_net in netlist: + if local_type == get_attr(dom_net, 'type'): + gw = get_text(dom_net, 'server') + break + if not gw: + continue + for dom_net in netlist: + if local_type != get_attr(dom_net, 'type'): + for route in get_routes(local_type, gw, dom_net): + routes.append(route) + + +def local_net(net): + global local_node + for iface in local_node: + if net.net_type == iface[0]: + return 1 + return 0 + +def find_route(net): + global local_node, routes + frm_type = local_node[0][0] + to_type = net.net_type + to = net.nid + debug ('looking for route to', to_type,to) + for r in routes: + if r[2] == to: + return r + return None + + + + +############################################################ # lconf level logic # Start a service. -def startService(node, clean_flag, module_flag): - type = getServiceType(node) - debug('Service:', type, getName(node), getUUID(node)) +def startService(dom_node, module_flag): + type = getServiceType(dom_node) + debug('Service:', type, getName(dom_node), getUUID(dom_node)) # there must be a more dynamic way of doing this... 
n = None if type == 'ldlm': - n = LDLM(node) + n = LDLM(dom_node) elif type == 'lov': - n = LOV(node) + n = LOV(dom_node) elif type == 'network': - n = Network(node) + n = Network(dom_node) elif type == 'obd': - n = OBD(node) + n = OBD(dom_node) elif type == 'ost': - n = OST(node) + n = OST(dom_node) elif type == 'mds': - n = MDS(node) + n = MDS(dom_node) elif type == 'osc': - n = OSC(node) + n = OSC(dom_node) elif type == 'mdc': - n = MDC(node) + n = MDC(dom_node) elif type == 'mountpoint': - n = Mountpoint(node) + n = Mountpoint(dom_node) + elif type == 'ptlrouter': + n = Router(dom_node) else: panic ("unknown service type:", type) if module_flag: if config.nomod(): return - if clean_flag: + if config.cleanup(): n.cleanup_module() else: n.load_module() else: if config.nosetup(): return - if clean_flag: + if config.cleanup(): n.cleanup() else: n.prepare() @@ -1017,37 +1158,43 @@ def startService(node, clean_flag, module_flag): # * make sure partitions are in place and prepared # * initialize devices with lctl # Levels is important, and needs to be enforced. -def startProfile(lustreNode, profileNode, clean_flag, module_flag): +def startProfile(lustreNode, profileNode, module_flag): if not profileNode: panic("profile:", profile, "not found.") services = getServices(lustreNode, profileNode) - if clean_flag: + if config.cleanup(): services.reverse() for s in services: - startService(s[1], clean_flag, module_flag) + startService(s[1], module_flag) + # # Load profile for -def doHost(lustreNode, hosts, clean_flag): - node = None +def doHost(lustreNode, hosts): + global routes + dom_node = None for h in hosts: - node = getByName(lustreNode, 'node', h) - if node: + dom_node = getByName(lustreNode, h, 'node') + if dom_node: break - if not node: + if not dom_node: print 'No host entry found.' 
return + if not get_attr(dom_node, 'router'): + init_node(dom_node) + init_route_config(lustreNode) + # Two step process: (1) load modules, (2) setup lustre # if not cleaning, load modules first. - module_flag = not clean_flag - reflist = node.getElementsByTagName('profile') + module_flag = not config.cleanup() + reflist = dom_node.getElementsByTagName('profile') for profile in reflist: - startProfile(lustreNode, profile, clean_flag, module_flag) + startProfile(lustreNode, profile, module_flag) - if not clean_flag: - setDebugPath() + if not config.cleanup(): + sys_set_debug_path() script = config.gdb_script() run(lctl.lctl, ' modules >', script) if config.gdb(): @@ -1057,8 +1204,9 @@ def doHost(lustreNode, hosts, clean_flag): module_flag = not module_flag for profile in reflist: - startProfile(lustreNode, profile, clean_flag, module_flag) + startProfile(lustreNode, profile, module_flag) +############################################################ # Command line processing # def parse_cmdline(argv): @@ -1117,7 +1265,7 @@ def setupModulePath(cmd): if os.access(base+"/Makefile", os.R_OK): config.src_dir(base + "/../../") -def setDebugPath(): +def sys_set_debug_path(): debug("debug path: ", config.debug_path()) if config.noexec(): return @@ -1129,7 +1277,7 @@ def setDebugPath(): print e -def makeDevices(): +def sys_make_devices(): if not os.access('/dev/portals', os.R_OK): run('mknod /dev/portals c 10 240') if not os.access('/dev/obd', os.R_OK): @@ -1175,8 +1323,8 @@ def main(): lctl = LCTLInterface('lctl') setupModulePath(sys.argv[0]) - makeDevices() - doHost(dom.documentElement, node_list, config.cleanup()) + sys_make_devices() + doHost(dom.documentElement, node_list) if __name__ == "__main__": try: diff --git a/lustre/utils/lmc b/lustre/utils/lmc index b49be8c..857674f 100755 --- a/lustre/utils/lmc +++ b/lustre/utils/lmc @@ -1,7 +1,6 @@ #!/usr/bin/env python -# -# Copyright (C) 2002 Cluster File Systems, Inc. 
-# Author: Robert Read +# Copyright (C) 2002 Cluster File Systems, Inc. +# Author: Robert Read # This file is part of Lustre, http://www.lustre.org. # @@ -26,6 +25,13 @@ lmc - lustre configurtion data manager # create nodes ./lmc --output config.xml --node server --net server1 tcp ./lmc --merge config.xml --node client --net client1 tcp +./lmc --merge config.xml --node client --route gw lo [hi] +./lmc --merge config.xml --router --node gw1 --net gw1 tcp +./lmc --merge config.xml --node gw1 --net 1 elan + +./lmc --merge config.xml --route elan 1 1 100 +./lmc --merge config.xml --route tcp gw1 ba1 + # configure server @@ -52,12 +58,21 @@ def usage(): print """usage: lmc [--node --ost | --mtpt | --lov] args Commands: --node node_name - Node_name by itself it will create a new node. When used with other - commands it specifies the node to modify + Node_name by itself it will create a new node. If the --router + option is used when creating a new node, then that node will also + be configured as a router. When used with other commands it + specifies the node to modify. --net hostname nettype [port, recv_buf, send_buf] Nettype is either tcp, elan, or gm. - Requires a node argument + Requires --node + +--route net gw lo [hi] + This command is used to create routes. NET is the + network type this route will be used on. The GW is an address of + one of the local interfaces. LO and HI represent a range of + addresses that can be reached through the gateway. If HI is not + set, then a route to the specific host in LO is created. --mds device [size] Create a MDS using the device @@ -113,7 +128,7 @@ def new_name(base): names[ret] = 1 return ret -def get_uuid(name): +def new_uuid(name): return "%s_UUID" % (name) ldlm_name = 'ldlm' @@ -121,7 +136,10 @@ ldlm_uuid = 'ldlm_UUID' def new_lustre(dom): """Create a new empty lustre document""" # adding ldlm here is a bit of a hack, but one is enough. 
- str = """ """ % (ldlm_name, ldlm_uuid) + str = """ + + + """ % (ldlm_name, ldlm_uuid) return dom.parseString(str) names = {} @@ -144,6 +162,9 @@ def get_format_flag(options): return 'yes' return 'no' +############################################################ +# Build config objects using DOM +# class GenConfig: doc = None dom = None @@ -186,6 +207,14 @@ class GenConfig: self.addElement(network, "port", "%d" %(port)) return network + def route(self, lo, hi): + """ create one entry for the route table """ + ref = self.doc.createElement('route') + ref.setAttribute("lo", lo) + if hi: + ref.setAttribute("hi", hi) + return ref + def node(self, name, uuid): """ create a host """ node = self.newService("node", name, uuid) @@ -256,6 +285,10 @@ class GenConfig: self.addElement(mtpt, "path", path) return mtpt +############################################################ +# Utilities to query a DOM tree +# Using these functions we can treat config information +# directly as a database. def getName(n): return n.getAttribute('name') @@ -275,6 +308,7 @@ def findByName(lustre, name, tag = ""): if n: return n return None + def lookup(node, uuid): for n in node.childNodes: if n.nodeType == n.ELEMENT_NODE: @@ -298,6 +332,7 @@ def mds2node(lustre, mds_name): error("no node found for :", mds_name) return node + def name2uuid(lustre, name, tag="", fatal=1): ret = findByName(lustre, name, tag) if not ret: @@ -307,6 +342,7 @@ def name2uuid(lustre, name, tag="", fatal=1): return "" return getUUID(ret) + # XXX: assumes only one network element per node. 
will fix this # as soon as support for routers is added def get_net_uuid(lustre, node_name): @@ -319,12 +355,14 @@ def get_net_uuid(lustre, node_name): return getUUID(net[0]) return None + def lov_add_osc(gen, lov, osc_uuid): devs = lov.getElementsByTagName('devices') if len(devs) == 1: devs[0].appendChild(gen.ref("osc", osc_uuid)) else: error("No devices element found for LOV:", lov) + def node_add_profile(gen, node, ref, uuid): ret = node.getElementsByTagName('profile') @@ -332,9 +370,156 @@ def node_add_profile(gen, node, ref, uuid): error('node has no profile:', node) ret[0].appendChild(gen.ref(ref, uuid)) +def get_attr(dom_node, attr, default=""): + v = dom_node.getAttribute(attr) + if v: + return v + return default + +############################################################ +# Top level commands # -# Create a new obd, osc, and ost. Add them to the DOM. -# +def do_add_node(gen, lustre, options, node_name): + uuid = new_uuid(node_name) + node = gen.node(node_name, uuid) + node_add_profile(gen, node, 'ldlm', ldlm_uuid) + if options.has_key('router'): + node.setAttribute('router', '1') + node_add_profile(gen, node, "ptlrouter", 'PTLROUTER_UUID') + lustre.appendChild(node) + return node + + +def add_node(gen, lustre, options, args): + """ create a node with a network config """ + if len(args) > 1: + usage() + + node_name = options['node'] + + ret = findByName(lustre, node_name, "node") + if ret: + print "Node:", node_name, "exists." 
+ return + do_add_node(gen, lustre, options, node_name) + + +def add_net(gen, lustre, options, args): + """ create a node with a network config """ + if len(args) < 2: + usage() + + node_name = options['node'] + nid = args[0] + net_type = args[1] + + if net_type == 'tcp': + if len(args) > 2: + port = int(args[2]) + else: + port = DEFAULT_PORT + # add send, recv buffer size here + elif net_type in ('elan', 'gm'): + port = 0 + else: + print "Unknown net_type: ", net_type + sys.exit(2) + + ret = findByName(lustre, node_name, "node") + if not ret: + node = do_add_node(gen, lustre, options, node_name) + else: + node = ret + net_name = new_name('NET_'+ node_name +'_'+ net_type) + net_uuid = new_uuid(net_name) + node.appendChild(gen.network(net_name, net_uuid, nid, net_type, port)) + node_add_profile(gen, node, "network", net_uuid) + + +def add_route(gen, lustre, options, args): + """ add a route table entry to a network on a node """ + if len(args) < 3: + usage() + + node_name = options['node'] + net_type= args[0] + gw = args[1] + lo = args[2] + hi = '' + + if len(args) > 3: + hi = args[3] + + node = findByName(lustre, node_name, "node") + if not node: + error (node_name, " not found.") + + netlist = node.getElementsByTagName('network') + for net in netlist: + if get_attr(net, 'type') == net_type: + rlist = net.getElementsByTagName('route_tbl') + if len(rlist) > 0: + rtbl = rlist[0] + else: + rtbl = gen.addElement(net, 'route_tbl') + rtbl.appendChild(gen.route(lo, hi)) + + +def add_mds(gen, lustre, options, args): + if len(args) < 1: + usage() + + if options.has_key('node'): + node_name = options['node'] + else: + error("--mds requires a --node argument") + + mds_name = new_name(options['mds']) + devname = args[0] + if len(args) > 1: + size = args[1] + else: + size = 0 + + mdc_name = 'MDC_' + mds_name + mds_uuid = new_uuid(mds_name) + mdc_uuid = new_uuid(mdc_name) + + node_uuid = name2uuid(lustre, node_name) + + node = findByName(lustre, node_name, "node") + node_add_profile(gen, 
node, "mds", mds_uuid) + net_uuid = get_net_uuid(lustre, node_name) + if not net_uuid: + error("NODE: ", node_name, "not found") + + + mds = gen.mds(mds_name, mds_uuid, "extN", devname, get_format_flag(options), + net_uuid, node_uuid, dev_size=size) + mdc = gen.mdc(mdc_name, mdc_uuid, mds_uuid) + lustre.appendChild(mds) + lustre.appendChild(mdc) + + +def add_mdc(gen, lustre, options, args): + """ create mtpt on a node """ + if len(args) < 1: + usage() + + if options.has_key('node'): + node_name = options['node'] + else: + error("--mdc requires a --node argument") + + mdc_name = args[0] + mdc_uuid = name2uuid(lustre, mdc_name) + + node = findByName(lustre, node_name, "node") + if not node: + error('node:', node_name, "not found.") + node_add_profile(gen, node, "mdc", mdc_uuid) + + def add_ost(gen, lustre, options, args): lovname = '' obdtype = 'obdfilter' @@ -365,9 +550,9 @@ def add_ost(gen, lustre, options, args): obdname = new_name('OBD_'+ node_name) oscname = new_name('OSC_'+ node_name) ostname = new_name('OST_'+ node_name) - obd_uuid = get_uuid(obdname) - ost_uuid = get_uuid(ostname) - osc_uuid = get_uuid(oscname) + obd_uuid = new_uuid(obdname) + ost_uuid = new_uuid(ostname) + osc_uuid = new_uuid(oscname) net_uuid = get_net_uuid(lustre, node_name) if not net_uuid: @@ -390,6 +575,7 @@ def add_ost(gen, lustre, options, args): lustre.appendChild(obd) lustre.appendChild(osc) lustre.appendChild(ost) + # this is generally only used by llecho.sh def add_osc(gen, lustre, options, args): @@ -405,57 +591,6 @@ def add_osc(gen, lustre, options, args): node = findByName(lustre, node_name, "node") node_add_profile(gen, node, 'osc', osc_uuid) -def add_net(gen, lustre, options, args): - """ create a node with a network config """ - if len(args) < 2: - usage() - - node_name = options['node'] - nid = args[0] - net_type = args[1] - - if net_type == 'tcp': - if len(args) > 2: - port = int(args[2]) - else: - port = DEFAULT_PORT - # add send, recv buffer size here - elif net_type in 
('elan', 'gm'): - port = 0 - else: - print "Unknown net_type: ", net_type - sys.exit(2) - - ret = findByName(lustre, node_name, "node") - if not ret: - node = do_add_node(gen, lustre, node_name) - else: - node = ret - net_name = new_name('NET_'+ node_name +'_'+ net_type) - net_uuid = get_uuid(net_name) - node.appendChild(gen.network(net_name, net_uuid, nid, net_type, port)) - node_add_profile(gen, node, "network", net_uuid) - -def do_add_node(gen, lustre, node_name): - uuid = get_uuid(node_name) - node = gen.node(node_name, uuid) - node_add_profile(gen, node, 'ldlm', ldlm_uuid) - lustre.appendChild(node) - return node - -def add_node(gen, lustre, options, args): - """ create a node with a network config """ - if len(args) > 1: - usage() - - node_name = options['node'] - - ret = findByName(lustre, node_name, "node") - if ret: - print "Node:", node_name, "exists." - return - do_add_node(gen, lustre, node_name) - def add_lov(gen, lustre, options, args): """ create a lov """ @@ -467,7 +602,7 @@ def add_lov(gen, lustre, options, args): stripe_sz = args[1] stripe_off = args[2] pattern = args[3] - uuid = get_uuid(name) + uuid = new_uuid(name) ret = findByName(lustre, name, "lov") if ret: @@ -480,6 +615,7 @@ def add_lov(gen, lustre, options, args): lov = gen.lov(name, uuid, mds_uuid, stripe_sz, stripe_off, pattern) lustre.appendChild(lov) + def add_mtpt(gen, lustre, options, args): """ create mtpt on a node """ if len(args) < 3: @@ -506,7 +642,7 @@ def add_mtpt(gen, lustre, options, args): if not lov_uuid: lov_uuid = name2uuid(lustre, lov_name, tag='osc', fatal=1) - uuid = get_uuid(name) + uuid = new_uuid(name) mtpt = gen.mountpoint(name, uuid, mdc_uuid, lov_uuid, path) node = findByName(lustre, node_name, "node") if not node: @@ -515,68 +651,14 @@ def add_mtpt(gen, lustre, options, args): node_add_profile(gen, node, "mdc", mdc_uuid) lustre.appendChild(mtpt) -def add_mdc(gen, lustre, options, args): - """ create mtpt on a node """ - if len(args) < 1: - usage() - - if 
options.has_key('node'): - node_name = options['node'] - else: - error("--mdc requires a --node argument") - - mdc_name = args[0] - mdc_uuid = name2uuid(lustre, mdc_name) - - node = findByName(lustre, node_name, "node") - if not node: - error('node:', node_name, "not found.") - node_add_profile(gen, node, "mdc", mdc_uuid) - -def add_mds(gen, lustre, options, args): - if len(args) < 1: - usage() - - if options.has_key('node'): - node_name = options['node'] - else: - error("--mds requires a --node argument") - - mds_name = new_name(options['mds']) - devname = args[0] - if len(args) > 1: - size = args[1] - else: - size = 0 - mdc_name = 'MDC_' + mds_name - mds_uuid = get_uuid(mds_name) - mdc_uuid = get_uuid(mdc_name) - - node_uuid = name2uuid(lustre, node_name) - - node = findByName(lustre, node_name, "node") - node_add_profile(gen, node, "mds", mds_uuid) - net_uuid = get_net_uuid(lustre, node_name) - if not net_uuid: - error("NODE: ", node_name, "not found") - - - mds = gen.mds(mds_name, mds_uuid, "extN", devname, get_format_flag(options), - net_uuid, node_uuid, dev_size=size) - mdc = gen.mdc(mdc_name, mdc_uuid, mds_uuid) - lustre.appendChild(mds) - lustre.appendChild(mdc) - - -# +############################################################ # Command line processing # - def parse_cmdline(argv): short_opts = "ho:i:m:" long_opts = ["ost", "osc", "mtpt", "lov=", "node=", "mds=", "net", - "mdc", "merge=", "format", "reformat", "output=", + "mdc", "route", "router", "merge=", "format", "reformat", "output=", "obdtype=", "in=", "help"] opts = [] args = [] @@ -606,6 +688,10 @@ def parse_cmdline(argv): options['mtpt'] = 1 if o == "--node": options['node'] = a + if o == "--route": + options['route'] = 1 + if o == "--router": + options['router'] = 1 if o == "--lov": options['lov'] = a if o in ("-m", "--merge"): @@ -622,6 +708,7 @@ def parse_cmdline(argv): return options, args +# simple class for profiling import time class chrono: def __init__(self): @@ -640,7 +727,9 @@ class 
chrono: str = '%s: %g secs' % (msg, d) print str - +############################################################ +# Main +# def main(): options, args = parse_cmdline(sys.argv[1:]) outFile = '-' @@ -677,6 +766,8 @@ def main(): add_net(gen, lustre, options, args) elif options.has_key('lov'): add_lov(gen, lustre, options, args) + elif options.has_key('route'): + add_route(gen, lustre, options, args) elif options.has_key('node'): add_node(gen, lustre, options, args) else: -- 1.8.3.1