#!/usr/bin/env python # # Copyright (C) 2002 Cluster File Systems, Inc. # Author: Robert Read # This file is part of Lustre, http://www.lustre.org. # # Lustre is free software; you can redistribute it and/or # modify it under the terms of version 2 of the GNU General Public # License as published by the Free Software Foundation. # # Lustre is distributed in the hope that it will be useful, # but WITHOUT ANY WARRANTY; without even the implied warranty of # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the # GNU General Public License for more details. # # You should have received a copy of the GNU General Public License # along with Lustre; if not, write to the Free Software # Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA. # # lconf - lustre configuration tool # # lconf is the main driver script for starting and stopping # lustre filesystem services. # # Based in part on the XML obdctl modifications done by Brian Behlendorf import sys, getopt import string, os, stat, popen2, socket, time, random import re, exceptions import xml.dom.minidom # Global parameters TCP_ACCEPTOR = '' MAXTCPBUF = 1048576 DEFAULT_TCPBUF = 1048576 # # Maximum number of devices to search for. # (the /dev/loop* nodes need to be created beforehand) MAX_LOOP_DEVICES = 256 first_cleanup_error = 0 def cleanup_error(rc): global first_cleanup_error if not first_cleanup_error: first_cleanup_error = rc def usage(): print """usage: lconf config.xml config.xml Lustre configuration in xml format. --get URL to fetch a config file --node Load config for -d | --cleanup Cleans up config. (Shutdown) -f | --force Forced unmounting and/or obd detach during cleanup -v | --verbose Print system commands as they are run -h | --help Print this help --gdb Prints message after creating gdb module script and sleeps for 5 seconds. -n | --noexec Prints the commands and steps that will be run for a config without executing them. This can used to check if a config file is doing what it should be doing. 
(Implies -v) --nomod Skip load/unload module step. --nosetup Skip device setup/cleanup step. --reformat Reformat all devices (without question) --dump Dump the kernel debug log before portals is unloaded --minlevel Specify the minimum level of services to configure/cleanup (default 0) --maxlevel Specify the maximum level of services to configure/cleanup (default 100) Levels are aproximatly like: 10 - network 20 - device, ldlm 30 - obd, mdd 40 - mds, ost 50 - mdc, osc 60 - lov, lovconfig 70 - mountpoint, echo_client """ TODO = """ --ldap server LDAP server with lustre config database --makeldiff Translate xml source to LDIFF This are perhaps not needed: --lustre="src dir" Base directory of lustre sources. Used to search for modules. --portals=src Portals source """ sys.exit() # ============================================================ # Config parameters, encapsulated in a class class Config: def __init__(self): # flags self._noexec = 0 self._verbose = 0 self._reformat = 0 self._cleanup = 0 self._gdb = 0 self._nomod = 0 self._nosetup = 0 self._force = 0 # parameters self._modules = None self._node = None self._url = None self._gdb_script = '/tmp/ogdb' self._debug_path = '/tmp/lustre-log' self._dump_file = None self._src_dir = None self._minlevel = 0 self._maxlevel = 100 def verbose(self, flag = None): if flag: self._verbose = flag return self._verbose def noexec(self, flag = None): if flag: self._noexec = flag return self._noexec def reformat(self, flag = None): if flag: self._reformat = flag return self._reformat def cleanup(self, flag = None): if flag: self._cleanup = flag return self._cleanup def gdb(self, flag = None): if flag: self._gdb = flag return self._gdb def nomod(self, flag = None): if flag: self._nomod = flag return self._nomod def nosetup(self, flag = None): if flag: self._nosetup = flag return self._nosetup def force(self, flag = None): if flag: self._force = flag return self._force def node(self, val = None): if val: self._node = val return 
self._node def url(self, val = None): if val: self._url = val return self._url def gdb_script(self): if os.path.isdir('/r'): return '/r' + self._gdb_script else: return self._gdb_script def debug_path(self): if os.path.isdir('/r'): return '/r' + self._debug_path else: return self._debug_path def src_dir(self, val = None): if val: self._src_dir = val return self._src_dir def dump_file(self, val = None): if val: self._dump_file = val return self._dump_file def minlevel(self, val = None): if val: self._minlevel = int(val) return self._minlevel def maxlevel(self, val = None): if val: self._maxlevel = int(val) return self._maxlevel config = Config() # ============================================================ # debugging and error funcs def fixme(msg = "this feature"): raise LconfError, msg + ' not implmemented yet.' def panic(*args): msg = string.join(map(str,args)) if not config.noexec(): raise LconfError(msg) else: print "! " + msg def log(*args): msg = string.join(map(str,args)) print msg def logall(msgs): for s in msgs: print string.strip(s) def debug(*args): if config.verbose(): msg = string.join(map(str,args)) print msg # ============================================================ # locally defined exceptions class CommandError (exceptions.Exception): def __init__(self, cmd_name, cmd_err, rc=None): self.cmd_name = cmd_name self.cmd_err = cmd_err self.rc = rc def dump(self): import types if type(self.cmd_err) == types.StringType: if self.rc: print "! %s (%d): %s" % (self.cmd_name, self.rc, self.cmd_err) else: print "! %s: %s" % (self.cmd_name, self.cmd_err) elif type(self.cmd_err) == types.ListType: if self.rc: print "! %s (error %d):" % (self.cmd_name, self.rc) else: print "! 
%s:" % (self.cmd_name) for s in self.cmd_err: print "> %s" %(string.strip(s)) else: print self.cmd_err class LconfError (exceptions.Exception): def __init__(self, args): self.args = args # ============================================================ # handle lctl interface class LCTLInterface: """ Manage communication with lctl """ def __init__(self, cmd): """ Initialize close by finding the lctl binary. """ self.lctl = find_prog(cmd) if not self.lctl: if config.noexec(): debug('! lctl not found') self.lctl = 'lctl' else: raise CommandError('lctl', "unable to find lctl binary.") def run(self, cmds): """ run lctl the cmds are written to stdin of lctl lctl doesn't return errors when run in script mode, so stderr is checked should modify command line to accept multiple commands, or create complex command line options """ debug("+", self.lctl, cmds) if config.noexec(): return (0, []) p = popen2.Popen3(self.lctl, 1) p.tochild.write(cmds + "\n") p.tochild.close() out = p.fromchild.readlines() err = p.childerr.readlines() ret = p.wait() if os.WIFEXITED(ret): rc = os.WEXITSTATUS(ret) else: rc = 0 if rc or len(err): raise CommandError(self.lctl, err, rc) return rc, out def runcmd(self, *args): """ run lctl using the command line """ cmd = string.join(map(str,args)) debug("+", self.lctl, cmd) rc, out = run(self.lctl, cmd) if rc: raise CommandError(self.lctl, out, rc) return rc, out def network(self, net, nid): """ initialized network and add "self" """ # Idea: "mynid" could be used for all network types to add "self," and then # this special case would be gone and the "self" hack would be hidden. 
if net in ('tcp', 'toe'): cmds = """ network %s mynid %s add_uuid self %s quit""" % (net, nid, nid) else: cmds = """ network %s add_uuid self %s quit""" % (net, nid) self.run(cmds) # create a new connection def connect(self, net, nid, port, servuuid, send_mem, recv_mem): if net in ('tcp', 'toe'): cmds = """ network %s add_uuid %s %s send_mem %d recv_mem %d connect %s %d quit""" % (net, servuuid, nid, send_mem, recv_mem, nid, port, ) else: cmds = """ network %s add_uuid %s %s connect %s %d quit""" % (net, servuuid, nid, nid, port, ) self.run(cmds) # add a route to a range def add_route(self, net, gw, lo, hi): cmds = """ network %s add_route %s %s %s quit """ % (net, gw, lo, hi) self.run(cmds) def del_route(self, net, gw, lo, hi): cmds = """ ignore_errors network %s del_route %s quit """ % (net, lo) self.run(cmds) # add a route to a host def add_route_host(self, net, uuid, gw, tgt): cmds = """ network %s add_uuid %s %s add_route %s %s quit """ % (net, uuid, tgt, gw, tgt) self.run(cmds) # add a route to a range def del_route_host(self, net, uuid, gw, tgt): cmds = """ ignore_errors network %s del_uuid %s del_route %s quit """ % (net, uuid, tgt) self.run(cmds) # disconnect one connection def disconnect(self, net, nid, port, servuuid): cmds = """ ignore_errors network %s disconnect %s del_uuid %s quit""" % (net, nid, servuuid) self.run(cmds) # disconnect all def disconnectAll(self, net): cmds = """ ignore_errors network %s del_uuid self disconnect quit""" % (net) self.run(cmds) # create a new device with lctl def newdev(self, attach, setup = ""): cmds = """ newdev attach %s setup %s quit""" % (attach, setup) self.run(cmds) # cleanup a device def cleanup(self, name, uuid): cmds = """ ignore_errors device $%s cleanup detach %s quit""" % (name, ('', 'force')[config.force()]) self.run(cmds) # create an lov def lov_setconfig(self, uuid, mdsuuid, stripe_cnt, stripe_sz, stripe_off, pattern, devlist): cmds = """ device $%s probe lov_setconfig %s %d %d %d %s %s quit""" % 
(mdsuuid, uuid, stripe_cnt, stripe_sz, stripe_off, pattern, devlist) self.run(cmds) # dump the log file def dump(self, dump_file): cmds = """ debug_kernel %s 1 quit""" % (dump_file) self.run(cmds) # get list of devices def device_list(self): rc, out = self.runcmd('device_list') return out # get lustre version def lustre_version(self): rc, out = self.runcmd('version') return out # ============================================================ # Various system-level functions # (ideally moved to their own module) # Run a command and return the output and status. # stderr is sent to /dev/null, could use popen3 to # save it if necessary def run(*args): cmd = string.join(map(str,args)) debug ("+", cmd) if config.noexec(): return (0, []) f = os.popen(cmd + ' 2>&1') out = f.readlines() ret = f.close() if ret: ret = ret >> 8 else: ret = 0 return (ret, out) # Run a command in the background. def run_daemon(*args): cmd = string.join(map(str,args)) debug ("+", cmd) if config.noexec(): return 0 f = os.popen(cmd + ' 2>&1') ret = f.close() if ret: ret = ret >> 8 else: ret = 0 return ret # Determine full path to use for an external command # searches dirname(argv[0]) first, then PATH def find_prog(cmd): syspath = string.split(os.environ['PATH'], ':') cmdpath = os.path.dirname(sys.argv[0]) syspath.insert(0, cmdpath); syspath.insert(0, os.path.join(cmdpath, '../../portals/linux/utils/')) for d in syspath: prog = os.path.join(d,cmd) if os.access(prog, os.X_OK): return prog return '' # Recursively look for file starting at base dir def do_find_file(base, mod): fullname = os.path.join(base, mod) if os.access(fullname, os.R_OK): return fullname for d in os.listdir(base): dir = os.path.join(base,d) if os.path.isdir(dir): module = do_find_file(dir, mod) if module: return module def find_module(src_dir, dev_dir, modname): mod = '%s.o' % (modname) module = src_dir +'/'+ dev_dir +'/'+ mod try: if os.access(module, os.R_OK): return module except OSError: pass return None # is the path a block 
device? def is_block(path): s = () try: s = os.stat(path) except OSError: return 0 return stat.S_ISBLK(s[stat.ST_MODE]) # build fs according to type # fixme: dangerous def mkfs(fstype, dev): if(fstype in ('ext3', 'extN')): mkfs = 'mkfs.ext2 -j -b 4096' else: print 'unsupported fs type: ', fstype if not is_block(dev): force = '-F' else: force = '' (ret, out) = run (mkfs, force, dev) if ret: panic("Unable to build fs:", dev) # enable hash tree indexing on fsswe # FIXME: this check can probably go away on 2.5 if fstype == 'extN': htree = 'echo "feature FEATURE_C5" | debugfs -w' (ret, out) = run (htree, dev) if ret: panic("Unable to enable htree:", dev) # some systems use /dev/loopN, some /dev/loop/N def loop_base(): import re loop = '/dev/loop' if not os.access(loop + str(0), os.R_OK): loop = loop + '/' if not os.access(loop + str(0), os.R_OK): panic ("can't access loop devices") return loop # find loop device assigned to thefile def find_loop(file): loop = loop_base() for n in xrange(0, MAX_LOOP_DEVICES): dev = loop + str(n) if os.access(dev, os.R_OK): (stat, out) = run('losetup', dev) if (out and stat == 0): m = re.search(r'\((.*)\)', out[0]) if m and file == m.group(1): return dev else: break return '' # create file if necessary and assign the first free loop device def init_loop(file, size, fstype): dev = find_loop(file) if dev: print 'WARNING file:', file, 'already mapped to', dev return dev if config.reformat() or not os.access(file, os.R_OK | os.W_OK): run("dd if=/dev/zero bs=1k count=0 seek=%d of=%s" %(size, file)) loop = loop_base() # find next free loop for n in xrange(0, MAX_LOOP_DEVICES): dev = loop + str(n) if os.access(dev, os.R_OK): (stat, out) = run('losetup', dev) if (stat): run('losetup', dev, file) return dev else: print "out of loop devices" return '' print "out of loop devices" return '' # undo loop assignment def clean_loop(file): dev = find_loop(file) if dev: ret, out = run('losetup -d', dev) if ret: log('unable to clean loop device:', dev, 'for 
file:', file) logall(out) # determine if dev is formatted as a filesystem def need_format(fstype, dev): # FIXME don't know how to implement this return 0 # initialize a block device if needed def block_dev(dev, size, fstype, format): if config.noexec(): return dev if not is_block(dev): dev = init_loop(dev, size, fstype) if config.reformat() or (need_format(fstype, dev) and format == 'yes'): mkfs(fstype, dev) # else: # panic("device:", dev, # "not prepared, and autoformat is not set.\n", # "Rerun with --reformat option to format ALL filesystems") return dev def if2addr(iface): """lookup IP address for an interface""" rc, out = run("/sbin/ifconfig", iface) if rc or not out: return None addr = string.split(out[1])[1] ip = string.split(addr, ':')[1] return ip def get_local_address(net_type, wildcard): """Return the local address for the network type.""" local = "" if net_type in ('tcp', 'toe'): if ':' in wildcard: iface, star = string.split(wildcard, ':') local = if2addr(iface) if not local: panic ("unable to determine ip for:", wildcard) else: host = socket.gethostname() local = socket.gethostbyname(host) elif net_type == 'elan': # awk '/NodeId/ { print $2 }' '/proc/elan/device0/position' try: fp = open('/proc/elan/device0/position', 'r') lines = fp.readlines() fp.close() for l in lines: a = string.split(l) if a[0] == 'NodeId': local = a[1] break except IOError, e: log(e) elif net_type == 'gm': fixme("automatic local address for GM") return local def is_prepared(uuid): """Return true if a device exists for the uuid""" # expect this format: # 1 UP ldlm ldlm ldlm_UUID 2 try: out = lctl.device_list() for s in out: if uuid == string.split(s)[4]: return 1 except CommandError, e: e.dump() return 0 # ============================================================ # Classes to prepare and cleanup the various objects # class Module: """ Base class for the rest of the modules. The default cleanup method is defined here, as well as some utilitiy funcs. 
""" def __init__(self, module_name, dom_node): self.dom_node = dom_node self.module_name = module_name self.name = get_attr(dom_node, 'name') self.uuid = get_attr(dom_node, 'uuid') self.kmodule_list = [] self._server = None self._connected = 0 def info(self, *args): msg = string.join(map(str,args)) print self.module_name + ":", self.name, self.uuid, msg def lookup_server(self, srv_uuid): """ Lookup a server's network information """ net = get_ost_net(self.dom_node.parentNode, srv_uuid) if not net: panic ("Unable to find a server for:", srv_uuid) self._server = Network(net) def get_server(self): return self._server def cleanup(self): """ default cleanup, used for most modules """ self.info() srv = self.get_server() if srv and local_net(srv): try: lctl.disconnect(srv.net_type, srv.nid, srv.port, srv.uuid) except CommandError, e: log(self.module_name, "disconnect failed: ", self.name) e.dump() cleanup_error(e.rc) try: lctl.cleanup(self.name, self.uuid) except CommandError, e: log(self.module_name, "cleanup failed: ", self.name) e.dump() cleanup_error(e.rc) def add_module(self, dev_dir, modname): """Append a module to list of modules to load.""" self.kmodule_list.append((dev_dir, modname)) def mod_loaded(self, modname): """Check if a module is already loaded. 
Look in /proc/modules for it.""" fp = open('/proc/modules') lines = fp.readlines() fp.close() # please forgive my tired fingers for this one ret = filter(lambda word, mod=modname: word == mod, map(lambda line: string.split(line)[0], lines)) return ret def load_module(self): """Load all the modules in the list in the order they appear.""" for dev_dir, mod in self.kmodule_list: # (rc, out) = run ('/sbin/lsmod | grep -s', mod) if self.mod_loaded(mod) and not config.noexec(): continue log ('loading module:', mod) if config.src_dir(): module = find_module(config.src_dir(),dev_dir, mod) if not module: panic('module not found:', mod) (rc, out) = run('/sbin/insmod', module) if rc: raise CommandError('insmod', out, rc) else: (rc, out) = run('/sbin/modprobe', mod) if rc: raise CommandError('modprobe', out, rc) def cleanup_module(self): """Unload the modules in the list in reverse order.""" rev = self.kmodule_list rev.reverse() for dev_dir, mod in rev: if not self.mod_loaded(mod): continue # debug hack if mod == 'portals' and config.dump_file(): lctl.dump(config.dump_file()) log('unloading module:', mod) if config.noexec(): continue (rc, out) = run('/sbin/rmmod', mod) if rc: log('! 
unable to unload module:', mod) logall(out) class Network(Module): def __init__(self,dom_node): Module.__init__(self, 'NETWORK', dom_node) self.net_type = get_attr(dom_node,'type') self.nid = get_text(dom_node, 'server', '*') self.port = get_text_int(dom_node, 'port', 0) self.send_mem = get_text_int(dom_node, 'send_mem', DEFAULT_TCPBUF) self.recv_mem = get_text_int(dom_node, 'recv_mem', DEFAULT_TCPBUF) if '*' in self.nid: self.nid = get_local_address(self.net_type, self.nid) if not self.nid: panic("unable to set nid for", self.net_type, self.nid) debug("nid:", self.nid) self.add_module('portals/linux/oslib/', 'portals') if node_needs_router(): self.add_module('portals/linux/router', 'kptlrouter') if self.net_type == 'tcp': self.add_module('portals/linux/socknal', 'ksocknal') if self.net_type == 'toe': self.add_module('portals/linux/toenal', 'ktoenal') if self.net_type == 'elan': self.add_module('portals/linux/rqswnal', 'kqswnal') if self.net_type == 'gm': self.add_module('portals/linux/gmnal', 'kgmnal') self.add_module('lustre/obdclass', 'obdclass') self.add_module('lustre/ptlrpc', 'ptlrpc') def prepare(self): self.info(self.net_type, self.nid, self.port) if self.net_type in ('tcp', 'toe'): nal_id = '' # default is socknal if self.net_type == 'toe': nal_id = '-N 4' ret, out = run(TCP_ACCEPTOR, '-s', self.send_mem, '-r', self.recv_mem, nal_id, self.port) if ret: raise CommandError(TCP_ACCEPTOR, out, ret) ret = self.dom_node.getElementsByTagName('route_tbl') for a in ret: for r in a.getElementsByTagName('route'): net_type = get_attr(r, 'type') gw = get_attr(r, 'gw') lo = get_attr(r, 'lo') hi = get_attr(r,'hi', '') lctl.add_route(net_type, gw, lo, hi) if net_type in ('tcp', 'toe') and net_type == self.net_type and hi == '': srv = nid2server(self.dom_node.parentNode.parentNode, lo) if not srv: panic("no server for nid", lo) else: lctl.connect(srv.net_type, srv.nid, srv.port, srv.uuid, srv.send_mem, srv.recv_mem) lctl.network(self.net_type, self.nid) lctl.newdev(attach 
= "ptlrpc RPCDEV RPCDEV_UUID") def cleanup(self): self.info(self.net_type, self.nid, self.port) ret = self.dom_node.getElementsByTagName('route_tbl') for a in ret: for r in a.getElementsByTagName('route'): lo = get_attr(r, 'lo') hi = get_attr(r,'hi', '') if self.net_type in ('tcp', 'toe') and hi == '': srv = nid2server(self.dom_node.parentNode.parentNode, lo) if not srv: panic("no server for nid", lo) else: try: lctl.disconnect(srv.net_type, srv.nid, srv.port, srv.uuid) except CommandError, e: print "disconnect failed: ", self.name e.dump() cleanup_error(e.rc) try: lctl.del_route(self.net_type, self.nid, lo, hi) except CommandError, e: print "del_route failed: ", self.name e.dump() cleanup_error(e.rc) try: lctl.cleanup("RPCDEV", "RPCDEV_UUID") except CommandError, e: print "cleanup failed: ", self.name e.dump() cleanup_error(e.rc) try: lctl.disconnectAll(self.net_type) except CommandError, e: print "disconnectAll failed: ", self.name e.dump() cleanup_error(e.rc) if self.net_type in ('tcp', 'toe'): # yikes, this ugly! 
need to save pid in /var/something run("killall acceptor") class LDLM(Module): def __init__(self,dom_node): Module.__init__(self, 'LDLM', dom_node) self.add_module('lustre/ldlm', 'ldlm') def prepare(self): if is_prepared(self.uuid): return self.info() lctl.newdev(attach="ldlm %s %s" % (self.name, self.uuid), setup ="") class LOV(Module): def __init__(self,dom_node): Module.__init__(self, 'LOV', dom_node) self.mds_uuid = get_first_ref(dom_node, 'mds') mds= lookup(dom_node.parentNode, self.mds_uuid) self.mds_name = getName(mds) devs = dom_node.getElementsByTagName('devices') if len(devs) > 0: dev_node = devs[0] self.stripe_sz = get_attr_int(dev_node, 'stripesize', 65536) self.stripe_off = get_attr_int(dev_node, 'stripeoffset', 0) self.pattern = get_attr_int(dev_node, 'pattern', 0) self.devlist = get_all_refs(dev_node, 'osc') self.stripe_cnt = get_attr_int(dev_node, 'stripecount', len(self.devlist)) self.add_module('lustre/mdc', 'mdc') self.add_module('lustre/lov', 'lov') def prepare(self): if is_prepared(self.uuid): return for osc_uuid in self.devlist: osc = lookup(self.dom_node.parentNode, osc_uuid) if osc: n = OSC(osc) try: # Ignore connection failures, because the LOV will DTRT with # an unconnected OSC. 
n.prepare(ignore_connect_failure=1) except CommandError: print "Error preparing OSC %s (inactive)\n" % osc_uuid else: panic('osc not found:', osc_uuid) mdc_uuid = prepare_mdc(self.dom_node.parentNode, self.mds_uuid) self.info(self.mds_uuid, self.stripe_cnt, self.stripe_sz, self.stripe_off, self.pattern, self.devlist, self.mds_name) lctl.newdev(attach="lov %s %s" % (self.name, self.uuid), setup ="%s" % (mdc_uuid)) def cleanup(self): if not is_prepared(self.uuid): return for osc_uuid in self.devlist: osc = lookup(self.dom_node.parentNode, osc_uuid) if osc: n = OSC(osc) n.cleanup() else: panic('osc not found:', osc_uuid) Module.cleanup(self) cleanup_mdc(self.dom_node.parentNode, self.mds_uuid) def load_module(self): for osc_uuid in self.devlist: osc = lookup(self.dom_node.parentNode, osc_uuid) if osc: n = OSC(osc) n.load_module() break else: panic('osc not found:', osc_uuid) Module.load_module(self) def cleanup_module(self): Module.cleanup_module(self) for osc_uuid in self.devlist: osc = lookup(self.dom_node.parentNode, osc_uuid) if osc: n = OSC(osc) n.cleanup_module() break else: panic('osc not found:', osc_uuid) class LOVConfig(Module): def __init__(self,dom_node): Module.__init__(self, 'LOVConfig', dom_node) self.lov_uuid = get_first_ref(dom_node, 'lov') l = lookup(dom_node.parentNode, self.lov_uuid) self.lov = LOV(l) def prepare(self): lov = self.lov self.info(lov.mds_uuid, lov.stripe_cnt, lov.stripe_sz, lov.stripe_off, lov.pattern, lov.devlist, lov.mds_name) lctl.lov_setconfig(lov.uuid, lov.mds_name, lov.stripe_cnt, lov.stripe_sz, lov.stripe_off, lov.pattern, string.join(lov.devlist)) def cleanup(self): #nothing to do here pass class MDS(Module): def __init__(self,dom_node): Module.__init__(self, 'MDS', dom_node) self.devname, self.size = get_device(dom_node) self.fstype = get_text(dom_node, 'fstype') # FIXME: if fstype not set, then determine based on kernel version self.format = get_text(dom_node, 'autoformat', "no") if self.fstype == 'extN': 
self.add_module('lustre/extN', 'extN') self.add_module('lustre/mds', 'mds') self.add_module('lustre/mds', 'mds_%s' % (self.fstype)) def prepare(self): if is_prepared(self.uuid): return self.info(self.devname, self.fstype, self.format) blkdev = block_dev(self.devname, self.size, self.fstype, self.format) if not is_prepared('MDT_UUID'): lctl.newdev(attach="mdt %s %s" % ('MDT', 'MDT_UUID'), setup ="") lctl.newdev(attach="mds %s %s" % (self.name, self.uuid), setup ="%s %s" %(blkdev, self.fstype)) def cleanup(self): if is_prepared('MDT_UUID'): try: lctl.cleanup("MDT", "MDT_UUID") except CommandError, e: print "cleanup failed: ", self.name e.dump() cleanup_error(e.rc) if not is_prepared(self.uuid): return Module.cleanup(self) clean_loop(self.devname) # Very unusual case, as there is no MDC element in the XML anymore # Builds itself from an MDS node class MDC(Module): def __init__(self,dom_node): self.mds = MDS(dom_node) self.dom_node = dom_node self.module_name = 'MDC' self.kmodule_list = [] self._server = None self._connected = 0 host = socket.gethostname() self.name = 'MDC_%s' % (self.mds.name) self.uuid = '%s_%05x_%05x' % (self.name, int(random.random() * 1048576), int(random.random() * 1048576)) self.lookup_server(self.mds.uuid) self.add_module('lustre/mdc', 'mdc') def prepare(self): if is_prepared(self.uuid): return self.info(self.mds.uuid) srv = self.get_server() lctl.connect(srv.net_type, srv.nid, srv.port, srv.uuid, srv.send_mem, srv.recv_mem) lctl.newdev(attach="mdc %s %s" % (self.name, self.uuid), setup ="%s %s" %(self.mds.uuid, srv.uuid)) class OBD(Module): def __init__(self, dom_node): Module.__init__(self, 'OBD', dom_node) self.obdtype = get_attr(dom_node, 'type') self.devname, self.size = get_device(dom_node) self.fstype = get_text(dom_node, 'fstype') # FIXME: if fstype not set, then determine based on kernel version self.format = get_text(dom_node, 'autoformat', 'yes') if self.fstype == 'extN': self.add_module('lustre/extN', 'extN') 
self.add_module('lustre/' + self.obdtype, self.obdtype) # need to check /proc/mounts and /etc/mtab before # formatting anything. # FIXME: check if device is already formatted. def prepare(self): if is_prepared(self.uuid): return self.info(self.obdtype, self.devname, self.size, self.fstype, self.format) if self.obdtype == 'obdecho': blkdev = '' else: blkdev = block_dev(self.devname, self.size, self.fstype, self.format) lctl.newdev(attach="%s %s %s" % (self.obdtype, self.name, self.uuid), setup ="%s %s" %(blkdev, self.fstype)) def cleanup(self): if not is_prepared(self.uuid): return Module.cleanup(self) if not self.obdtype == 'obdecho': clean_loop(self.devname) class OST(Module): def __init__(self,dom_node): Module.__init__(self, 'OST', dom_node) self.obd_uuid = get_first_ref(dom_node, 'obd') self.add_module('lustre/ost', 'ost') def prepare(self): if is_prepared(self.uuid): return self.info(self.obd_uuid) lctl.newdev(attach="ost %s %s" % (self.name, self.uuid), setup ="%s" % (self.obd_uuid)) # virtual interface for OSC and LOV class VOSC(Module): def __init__(self,dom_node): Module.__init__(self, 'VOSC', dom_node) if dom_node.nodeName == 'lov': self.osc = LOV(dom_node) else: self.osc = OSC(dom_node) def prepare(self): self.osc.prepare() def cleanup(self): self.osc.cleanup() def load_module(self): self.osc.load_module() def cleanup_module(self): self.osc.cleanup_module() class OSC(Module): def __init__(self,dom_node): Module.__init__(self, 'OSC', dom_node) self.obd_uuid = get_first_ref(dom_node, 'obd') self.ost_uuid = get_first_ref(dom_node, 'ost') self.lookup_server(self.ost_uuid) self.add_module('lustre/osc', 'osc') def prepare(self, ignore_connect_failure = 0): if is_prepared(self.uuid): return self.info(self.obd_uuid, self.ost_uuid) srv = self.get_server() try: if local_net(srv): lctl.connect(srv.net_type, srv.nid, srv.port, srv.uuid, srv.send_mem, srv.recv_mem) else: r = find_route(srv) if r: lctl.add_route_host(r[0], srv.uuid, r[1], r[2]) else: panic ("no route 
to", srv.nid) except CommandError: if (ignore_connect_failure == 0): pass lctl.newdev(attach="osc %s %s" % (self.name, self.uuid), setup ="%s %s" %(self.obd_uuid, srv.uuid)) def cleanup(self): if not is_prepared(self.uuid): return srv = self.get_server() if local_net(srv): Module.cleanup(self) else: self.info(self.obd_uuid, self.ost_uuid) r = find_route(srv) if r: try: lctl.del_route_host(r[0], srv.uuid, r[1], r[2]) except CommandError, e: print "del_route failed: ", self.name e.dump() cleanup_error(e.rc) Module.cleanup(self) class ECHO_CLIENT(Module): def __init__(self,dom_node): Module.__init__(self, 'ECHO_CLIENT', dom_node) self.add_module('lustre/obdecho', 'obdecho') self.lov_uuid = get_first_ref(dom_node, 'osc') l = lookup(self.dom_node.parentNode, self.lov_uuid) self.osc = VOSC(l) def prepare(self): if is_prepared(self.uuid): return self.osc.prepare() # XXX This is so cheating. -p self.info(self.lov_uuid) lctl.newdev(attach="echo_client %s %s" % (self.name, self.uuid), setup = self.lov_uuid) def cleanup(self): if not is_prepared(self.uuid): return self.osc.cleanup() def load_module(self): self.osc.load_module() Module.load_module(self) def cleanup_module(self): Module.cleanup_module(self) self.osc.cleanup_module() class Mountpoint(Module): def __init__(self,dom_node): Module.__init__(self, 'MTPT', dom_node) self.path = get_text(dom_node, 'path') self.mds_uuid = get_first_ref(dom_node, 'mds') self.lov_uuid = get_first_ref(dom_node, 'osc') self.add_module('lustre/mdc', 'mdc') self.add_module('lustre/llite', 'llite') l = lookup(self.dom_node.parentNode, self.lov_uuid) self.osc = VOSC(l) def prepare(self): self.osc.prepare() mdc_uuid = prepare_mdc(self.dom_node.parentNode, self.mds_uuid) self.info(self.path, self.mds_uuid, self.lov_uuid) cmd = "mount -t lustre_lite -o osc=%s,mdc=%s none %s" % \ (self.lov_uuid, mdc_uuid, self.path) run("mkdir", self.path) ret, val = run(cmd) if ret: panic("mount failed:", self.path) def cleanup(self): self.info(self.path, 
self.mds_uuid,self.lov_uuid) if config.force(): (rc, out) = run("umount -f", self.path) else: (rc, out) = run("umount", self.path) if rc: log("umount failed, cleanup will most likely not work.") l = lookup(self.dom_node.parentNode, self.lov_uuid) self.osc.cleanup() cleanup_mdc(self.dom_node.parentNode, self.mds_uuid) def load_module(self): self.osc.load_module() Module.load_module(self) def cleanup_module(self): Module.cleanup_module(self) self.osc.cleanup_module() # ============================================================ # XML processing and query # TODO: Change query funcs to use XPath, which is muc cleaner def get_device(obd): list = obd.getElementsByTagName('device') if len(list) > 0: dev = list[0] dev.normalize(); size = get_attr_int(dev, 'size', 0) return dev.firstChild.data, size return '', 0 # Get the text content from the first matching child # If there is no content (or it is all whitespace), return # the default def get_text(dom_node, tag, default=""): list = dom_node.getElementsByTagName(tag) if len(list) > 0: dom_node = list[0] dom_node.normalize() if dom_node.firstChild: txt = string.strip(dom_node.firstChild.data) if txt: return txt return default def get_text_int(dom_node, tag, default=0): list = dom_node.getElementsByTagName(tag) n = default if len(list) > 0: dom_node = list[0] dom_node.normalize() if dom_node.firstChild: txt = string.strip(dom_node.firstChild.data) if txt: try: n = int(txt) except ValueError: panic("text value is not integer:", txt) return n def get_attr(dom_node, attr, default=""): v = dom_node.getAttribute(attr) if v: return v return default def get_attr_int(dom_node, attr, default=0): n = default v = dom_node.getAttribute(attr) if v: try: n = int(v) except ValueError: panic("attr value is not integer", v) return n def get_first_ref(dom_node, tag): """ Get the first uuidref of the type TAG. Used one only one is expected. 
Returns the uuid.""" uuid = None refname = '%s_ref' % tag list = dom_node.getElementsByTagName(refname) if len(list) > 0: uuid = getRef(list[0]) return uuid def get_all_refs(dom_node, tag): """ Get all the refs of type TAG. Returns list of uuids. """ uuids = [] refname = '%s_ref' % tag list = dom_node.getElementsByTagName(refname) if len(list) > 0: for i in list: uuids.append(getRef(i)) return uuids def get_ost_net(dom_node, uuid): ost = lookup(dom_node, uuid) uuid = get_first_ref(ost, 'network') if not uuid: return None return lookup(dom_node, uuid) def nid2server(dom_node, nid): netlist = dom_node.getElementsByTagName('network') for net_node in netlist: if get_text(net_node, 'server') == nid: return Network(net_node) return None def lookup(dom_node, uuid): for n in dom_node.childNodes: if n.nodeType == n.ELEMENT_NODE: if getUUID(n) == uuid: return n else: n = lookup(n, uuid) if n: return n return None # Get name attribute of dom_node def getName(dom_node): return dom_node.getAttribute('name') def getRef(dom_node): return dom_node.getAttribute('uuidref') # Get name attribute of dom_node def getUUID(dom_node): return dom_node.getAttribute('uuid') # the tag name is the service type # fixme: this should do some checks to make sure the dom_node is a service def getServiceType(dom_node): return dom_node.nodeName # # determine what "level" a particular node is at. # the order of iniitailization is based on level. def getServiceLevel(dom_node): type = getServiceType(dom_node) ret=0; if type in ('network',): ret = 10 elif type in ('device', 'ldlm'): ret = 20 elif type in ('obd', 'mdd'): ret = 30 elif type in ('mds','ost'): ret = 40 elif type in ('mdc','osc'): ret = 50 elif type in ('lov', 'lovconfig'): ret = 60 elif type in ('mountpoint', 'echo_client'): ret = 70 if ret < config.minlevel() or ret > config.maxlevel(): ret = 0 return ret # # return list of services in a profile. 
# list is a list of tuples
# [(level, dom_node),]
def getServices(lustreNode, profileNode):
    """Return the services referenced by PROFILENODE, sorted by level.

    Every element child of PROFILENODE is treated as a reference (uuidref)
    to a service element somewhere below LUSTRENODE.  Services whose level
    is 0 (see getServiceLevel) are left out.  Calls panic() when a
    reference cannot be resolved.

    Returns a list of (level, dom_node) tuples in ascending order.
    """
    services = []
    for ref in profileNode.childNodes:
        if ref.nodeType != ref.ELEMENT_NODE:
            continue
        servNode = lookup(lustreNode, getRef(ref))
        if not servNode:
            # Dump the offending reference node before giving up.
            print(ref)
            panic('service not found: ' + getRef(ref))
        level = getServiceLevel(servNode)
        if level > 0:
            services.append((level, servNode))
    services.sort()
    return services

def getByName(lustreNode, name, tag):
    """Return the first <TAG> element below LUSTRENODE named NAME, or None."""
    for nd in lustreNode.getElementsByTagName(tag):
        if getName(nd) == name:
            return nd
    return None


############################################################
# MDC UUID hack -
# FIXME: clean this mess up!
#
# Maps mds uuid -> mdc uuid for every MDC already handled by
# prepare_mdc() or cleanup_mdc().
saved_mdc = {}

def prepare_mdc(dom_node, mds_uuid):
    """Prepare an MDC for the mds MDS_UUID once and return the MDC's uuid.

    Later calls for the same mds return the remembered uuid without
    preparing again.  Calls panic() when the mds element is not found
    below DOM_NODE.
    """
    mds_node = lookup(dom_node, mds_uuid)
    if not mds_node:
        panic("no mds:", mds_uuid)
    if saved_mdc.has_key(mds_uuid):
        return saved_mdc[mds_uuid]
    mdc = MDC(mds_node)
    mdc.prepare()
    saved_mdc[mds_uuid] = mdc.uuid
    return mdc.uuid

def cleanup_mdc(dom_node, mds_uuid):
    """Clean up the MDC for the mds MDS_UUID, at most once per mds.

    The mds is recorded in saved_mdc afterwards so that a second call for
    the same mds does nothing.  Calls panic() when the mds element is not
    found below DOM_NODE.
    """
    mds_node = lookup(dom_node, mds_uuid)
    if not mds_node:
        panic("no mds:", mds_uuid)
    if not saved_mdc.has_key(mds_uuid):
        mdc = MDC(mds_node)
        mdc.cleanup()
        saved_mdc[mds_uuid] = mdc.uuid


############################################################
# routing ("rooting")
#
routes = []        # (type, gw, lo, hi) tuples built by init_route_config()
local_node = []    # (type, server) for each network of the local node
router_flag = 0    # set to 1 once a router is involved

def init_node(dom_node):
    """Record the (type, server) pair of every <network> below DOM_NODE."""
    for dom_net in dom_node.getElementsByTagName('network'):
        net_type = get_attr(dom_net, 'type')
        gw = get_text(dom_net, 'server')
        local_node.append((net_type, gw))

def node_needs_router():
    """Return the module-level router flag (0 or 1)."""
    return router_flag

def get_routes(type, gw, dom_net):
    """ Return the routes as a list of tuples of the form:
    [(type, gw, lo, hi),]

    lo and hi are the attributes of each <route> found inside a
    <route_tbl> below DOM_NET; a missing attribute yields ''."""
    res = []
    for tbl in dom_net.getElementsByTagName('route_tbl'):
        for route in tbl.getElementsByTagName('route'):
            lo = get_attr(route, 'lo')
            hi = get_attr(route, 'hi', '')
            res.append((type, gw, lo, hi))
    return res

def init_route_config(lustre):
    """ Scan the lustre config looking for routers.  Build list of routes.

    For every <node> with a 'router' attribute and every local network
    type, the router's server on that same network type is the gateway;
    the routes of the router's other networks are collected behind it.
    Resets the module-level route list and sets router_flag when a router
    node is seen. """
    global routes, router_flag
    routes = []
    for node in lustre.getElementsByTagName('node'):
        if not get_attr(node, 'router'):
            continue
        router_flag = 1
        netlist = node.getElementsByTagName('network')
        for (local_type, local_nid) in local_node:
            gw = None
            for dom_net in netlist:
                if local_type == get_attr(dom_net, 'type'):
                    gw = get_text(dom_net, 'server')
                    break
            if not gw:
                # This router has no usable server on our network type.
                continue
            for dom_net in netlist:
                if local_type != get_attr(dom_net, 'type'):
                    for route in get_routes(local_type, gw, dom_net):
                        routes.append(route)

def local_net(net):
    """Return 1 when NET's net_type matches a local network type, else 0."""
    for iface in local_node:
        if net.net_type == iface[0]:
            return 1
    return 0

def find_route(net):
    """Return the first route whose 'lo' field equals NET's nid, or None.

    The local network list is deliberately not consulted: it is empty on
    router nodes, and indexing it here used to raise IndexError.
    """
    to = net.nid
    debug ('looking for route to', net.net_type, to)
    for r in routes:
        if r[2] == to:
            return r
    return None


############################################################
# lconf level logic
# Start a service.
def startService(dom_node, module_flag):
    """Instantiate the service object for DOM_NODE and run one step on it.

    With MODULE_FLAG set the step is load_module() (or cleanup_module()
    when cleaning up) and is skipped under config.nomod().  Otherwise the
    step is prepare() (or cleanup() when cleaning up) and is skipped under
    config.nosetup().  Calls panic() for an unknown service type.
    """
    svc_type = getServiceType(dom_node)
    debug('Service:', svc_type, getName(dom_node), getUUID(dom_node))
    # there must be a more dynamic way of doing this...
    svc = None
    if svc_type == 'ldlm':
        svc = LDLM(dom_node)
    elif svc_type == 'lov':
        svc = LOV(dom_node)
    elif svc_type == 'lovconfig':
        svc = LOVConfig(dom_node)
    elif svc_type == 'network':
        svc = Network(dom_node)
    elif svc_type == 'obd':
        svc = OBD(dom_node)
    elif svc_type == 'ost':
        svc = OST(dom_node)
    elif svc_type == 'mds':
        svc = MDS(dom_node)
    elif svc_type == 'osc':
        svc = VOSC(dom_node)
    elif svc_type == 'mdc':
        svc = MDC(dom_node)
    elif svc_type == 'mountpoint':
        svc = Mountpoint(dom_node)
    elif svc_type == 'echo_client':
        svc = ECHO_CLIENT(dom_node)
    else:
        panic ("unknown service type:", svc_type)

    if module_flag:
        if config.nomod():
            return
        if config.cleanup():
            svc.cleanup_module()
        else:
            svc.load_module()
    else:
        if config.nosetup():
            return
        if config.cleanup():
            svc.cleanup()
        else:
            svc.prepare()

#
# Prepare the system to run lustre using a particular profile
# in the configuration.
# * load the modules
# * setup networking for the current node
# * make sure partitions are in place and prepared
# * initialize devices with lctl
# Levels are important, and need to be enforced.
def startProfile(lustreNode, profileNode, module_flag):
    """Run one step (module or setup, per MODULE_FLAG) on a profile's services.

    Services are processed in ascending level order, or in descending
    order when cleaning up.  Calls panic() when PROFILENODE is empty.
    """
    if not profileNode:
        # Report the node we were given; the name used here previously was
        # undefined and raised NameError instead of reaching panic().
        panic("profile:", profileNode, "not found.")
    services = getServices(lustreNode, profileNode)
    if config.cleanup():
        services.reverse()
    for s in services:
        startService(s[1], module_flag)

#
# Load profile for the first name in HOSTS that has a <node> entry.
def doHost(lustreNode, hosts):
    """Start or clean up every profile of the first matching host node.

    Prints a message and returns when none of HOSTS has a <node> entry.
    Setup runs in two passes over the profiles: modules first, then the
    services; cleanup runs the same two passes in the opposite order.
    """
    global router_flag
    dom_node = None
    for h in hosts:
        dom_node = getByName(lustreNode, h, 'node')
        if dom_node:
            break

    if not dom_node:
        print('No host entry found.')
        return

    if not get_attr(dom_node, 'router'):
        init_node(dom_node)
        init_route_config(lustreNode)
    else:
        router_flag = 1

    # Two step process: (1) load modules, (2) setup lustre
    # if not cleaning, load modules first.
    module_flag = not config.cleanup()
    reflist = dom_node.getElementsByTagName('profile')
    for profile in reflist:
        startProfile(lustreNode, profile, module_flag)

    if not config.cleanup():
        sys_set_debug_path()
        script = config.gdb_script()
        run(lctl.lctl, ' modules >', script)
        if config.gdb():
            # dump /tmp/ogdb and sleep/pause here
            log ("The GDB module script is in", script)
            time.sleep(5)

    # Second pass: the step that was not run above.
    module_flag = not module_flag
    for profile in reflist:
        startProfile(lustreNode, profile, module_flag)

############################################################
# Command line processing
#
def parse_cmdline(argv):
    """Parse ARGV, store the options in the global config, return the rest.

    An invalid option prints "invalid opt" followed by the usage text.
    Returns the list of non-option arguments.
    """
    short_opts = "hdnvf"
    long_opts = ["ldap", "reformat", "lustre=", "verbose", "gdb",
                 "portals=", "makeldiff", "cleanup", "noexec", "help",
                 "node=", "nomod", "nosetup", "dump=", "force",
                 "minlevel=", "maxlevel=", "get="]
    opts = []
    args = []
    try:
        opts, args = getopt.getopt(argv, short_opts, long_opts)
    except getopt.error:
        print("invalid opt")
        usage()

    for o, a in opts:
        if o in ("-h", "--help"):
            usage()
        if o in ("-d","--cleanup"):
            config.cleanup(1)
        if o in ("-v", "--verbose"):
            config.verbose(1)
        if o in ("-n", "--noexec"):
            # A dry run is only useful when the commands are shown.
            config.noexec(1)
            config.verbose(1)
        # NOTE(review): unlike the other options these two assign
        # attributes instead of calling an accessor -- confirm intended.
        if o == "--portals":
            config.portals = a
        if o == "--lustre":
            config.lustre = a
        if o == "--reformat":
            config.reformat(1)
        if o == "--node":
            config.node(a)
        if o == "--get":
            # Documented in the usage text and read back through
            # config.url() at startup.  Assumes config.url() accepts a
            # value like the other accessors do -- TODO confirm.
            config.url(a)
        if o == "--gdb":
            config.gdb(1)
        if o == "--nomod":
            config.nomod(1)
        if o == "--nosetup":
            config.nosetup(1)
        if o == "--dump":
            config.dump_file(a)
        if o in ("-f", "--force"):
            config.force(1)
        if o in ("--minlevel",):
            config.minlevel(a)
        if o in ("--maxlevel",):
            config.maxlevel(a)

    return args

def fetch(url):
    """Return the data read from URL; any failure falls back to usage()."""
    import urllib
    data = ""
    try:
        s = urllib.urlopen(url)
        data = s.read()
    except:
        usage()
    return data

def setupModulePath(cmd):
    """Point config.src_dir two levels above CMD's directory when that
    directory contains a readable Makefile."""
    base = os.path.dirname(cmd)
    if os.access(base+"/Makefile", os.R_OK):
        config.src_dir(base + "/../../")

def sys_set_debug_path():
    """Write config.debug_path() to /proc/sys/portals/debug_path.

    Does nothing under config.noexec().  An IOError is printed and
    otherwise ignored.
    """
    debug("debug path: ", config.debug_path())
    if config.noexec():
        return
    try:
        fp = open('/proc/sys/portals/debug_path', 'w')
        fp.write(config.debug_path())
        fp.close()
    except IOError:
        print(sys.exc_info()[1])

#/proc/sys/net/core/rmem_max
#/proc/sys/net/core/wmem_max
def sys_set_netmem_max(path, max):
    """Raise the integer stored in the file PATH to MAX if it is lower.

    Does nothing under config.noexec().
    """
    debug("setting", path, "to at least", max)
    if config.noexec():
        return
    fp = open(path)
    line = fp.readline()
    fp.close()
    cur = int(line)
    if max > cur:
        fp = open(path, 'w')
        fp.write('%d\n' % (max))
        fp.close()


def sys_make_devices():
    """Create the /dev/portals and /dev/obd nodes when they are not readable."""
    if not os.access('/dev/portals', os.R_OK):
        run('mknod /dev/portals c 10 240')
    if not os.access('/dev/obd', os.R_OK):
        run('mknod /dev/obd c 10 241')


# Add dir to the global PATH, if not already there.
def add_to_path(new_dir):
    """Append NEW_DIR to the PATH environment variable unless already listed.

    When PATH is not set at all it is created holding just NEW_DIR.
    """
    current = os.environ.get('PATH')
    if current is None:
        os.environ['PATH'] = new_dir
        return
    if new_dir in current.split(':'):
        return
    os.environ['PATH'] = current + ':' + new_dir


DEFAULT_PATH = ('/sbin', '/usr/sbin', '/bin', '/usr/bin')
# ensure basic elements are in the system path
def sanitise_path():
    for dir in DEFAULT_PATH:
        add_to_path(dir)

# Initialize or shutdown lustre according to a configuration file
#   * prepare the system for lustre
#   * configure devices with lctl
# Shutdown does steps in reverse
#
def main():
    """Parse the command line, load the XML configuration and process the
    entry for this host.

    The configuration comes from the first non-option argument, or from
    config.url() when no argument is given; otherwise usage() is shown.
    Exits with status 1 when /dev/urandom or the config file is unreadable.
    """
    global TCP_ACCEPTOR, lctl, MAXTCPBUF
    host = socket.gethostname()

    # the PRNG is normally seeded with time(), which is not so good for starting
    # time-synchronized clusters
    try:
        urandom = open('/dev/urandom', 'r')
    except IOError:
        # open() raises on failure rather than returning a false value.
        print('Unable to open /dev/urandom!')
        sys.exit(1)
    seed = urandom.read(32)
    urandom.close()
    random.seed(seed)

    sanitise_path()

    args = parse_cmdline(sys.argv[1:])
    if len(args) > 0:
        if not os.access(args[0], os.R_OK):
            print('File not found or readable: %s' % args[0])
            sys.exit(1)
        dom = xml.dom.minidom.parse(args[0])
    elif config.url():
        xmldata = fetch(config.url())
        dom = xml.dom.minidom.parseString(xmldata)
    else:
        usage()

    # Names to try, in order, when looking for this host's <node> entry.
    node_list = []
    if config.node():
        node_list.append(config.node())
    else:
        if len(host) > 0:
            node_list.append(host)
        node_list.append('localhost')
    debug("configuring for host: ", node_list)

    if len(host) > 0:
        # Suffix both paths with the hostname.
        config._debug_path = config._debug_path + '-' + host
        config._gdb_script = config._gdb_script + '-' + host

    TCP_ACCEPTOR = find_prog('acceptor')
    if not TCP_ACCEPTOR:
        if config.noexec():
            # A dry run can carry on with a placeholder name.
            TCP_ACCEPTOR = 'acceptor'
            debug('! acceptor not found')
        else:
            panic('acceptor not found')

    lctl = LCTLInterface('lctl')

    setupModulePath(sys.argv[0])
    sys_make_devices()
    sys_set_netmem_max('/proc/sys/net/core/rmem_max', MAXTCPBUF)
    sys_set_netmem_max('/proc/sys/net/core/wmem_max', MAXTCPBUF)
    doHost(dom.documentElement, node_list)

if __name__ == "__main__":
    try:
        main()
    except LconfError:
        print(sys.exc_info()[1])
    except CommandError:
        e = sys.exc_info()[1]
        e.dump()
        sys.exit(e.rc)

    # Report the first error recorded during cleanup as the exit status.
    if first_cleanup_error:
        sys.exit(first_cleanup_error)