#!/usr/bin/env python
#
#  Copyright (C) 2002 Cluster File Systems, Inc.
#   Author: Robert Read
#   This file is part of Lustre, http://www.lustre.org.
#
#   Lustre is free software; you can redistribute it and/or
#   modify it under the terms of version 2 of the GNU General Public
#   License as published by the Free Software Foundation.
#
#   Lustre is distributed in the hope that it will be useful,
#   but WITHOUT ANY WARRANTY; without even the implied warranty of
#   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
#   GNU General Public License for more details.
#
#   You should have received a copy of the GNU General Public License
#   along with Lustre; if not, write to the Free Software
#   Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
#
# lconf - lustre configuration tool
#
# lconf is the main driver script for starting and stopping
# lustre filesystem services.
#
# Based in part on the XML obdctl modifications done by Brian Behlendorf

import sys, getopt
import string, os, stat, socket
import re
import xml.dom.minidom

# popen2 and exceptions only exist on Python 2.  Nothing in this file needs
# them any more (lctl is driven through subprocess and CommandError derives
# from the builtin Exception), but they are still imported when available.
try:
    import popen2, exceptions
except ImportError:
    popen2 = exceptions = None

# Global parameters
TCP_ACCEPTOR = ''
options = {}

#
# Maximum number of devices to search for.
# (the /dev/loop* nodes need to be created beforehand)
MAX_LOOP_DEVICES = 256


def usage():
    """Print the command line summary and exit."""
    print("""usage: lconf config.xml

config.xml          Lustre configuration in xml format.
--get URL           URL to fetch a config file
-v | --verbose      Print system commands as they are run
-d | --debug        Print system commands, but does not run them
--host HOSTNAME     Load config for HOSTNAME
--cleanup           Cleans up config. (Shutdown)
-h | --help         Print this help
""")
    # TODO: options that are parsed but not implemented yet
    #   --ldap server       LDAP server with lustre config database
    #   --reformat          Reformat all devices (will confirm)
    #   --lustre="src dir"  Base directory of lustre sources. Used to search
    #                       for modules.
    #   --portals=src       Portals source
    #   --makeldiff         Translate xml source to LDIFF
    #   --iam myname        ??
    sys.exit()


# ============================================================
# debugging and error funcs

def fixme(msg = "this feature"):
    """Raise RuntimeError stating that *msg* is not implemented."""
    raise RuntimeError(msg + ' not implemented yet.')


def panic(*args):
    """Print the space-joined arguments, then raise RuntimeError with them."""
    msg = ' '.join(map(str, args))
    print(msg)
    raise RuntimeError(msg)


def log(*args):
    """Print the arguments joined by single spaces."""
    print(' '.join(map(str, args)))


def logall(msgs):
    """Print every string in *msgs* with surrounding whitespace stripped."""
    for s in msgs:
        print(s.strip())


def debug(*args):
    """Like log(), but only prints when --verbose (or --debug) is set."""
    msg = ' '.join(map(str, args))
    if isverbose():
        print(msg)


def isverbose():
    """True when commands should be echoed (options['verbose'] == 1)."""
    return options.get('verbose') == 1


def isnotouch():
    """True in dry-run mode (options['debug'] == 1): nothing is executed."""
    return options.get('debug') == 1


# ============================================================
# locally defined exceptions
class CommandError(Exception):
    """Raised when an external command (lctl, acceptor) fails.

    *args* is whatever describes the failure: a message string or the list
    of stderr lines.  It is handed to Exception unchanged instead of being
    assigned to self.args, which would split a string into characters.
    """
    def __init__(self, args=None):
        Exception.__init__(self, args)


# ============================================================
# handle lctl interface
class LCTLInterface:
    """
    Manage communication with lctl
    """

    def __init__(self, cmd):
        """
        Initialize the class by finding the lctl binary.

        In dry-run mode a missing binary is tolerated and the bare name
        'lctl' is used; otherwise CommandError is raised.
        """
        self.lctl = find_prog(cmd)
        if not self.lctl:
            if isnotouch():
                debug('! lctl not found')
                self.lctl = 'lctl'
            else:
                raise CommandError("unable to find lctl binary.")

    def run(self, cmds):
        """
        run lctl
        the cmds are written to stdin of lctl
        lctl doesn't return errors when run in script mode, so
        stderr is checked
        should modify command line to accept multiple commands, or
        create complex command line options

        Returns (status, stdout_lines).  Raises CommandError when lctl
        cannot be started, exits non-zero, or writes anything to stderr.
        """
        debug("+", self.lctl, cmds)
        if isnotouch():
            # same (status, output) order as the real return below
            return (0, [])
        import subprocess
        try:
            p = subprocess.Popen([self.lctl],
                                 stdin=subprocess.PIPE,
                                 stdout=subprocess.PIPE,
                                 stderr=subprocess.PIPE,
                                 universal_newlines=True)
        except OSError as e:
            log(self.lctl, "error:", e)
            raise CommandError([str(e)])
        # communicate() drains stdout and stderr together (no deadlock when
        # one pipe fills up) and reaps the child, so returncode is final.
        stdout, stderr = p.communicate(cmds + "\n")
        ret = p.returncode
        out = stdout.splitlines(True)
        err = stderr.splitlines(True)
        for l in out:
            debug('lctl:', l.strip())
        if ret or len(err):
            log(self.lctl, "error:", ret)
            logall(err)
            raise CommandError(err)
        return ret, out

    def network(self, net, nid):
        """ initialized network and add "self" """
        # Idea: "mynid" could be used for all network types to add "self," and then
        # this special case would be gone and the "self" hack would be hidden.
        if net == 'tcp':
            cmds = ("network %s\n"
                    "mynid %s\n"
                    "add_uuid self %s\n"
                    "quit") % (net, nid, nid)
        else:
            cmds = ("network %s\n"
                    "add_uuid self %s\n"
                    "quit") % (net, nid)
        self.run(cmds)

    # create a new connection
    def connect(self, net, nid, port, servuuid, send_buf, read_buf):
        """Connect to nid:port on *net* and register servuuid for that nid."""
        # XXX: buf size params not used yet
        cmds = ("network %s\n"
                "connect %s %d\n"
                "add_uuid %s %s\n"
                "quit") % (net, nid, port, servuuid, nid)
        self.run(cmds)

    # add a route (not implemented: no commands are sent to lctl)
    def add_route(self, net, to, via):
        cmds = """
        """
        #self.run(cmds)

    # tear down a connection
    def disconnect(self, net, nid, port, servuuid):
        """Disconnect from *nid* on *net*; port and servuuid are unused."""
        cmds = ("network %s\n"
                "disconnect %s\n"
                "quit") % (net, nid)
        self.run(cmds)

    # create a new device with lctl
    def newdev(self, attach, setup = ""):
        """Run newdev/attach/setup with the given argument strings."""
        cmds = ("newdev\n"
                "attach %s\n"
                "setup %s\n"
                "quit") % (attach, setup)
        self.run(cmds)

    # cleanup a device
    def cleanup(self, name, uuid):
        """Cleanup and detach the device called *name*; uuid is unused."""
        cmds = ("device $%s\n"
                "cleanup\n"
                "detach\n"
                "quit") % (name)
        self.run(cmds)

    # create an lov
    def lovconfig(self, uuid, mdsuuid, stripe_cnt, stripe_sz, stripe_off, pattern, devlist):
        """Send the lovconfig command for *uuid* to the device $mdsuuid."""
        cmds = ("device $%s\n"
                "probe\n"
                "lovconfig %s %d %d %d %s %s\n"
                "quit") % (mdsuuid, uuid, stripe_cnt, stripe_sz, stripe_off, pattern, devlist)
        self.run(cmds)
# ============================================================
# Various system-level functions
# (ideally moved to their own module)

def _exit_code(status):
    """Turn the value returned by a popen close() into an exit code.

    close() returns None on success, otherwise a wait()-style status with
    the exit code in the high byte.  A child killed by a signal has a zero
    high byte, so the raw status is returned in that case to keep the
    result non-zero.
    """
    if not status:
        return 0
    return (status >> 8) or status


# Run a command and return the status and output.
# stderr is merged into the captured output (2>&1); popen3 could be
# used to keep it separate if necessary
def run(*args):
    """Run the space-joined *args* through the shell.

    Returns (exit_code, output_lines).  In dry-run mode nothing is executed
    and (0, []) is returned.
    """
    cmd = ' '.join(map(str, args))
    debug("+", cmd)
    if isnotouch():
        return (0, [])
    f = os.popen(cmd + ' 2>&1')
    out = f.readlines()
    return (_exit_code(f.close()), out)


# Run a command that is expected to put itself in the background.
def run_daemon(*args):
    """Run the space-joined *args* through the shell, discarding output.

    Returns the exit code (0 in dry-run mode).  The pipe is closed without
    being read, and close() waits for the launched shell to finish.
    """
    cmd = ' '.join(map(str, args))
    debug("+", cmd)
    if isnotouch():
        return 0
    f = os.popen(cmd + ' 2>&1')
    return _exit_code(f.close())


# Determine full path to use for an external command
# searches dirname(argv[0]) first, then PATH
def find_prog(cmd):
    """Return the path of the executable *cmd*, or '' when it is not found.

    Search order: ../../portals/linux/utils/ relative to the script
    directory, the script directory itself, then every PATH entry.
    Directories are never returned, and an unset PATH is treated as empty.
    """
    cmdpath = os.path.dirname(sys.argv[0])
    syspath = [os.path.join(cmdpath, '../../portals/linux/utils/'), cmdpath]
    path = os.environ.get('PATH')
    if path:
        syspath.extend(path.split(os.pathsep))
    for d in syspath:
        prog = os.path.join(d, cmd)
        # X_OK alone is also true for searchable directories
        if os.path.isfile(prog) and os.access(prog, os.X_OK):
            return prog
    return ''

# is the path a block device?
def is_block(path):
    """Return a true value when *path* exists and is a block device."""
    try:
        mode = os.stat(path)[stat.ST_MODE]
    except OSError:
        return 0
    return stat.S_ISBLK(mode)


# build fs according to type
# fixme: dangerous
def mkfs(fstype, dev):
    """Create an 'ext3' or 'extN' filesystem on *dev*; panic on any failure.

    Any other fstype is rejected with panic() before a command is run.
    """
    if fstype in ('ext3', 'extN'):
        mkfs_cmd = 'mkfs.ext2 -j -b 4096'
    else:
        panic('unsupported fs type: ', fstype)
    # -F is passed whenever the target is not a block device
    if not is_block(dev):
        force = '-F'
    else:
        force = ''
    (ret, out) = run(mkfs_cmd, force, dev)
    if ret:
        panic("Unable to build fs:", dev)
    # enable hash tree indexing on fs
    if fstype == 'extN':
        htree = 'echo "feature FEATURE_C5" | debugfs -w'
        (ret, out) = run(htree, dev)
        if ret:
            panic("Unable to enable htree:", dev)


# some systems use /dev/loopN, some /dev/loop/N
def loop_base():
    """Return the loop device prefix ('/dev/loop' or '/dev/loop/')."""
    loop = '/dev/loop'
    if not os.access(loop + str(0), os.R_OK):
        loop = loop + '/'
        if not os.access(loop + str(0), os.R_OK):
            panic("can't access loop devices")
    return loop


# find loop device assigned to the file
def find_loop(file):
    """Return the loop device currently mapped to *file*, or ''."""
    loop = loop_base()
    for n in range(0, MAX_LOOP_DEVICES):
        dev = loop + str(n)
        if not os.access(dev, os.R_OK):
            # ran past the last existing device node
            break
        (ret, out) = run('losetup', dev)
        if out and ret == 0:
            # the backing file is taken from the parenthesised part of
            # the first line losetup prints
            m = re.search(r'\((.*)\)', out[0])
            if m and file == m.group(1):
                return dev
    return ''


# create file if necessary and assign the first free loop device
def init_loop(file, size, fstype):
    """Map *file* to a free loop device and return the device path.

    The file is created with dd (sparse, *size* 1k blocks) when it is not
    readable and writable.  Returns '' when no loop device is free.
    *fstype* is unused.
    """
    dev = find_loop(file)
    if dev:
        log('WARNING file:', file, 'already mapped to', dev)
        return dev
    if not os.access(file, os.R_OK | os.W_OK):
        run("dd if=/dev/zero bs=1k count=0 seek=%d of=%s" % (size, file))
    loop = loop_base()
    # find next free loop
    for n in range(0, MAX_LOOP_DEVICES):
        dev = loop + str(n)
        if not os.access(dev, os.R_OK):
            break
        (ret, out) = run('losetup', dev)
        if ret:
            # a failing query is taken to mean the device is unassigned
            (ret, out) = run('losetup', dev, file)
            if ret:
                logall(out)
                panic("unable to set up loop device:", dev, "for file:", file)
            return dev
    print("out of loop devices")
    return ''


# undo loop assignment
def clean_loop(file):
    """Detach the loop device mapped to *file*, if there is one."""
    dev = find_loop(file)
    if dev:
        ret, out = run('losetup -d', dev)
        if ret:
            log('unable to clean loop device:', dev, 'for file:', file)
            logall(out)


# initialize a block device if needed
def block_dev(dev, size, fstype, format):
    """Return a block device for *dev*, loop-mounting and formatting it.

    A path that is not a block device is attached to a loop device first;
    the filesystem is created when *format* is 'yes'.  In dry-run mode
    *dev* is returned untouched.
    """
    if isnotouch():
        return dev
    if not is_block(dev):
        backing = dev
        dev = init_loop(backing, size, fstype)
        if not dev:
            # never hand an empty device name to mkfs or lctl
            panic("no loop device available for:", backing)
    if format == 'yes':
        mkfs(fstype, dev)
    return dev


# ============================================================
# Classes to prepare and cleanup the various objects
#
class Module:
    """ Base class for the rest of the modules. The default cleanup method is
    defined here, as well as some utility funcs.
    """
    def __init__(self, tag_name, node):
        self.dom_node = node
        self.tag_name = tag_name
        self.name = node.getAttribute('name')
        self.uuid = node.getAttribute('uuid')

    def info(self, *args):
        """Print 'TAG: name uuid' followed by the space-joined args."""
        msg = ' '.join(map(str, args))
        print(' '.join([self.tag_name + ":", self.name, self.uuid, msg]))

    def cleanup(self):
        """ default cleanup, used for most modules """
        self.info()
        try:
            lctl.cleanup(self.name, self.uuid)
        except CommandError:
            log("cleanup failed: ", self.name)


class Network(Module):
    """A <network> element: network type, server nid, port, buffer sizes."""
    def __init__(self, node):
        Module.__init__(self, 'NETWORK', node)
        self.net_type = node.getAttribute('type')
        self.nid = getText(node, 'server', "")
        self.port = int(getText(node, 'port', 0))
        self.send_buf = int(getText(node, 'send_buf', 0))
        self.read_buf = int(getText(node, 'read_buf', 0))

    def prepare(self):
        """Start the acceptor (tcp only), configure lctl, create RPCDEV."""
        self.info(self.net_type, self.nid, self.port)
        if self.net_type == 'tcp':
            ret = run_daemon(TCP_ACCEPTOR, self.port)
            if ret:
                log("error:", ret)
                raise CommandError("cannot run acceptor")
        lctl.network(self.net_type, self.nid)
        lctl.newdev(attach = "ptlrpc RPCDEV")

    def cleanup(self):
        self.info(self.net_type, self.nid, self.port)
        try:
            lctl.cleanup("RPCDEV", "")
        except CommandError:
            log("cleanup failed: ", self.name)
        if self.net_type == 'tcp':
            # yikes, this ugly! need to save pid in /var/something
            run("killall acceptor")


class LDLM(Module):
    """An <ldlm> element."""
    def __init__(self, node):
        Module.__init__(self, 'LDLM', node)

    def prepare(self):
        self.info()
        lctl.newdev(attach="ldlm %s %s" % (self.name, self.uuid),
                    setup ="")


class LOV(Module):
    """A <lov> element: stripe parameters, owning MDS and osc_ref list."""
    def __init__(self, node):
        Module.__init__(self, 'LOV', node)
        devs = node.getElementsByTagName('devices')[0]
        self.stripe_sz = int(devs.getAttribute('stripesize'))
        self.stripe_off = int(devs.getAttribute('stripeoffset'))
        self.pattern = int(devs.getAttribute('pattern'))
        mdsref = node.getElementsByTagName('mds_ref')[0]
        self.mdsuuid = mdsref.getAttribute('uuidref')
        mds = lookup(node.parentNode, self.mdsuuid)
        if mds is None:
            panic(self.mdsuuid, "not found.")
        self.mdsname = getName(mds)
        # devlist keeps the historical format: every uuid followed by a space
        osc_uuids = [child.getAttribute('uuidref')
                     for child in devs.childNodes
                     if child.nodeName == 'osc_ref']
        self.devlist = ''.join([u + " " for u in osc_uuids])
        self.stripe_cnt = len(osc_uuids)

    def prepare(self):
        self.info(self.mdsuuid, self.stripe_cnt, self.stripe_sz, self.stripe_off, self.pattern,
                  self.devlist, self.mdsname)
        lctl.lovconfig(self.uuid, self.mdsname, self.stripe_cnt,
                       self.stripe_sz, self.stripe_off, self.pattern,
                       self.devlist)

    def cleanup(self):
        pass


class MDS(Module):
    """An <mds> element backed by a (possibly loop-mounted) device."""
    def __init__(self, node):
        Module.__init__(self, 'MDS', node)
        self.devname, self.size = getDevice(node)
        self.fstype = getText(node, 'fstype')
        self.format = getText(node, 'autoformat', "no")

    def prepare(self):
        self.info(self.devname, self.fstype, self.format)
        blkdev = block_dev(self.devname, self.size, self.fstype, self.format)
        lctl.newdev(attach="mds %s %s" % (self.name, self.uuid),
                    setup ="%s %s" % (blkdev, self.fstype))

    def cleanup(self):
        Module.cleanup(self)
        clean_loop(self.devname)


class MDC(Module):
    """An <mdc> element: the client side of the MDS named by mds_ref."""
    def __init__(self, node):
        Module.__init__(self, 'MDC', node)
        ref = node.getElementsByTagName('mds_ref')[0]
        self.mds_uuid = ref.getAttribute('uuidref')

    def prepare(self):
        self.info(self.mds_uuid)
        mds = lookup(self.dom_node.parentNode, self.mds_uuid)
        if mds is None:
            panic(self.mds_uuid, "not found.")
        net = get_ost_net(self.dom_node.parentNode, self.mds_uuid)
        if net is None:
            panic("network not found for:", self.mds_uuid)
        srv = Network(net)
        lctl.connect(srv.net_type, srv.nid, srv.port, srv.uuid, srv.send_buf, srv.read_buf)
        lctl.newdev(attach="mdc %s %s" % (self.name, self.uuid),
                    setup ="%s %s" % (self.mds_uuid, srv.uuid))

    def cleanup(self):
        self.info(self.mds_uuid)
        net = get_ost_net(self.dom_node.parentNode, self.mds_uuid)
        if net is None:
            panic("network not found for:", self.mds_uuid)
        srv = Network(net)
        try:
            lctl.disconnect(srv.net_type, srv.nid, srv.port, srv.uuid)
            lctl.cleanup(self.name, self.uuid)
        except CommandError:
            log("cleanup failed: ", self.name)


class OBD(Module):
    """An <obd> element backed by a (possibly loop-mounted) device."""
    def __init__(self, node):
        Module.__init__(self, 'OBD', node)
        self.obdtype = node.getAttribute('type')
        self.devname, self.size = getDevice(node)
        self.fstype = getText(node, 'fstype')
        self.format = getText(node, 'autoformat', 'yes')

    # need to check /proc/mounts and /etc/mtab before
    # formatting anything.
    # FIXME: check if device is already formatted.
    def prepare(self):
        self.info(self.obdtype, self.devname, self.size, self.fstype, self.format)
        blkdev = block_dev(self.devname, self.size, self.fstype, self.format)
        lctl.newdev(attach="%s %s %s" % (self.obdtype, self.name, self.uuid),
                    setup ="%s %s" % (blkdev, self.fstype))

    def cleanup(self):
        Module.cleanup(self)
        clean_loop(self.devname)


class OST(Module):
    """An <ost> element exporting the OBD named by obd_ref."""
    def __init__(self, node):
        Module.__init__(self, 'OST', node)
        ref = node.getElementsByTagName('obd_ref')[0]
        self.obd_uuid = ref.getAttribute('uuidref')

    def prepare(self):
        self.info(self.obd_uuid)
        lctl.newdev(attach="ost %s %s" % (self.name, self.uuid),
                    setup ="%s" % (self.obd_uuid))


class OSC(Module):
    """An <osc> element: the client side of an OST/OBD pair."""
    def __init__(self, node):
        Module.__init__(self, 'OSC', node)
        ref = node.getElementsByTagName('obd_ref')[0]
        self.obd_uuid = ref.getAttribute('uuidref')
        ref = node.getElementsByTagName('ost_ref')[0]
        self.ost_uuid = ref.getAttribute('uuidref')

    def prepare(self):
        self.info(self.obd_uuid, self.ost_uuid)
        net = get_ost_net(self.dom_node.parentNode, self.ost_uuid)
        if net is None:
            panic("network not found for:", self.ost_uuid)
        srv = Network(net)
        lctl.connect(srv.net_type, srv.nid, srv.port, srv.uuid, srv.send_buf, srv.read_buf)
        lctl.newdev(attach="osc %s %s" % (self.name, self.uuid),
                    setup ="%s %s" % (self.obd_uuid, srv.uuid))

    def cleanup(self):
        self.info(self.obd_uuid, self.ost_uuid)
        net = get_ost_net(self.dom_node.parentNode, self.ost_uuid)
        if net is None:
            panic("network not found for:", self.ost_uuid)
        srv = Network(net)
        try:
            lctl.disconnect(srv.net_type, srv.nid, srv.port, srv.uuid)
            lctl.cleanup(self.name, self.uuid)
        except CommandError:
            log("cleanup failed: ", self.name)


class Mountpoint(Module):
    """A <mountpoint> element: mounts lustre_lite at the configured path."""
    def __init__(self, node):
        Module.__init__(self, 'MTPT', node)
        self.path = getText(node, 'path')
        ref = node.getElementsByTagName('mdc_ref')[0]
        self.mdc_uuid = ref.getAttribute('uuidref')
        ref = node.getElementsByTagName('osc_ref')[0]
        self.lov_uuid = ref.getAttribute('uuidref')

    def prepare(self):
        """Prepare the referenced OSC(s), attach the lov device and mount."""
        target = lookup(self.dom_node.parentNode, self.lov_uuid)
        if target is None:
            panic(self.lov_uuid, "not found.")
        if target.nodeName == 'lov':
            dev = LOV(target)
            for osc_uuid in dev.devlist.split():
                osc = lookup(self.dom_node.parentNode, osc_uuid)
                if osc:
                    OSC(osc).prepare()
                else:
                    panic('osc not found:', osc_uuid)
        else:
            # NOTE(review): the original layout was lost; this assumes only
            # the single-OSC case calls dev.prepare() here -- confirm.
            dev = OSC(target)
            dev.prepare()
        self.info(self.path, self.mdc_uuid, self.lov_uuid)
        lctl.newdev(attach="lov %s %s" % (dev.name, dev.uuid),
                    setup ="%s" % (self.mdc_uuid))
        cmd = "mount -t lustre_lite -o osc=%s,mdc=%s none %s" % \
              (self.lov_uuid, self.mdc_uuid, self.path)
        # the mkdir result is deliberately not checked
        run("mkdir", self.path)
        ret, val = run(cmd)
        if ret:
            panic("mount failed:", self.path)

    def cleanup(self):
        self.info(self.path, self.mdc_uuid, self.lov_uuid)
        run("umount", self.path)


# ============================================================
# XML processing and query
# TODO: Change query funcs to use XPath, which is much cleaner

def getDevice(obd):
    """Return (device_path, size) from the first <device> child of *obd*.

    size is 0 when the 'size' attribute is missing or not an integer, and
    device_path is '' when the element has no text.
    """
    dev = obd.getElementsByTagName('device')[0]
    dev.normalize()
    try:
        size = int(dev.getAttribute('size'))
    except ValueError:
        size = 0
    if dev.firstChild is None:
        return "", size
    return dev.firstChild.data, size


# Get the text content from the first matching child
def getText(node, tag, default=""):
    """Return the text of the first *tag* descendant of *node*.

    *default* is returned when there is no such element or it is empty.
    """
    matches = node.getElementsByTagName(tag)
    if len(matches) > 0:
        elem = matches[0]
        elem.normalize()
        if elem.firstChild is not None:
            return elem.firstChild.data
    return default


def get_ost_net(node, uuid):
    """Return the network node referenced by the service *uuid*.

    Returns None when the service does not exist or has no network_ref.
    """
    ost = lookup(node, uuid)
    if ost is None:
        return None
    refs = ost.getElementsByTagName('network_ref')
    if not refs:
        return None
    return lookup(node, refs[0].getAttribute('uuidref'))


def lookup(node, uuid):
    """Depth-first search below *node* for the element with this uuid."""
    for child in node.childNodes:
        if child.nodeType == child.ELEMENT_NODE:
            if getUUID(child) == uuid:
                return child
            found = lookup(child, uuid)
            if found:
                return found
    return None


# Get name attribute of node
def getName(node):
    return node.getAttribute('name')


# Get uuidref attribute of node
def getRef(node):
    return node.getAttribute('uuidref')


# Get uuid attribute of node
def getUUID(node):
    return node.getAttribute('uuid')


# the tag name is the service type
# fixme: this should do some checks to make sure the node is a service
def getServiceType(node):
    return node.nodeName


#
# determine what "level" a particular node is at.
# the order of initialization is based on level.  objects
# are assigned a level based on type:
#  network:1  device,ldlm:2  obd,mdd:3  mds,ost:4  mdc,osc:5
#  lov:6  mountpoint:7  (anything else: 0)
_SERVICE_LEVELS = {
    'network': 1,
    'device': 2, 'ldlm': 2,
    'obd': 3, 'mdd': 3,
    'mds': 4, 'ost': 4,
    'mdc': 5, 'osc': 5,
    'lov': 6,
    'mountpoint': 7,
}


def getServiceLevel(node):
    return _SERVICE_LEVELS.get(getServiceType(node), 0)


#
# return list of services in a profile. list is a list of tuples
# [(level, node),]
def getServices(lustreNode, profileNode):
    """Resolve every reference in *profileNode*, ordered by service level.

    Services sharing a level keep the order in which the profile lists
    them.  panic()s when a reference cannot be resolved.
    """
    services = []
    for ref in profileNode.childNodes:
        if ref.nodeType == ref.ELEMENT_NODE:
            servNode = lookup(lustreNode, getRef(ref))
            if not servNode:
                print(ref)
                panic('service not found: ' + getRef(ref))
            services.append((getServiceLevel(servNode), servNode))
    # sort on the level only: the sort is stable, and DOM nodes have no
    # meaningful ordering of their own
    services.sort(key=lambda entry: entry[0])
    return services


def getByName(lustreNode, tag, name):
    """Return the first *tag* element whose name attribute is *name*."""
    for nd in lustreNode.getElementsByTagName(tag):
        if getName(nd) == name:
            return nd
    return None


# ============================================================
# lconf level logic
# Start a service.
def startService(node, cleanFlag):
    """Build the handler object for *node* and prepare it or clean it up.

    cleanFlag true means cleanup(), otherwise prepare().  An unknown
    service type is reported through panic().
    """
    svc_type = getServiceType(node)
    debug('Starting service:', svc_type, getName(node), getUUID(node))
    # the table is built at call time because the handler classes are
    # defined elsewhere in this file
    handlers = {
        'ldlm': LDLM,
        'lov': LOV,
        'network': Network,
        'obd': OBD,
        'ost': OST,
        'mds': MDS,
        'osc': OSC,
        'mdc': MDC,
        'mountpoint': Mountpoint,
    }
    if svc_type not in handlers:
        panic("unknown service type:", svc_type)
    n = handlers[svc_type](node)
    if cleanFlag:
        n.cleanup()
    else:
        n.prepare()


#
# Prepare the system to run lustre using a particular profile
# in the configuration.
#  * load the modules
#  * setup networking for the current node
#  * make sure partitions are in place and prepared
#  * initialize devices with lctl
# Levels is important, and needs to be enforced.
def startProfile(lustreNode, profileNode, cleanFlag):
    """Start every service of a profile, or stop them in reverse order."""
    if not profileNode:
        panic("profile not found.")
    services = getServices(lustreNode, profileNode)
    if cleanFlag:
        services.reverse()
    for level, servNode in services:
        startService(servNode, cleanFlag)


#
# Load the profiles of the first host name that has a <node> entry
def doHost(lustreNode, hosts, cleanFlag):
    """Run every profile of the first <node> matching a name in *hosts*."""
    node = None
    for h in hosts:
        node = getByName(lustreNode, 'node', h)
        if node:
            break

    if not node:
        print('No host entry found.')
        return

    for profile in node.getElementsByTagName('profile'):
        startProfile(lustreNode, profile, cleanFlag)


# Command line processing
#
def parse_cmdline(argv):
    """Parse *argv* into the global options dict; return the other args.

    An unknown option prints "invalid opt" followed by the usage text,
    which exits.
    """
    short_opts = "hdv"
    long_opts = ["ldap", "reformat", "lustre=", "verbose", "portals=",
                 "makeldiff", "cleanup", "iam=", "help", "debug", "host=",
                 "get="]
    opts = []
    args = []
    global options
    try:
        opts, args = getopt.getopt(argv, short_opts, long_opts)
    except getopt.GetoptError:
        print("invalid opt")
        usage()

    for o, a in opts:
        if o in ("-h", "--help"):
            usage()
        if o == "--cleanup":
            options['cleanup'] = 1
        if o in ("-v", "--verbose"):
            options['verbose'] = 1
        if o in ("-d", "--debug"):
            # dry run implies verbose
            options['debug'] = 1
            options['verbose'] = 1
        if o == "--portals":
            options['portals'] = a
        if o == "--lustre":
            options['lustre'] = a
        if o == "--reformat":
            options['reformat'] = 1
        if o == "--host":
            options['hostname'] = [a]
        if o == "--get":
            options['url'] = a
    return args


def fetch(url):
    """Return the raw contents of *url*.

    On a retrieval error the error is printed and usage() is called,
    which exits.
    """
    try:
        from urllib.request import urlopen      # Python 3
    except ImportError:
        from urllib import urlopen              # Python 2
    data = ""
    try:
        s = urlopen(url)
        try:
            data = s.read()
        finally:
            s.close()
    except (IOError, OSError, ValueError) as e:
        print("unable to fetch %s: %s" % (url, e))
        usage()
    return data


# Initialize or shutdown lustre according to a configuration file
#   * prepare the system for lustre
#   * configure devices with lctl
# Shutdown does steps in reverse
#
def main():
    """Load the configuration and start or stop the services of this host."""
    global options, TCP_ACCEPTOR, lctl
    args = parse_cmdline(sys.argv[1:])
    if len(args) > 0:
        # the config is only read, so read access is all that is required
        if not os.access(args[0], os.R_OK):
            print('File not found or not readable: ' + args[0])
            sys.exit(1)
        dom = xml.dom.minidom.parse(args[0])
    elif 'url' in options:
        xmldata = fetch(options['url'])
        dom = xml.dom.minidom.parseString(xmldata)
    else:
        usage()

    if 'hostname' not in options:
        # no --host given: try the local host name, then 'localhost'
        options['hostname'] = []
        host = socket.gethostname()
        if len(host) > 0:
            options['hostname'].append(host)
        options['hostname'].append('localhost')
    print("configuring for host:  %s" % (options['hostname'],))

    TCP_ACCEPTOR = find_prog('acceptor')
    if not TCP_ACCEPTOR:
        if isnotouch():
            TCP_ACCEPTOR = 'acceptor'
            debug('! acceptor not found')
        else:
            panic('acceptor not found')

    lctl = LCTLInterface('lctl')

    doHost(dom.documentElement, options['hostname'], 'cleanup' in options)


if __name__ == "__main__":
    try:
        main()
    except RuntimeError:
        # panic() has already printed the message; report failure to the caller
        sys.exit(1)
    except CommandError:
        print('')
        sys.exit(1)