#!/usr/bin/env python # # Copyright (C) 2002-2003 Cluster File Systems, Inc. # Authors: Robert Read # Mike Shaver # This file is part of Lustre, http://www.lustre.org. # # Lustre is free software; you can redistribute it and/or # modify it under the terms of version 2 of the GNU General Public # License as published by the Free Software Foundation. # # Lustre is distributed in the hope that it will be useful, # but WITHOUT ANY WARRANTY; without even the implied warranty of # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the # GNU General Public License for more details. # # You should have received a copy of the GNU General Public License # along with Lustre; if not, write to the Free Software # Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA. # # lconf - lustre configuration tool # # lconf is the main driver script for starting and stopping # lustre filesystem services. # # Based in part on the XML obdctl modifications done by Brian Behlendorf import sys, getopt, types import string, os, stat, popen2, socket, time, random, fcntl, select import re, exceptions, signal, traceback import xml.dom.minidom if sys.version[0] == '1': from FCNTL import F_GETFL, F_SETFL else: from fcntl import F_GETFL, F_SETFL PYMOD_DIR = "/usr/lib/lustre/python" def development_mode(): base = os.path.dirname(sys.argv[0]) if os.access(base+"/Makefile.am", os.R_OK): return 1 return 0 if not development_mode(): sys.path.append(PYMOD_DIR) import Lustre # Global parameters MAXTCPBUF = 16777216 DEFAULT_TCPBUF = 8388608 DEFAULT_PORT = 988 # # Maximum number of devices to search for. 
# (the /dev/loop* nodes need to be created beforehand) MAX_LOOP_DEVICES = 256 PORTALS_DIR = 'portals' # Needed to call lconf --record CONFIG_FILE = "" # Please keep these in sync with the values in portals/kp30.h ptldebug_names = { "trace" : (1 << 0), "inode" : (1 << 1), "super" : (1 << 2), "ext2" : (1 << 3), "malloc" : (1 << 4), "cache" : (1 << 5), "info" : (1 << 6), "ioctl" : (1 << 7), "blocks" : (1 << 8), "net" : (1 << 9), "warning" : (1 << 10), "buffs" : (1 << 11), "other" : (1 << 12), "dentry" : (1 << 13), "portals" : (1 << 14), "page" : (1 << 15), "dlmtrace" : (1 << 16), "error" : (1 << 17), "emerg" : (1 << 18), "ha" : (1 << 19), "rpctrace" : (1 << 20), "vfstrace" : (1 << 21), "reada" : (1 << 22), } subsystem_names = { "undefined" : (1 << 0), "mdc" : (1 << 1), "mds" : (1 << 2), "osc" : (1 << 3), "ost" : (1 << 4), "class" : (1 << 5), "log" : (1 << 6), "llite" : (1 << 7), "rpc" : (1 << 8), "mgmt" : (1 << 9), "portals" : (1 << 10), "socknal" : (1 << 11), "qswnal" : (1 << 12), "pinger" : (1 << 13), "filter" : (1 << 14), "ptlbd" : (1 << 15), "echo" : (1 << 16), "ldlm" : (1 << 17), "lov" : (1 << 18), "gmnal" : (1 << 19), "ptlrouter" : (1 << 20), "cobd" : (1 << 21), "ibnal" : (1 << 22), } first_cleanup_error = 0 def cleanup_error(rc): global first_cleanup_error if not first_cleanup_error: first_cleanup_error = rc # ============================================================ # debugging and error funcs def fixme(msg = "this feature"): raise Lustre.LconfError, msg + ' not implmemented yet.' def panic(*args): msg = string.join(map(str,args)) if not config.noexec: raise Lustre.LconfError(msg) else: print "! " + msg def log(*args): msg = string.join(map(str,args)) print msg def logall(msgs): for s in msgs: print string.strip(s) def debug(*args): if config.verbose: msg = string.join(map(str,args)) print msg # ack, python's builtin int() does not support '0x123' syntax. # eval can do it, although what a hack! 
def my_int(s): try: if s[0:2] == '0x': return eval(s, {}, {}) else: return int(s) except SyntaxError, e: raise ValueError("not a number") except NameError, e: raise ValueError("not a number") # ============================================================ # locally defined exceptions class CommandError (exceptions.Exception): def __init__(self, cmd_name, cmd_err, rc=None): self.cmd_name = cmd_name self.cmd_err = cmd_err self.rc = rc def dump(self): import types if type(self.cmd_err) == types.StringType: if self.rc: print "! %s (%d): %s" % (self.cmd_name, self.rc, self.cmd_err) else: print "! %s: %s" % (self.cmd_name, self.cmd_err) elif type(self.cmd_err) == types.ListType: if self.rc: print "! %s (error %d):" % (self.cmd_name, self.rc) else: print "! %s:" % (self.cmd_name) for s in self.cmd_err: print "> %s" %(string.strip(s)) else: print self.cmd_err # ============================================================ # handle daemons, like the acceptor class DaemonHandler: """ Manage starting and stopping a daemon. Assumes daemon manages it's own pid file. 
""" def __init__(self, cmd): self.command = cmd self.path ="" def start(self): if self.running(): log(self.command, "already running.") if not self.path: self.path = find_prog(self.command) if not self.path: panic(self.command, "not found.") ret, out = runcmd(self.path +' '+ self.command_line()) if ret: raise CommandError(self.path, out, ret) def stop(self): if self.running(): pid = self.read_pidfile() try: log ("killing process", pid) os.kill(pid, 15) #time.sleep(1) # let daemon die except OSError, e: log("unable to kill", self.command, e) if self.running(): log("unable to kill", self.command) def running(self): pid = self.read_pidfile() if pid: try: os.kill(pid, 0) except OSError: self.clean_pidfile() else: return 1 return 0 def read_pidfile(self): try: fp = open(self.pidfile(), 'r') pid = int(fp.read()) fp.close() return pid except IOError: return 0 def clean_pidfile(self): """ Remove a stale pidfile """ log("removing stale pidfile:", self.pidfile()) try: os.unlink(self.pidfile()) except OSError, e: log(self.pidfile(), e) class AcceptorHandler(DaemonHandler): def __init__(self, port, net_type, send_mem, recv_mem, irq_aff): DaemonHandler.__init__(self, "acceptor") self.port = port self.flags = '' self.send_mem = send_mem self.recv_mem = recv_mem if irq_aff: self.flags = self.flags + ' -i' def pidfile(self): return "/var/run/%s-%d.pid" % (self.command, self.port) def command_line(self): return string.join(map(str,('-s', self.send_mem, '-r', self.recv_mem, self.flags, self.port))) acceptors = {} # start the acceptors def run_acceptors(): if config.lctl_dump or config.record: return for port in acceptors.keys(): daemon = acceptors[port] if not daemon.running(): daemon.start() def run_one_acceptor(port): if config.lctl_dump or config.record: return if acceptors.has_key(port): daemon = acceptors[port] if not daemon.running(): daemon.start() else: panic("run_one_acceptor: No acceptor defined for port:", port) def stop_acceptor(port): if acceptors.has_key(port): daemon 
= acceptors[port] if daemon.running(): daemon.stop() # ============================================================ # handle lctl interface class LCTLInterface: """ Manage communication with lctl """ def __init__(self, cmd): """ Initialize close by finding the lctl binary. """ self.lctl = find_prog(cmd) self.save_file = '' self.record_device = '' if not self.lctl: if config.noexec: debug('! lctl not found') self.lctl = 'lctl' else: raise CommandError('lctl', "unable to find lctl binary.") def use_save_file(self, file): self.save_file = file def record(self, dev_name, logname): log("Recording log", logname, "on", dev_name) self.record_device = dev_name self.record_log = logname def end_record(self): log("End recording log", self.record_log, "on", self.record_device) self.record_device = None self.record_log = None def set_nonblock(self, fd): fl = fcntl.fcntl(fd, F_GETFL) fcntl.fcntl(fd, F_SETFL, fl | os.O_NDELAY) def run(self, cmds): """ run lctl the cmds are written to stdin of lctl lctl doesn't return errors when run in script mode, so stderr is checked should modify command line to accept multiple commands, or create complex command line options """ cmd_line = self.lctl if self.save_file: cmds = '\n dump ' + self.save_file + '\n' + cmds elif self.record_device: cmds = """ device $%s record %s %s""" % (self.record_device, self.record_log, cmds) debug("+", cmd_line, cmds) if config.noexec: return (0, []) child = popen2.Popen3(cmd_line, 1) # Capture stdout and stderr from command child.tochild.write(cmds + "\n") child.tochild.close() # From "Python Cookbook" from O'Reilly outfile = child.fromchild outfd = outfile.fileno() self.set_nonblock(outfd) errfile = child.childerr errfd = errfile.fileno() self.set_nonblock(errfd) outdata = errdata = '' outeof = erreof = 0 while 1: ready = select.select([outfd,errfd],[],[]) # Wait for input if outfd in ready[0]: outchunk = outfile.read() if outchunk == '': outeof = 1 outdata = outdata + outchunk if errfd in ready[0]: errchunk 
= errfile.read() if errchunk == '': erreof = 1 errdata = errdata + errchunk if outeof and erreof: break # end of "borrowed" code ret = child.wait() if os.WIFEXITED(ret): rc = os.WEXITSTATUS(ret) else: rc = 0 if rc or len(errdata): raise CommandError(self.lctl, errdata, rc) return rc, outdata def runcmd(self, *args): """ run lctl using the command line """ cmd = string.join(map(str,args)) debug("+", self.lctl, cmd) rc, out = run(self.lctl, cmd) if rc: raise CommandError(self.lctl, out, rc) return rc, out def network(self, net, nid): """ set mynid """ cmds = """ network %s mynid %s quit """ % (net, nid) self.run(cmds) # create a new connection def add_uuid(self, net_type, uuid, nid): cmds = "\n add_uuid %s %s %s" %(uuid, nid, net_type) self.run(cmds) def add_autoconn(self, net_type, send_mem, recv_mem, nid, hostaddr, port, flags): if net_type in ('tcp',) and not config.lctl_dump: cmds = """ network %s send_mem %d recv_mem %d add_autoconn %s %s %d %s quit""" % (net_type, send_mem, recv_mem, nid, hostaddr, port, flags ) self.run(cmds) def connect(self, srv): self.add_uuid(srv.net_type, srv.nid_uuid, srv.nid) if srv.net_type in ('tcp',) and not config.lctl_dump: flags = 's' if srv.irq_affinity: flags = flags + 'i' self.add_autoconn(srv.net_type, srv.send_mem, srv.recv_mem, srv.nid, srv.hostaddr, srv.port, flags) # Recover a device def recover(self, dev_name, new_conn): cmds = """ device $%s recover %s""" %(dev_name, new_conn) self.run(cmds) # add a route to a range def add_route(self, net, gw, lo, hi): cmds = """ network %s add_route %s %s %s quit """ % (net, gw, lo, hi) try: self.run(cmds) except CommandError, e: log ("ignore: ") e.dump() def del_route(self, net, gw, lo, hi): cmds = """ ignore_errors network %s del_route %s %s %s quit """ % (net, gw, lo, hi) self.run(cmds) # add a route to a host def add_route_host(self, net, uuid, gw, tgt): self.add_uuid(net, uuid, tgt) cmds = """ network %s add_route %s %s quit """ % (net, gw, tgt) try: self.run(cmds) except 
CommandError, e: log ("ignore: ") e.dump() # add a route to a range def del_route_host(self, net, uuid, gw, tgt): self.del_uuid(uuid) cmds = """ ignore_errors network %s del_route %s %s quit """ % (net, gw, tgt) self.run(cmds) def del_autoconn(self, net_type, nid, hostaddr): if net_type in ('tcp',) and not config.lctl_dump: cmds = """ ignore_errors network %s del_autoconn %s %s s quit""" % (net_type, nid, hostaddr) self.run(cmds) # disconnect one connection def disconnect(self, srv): self.del_uuid(srv.nid_uuid) if srv.net_type in ('tcp',) and not config.lctl_dump: self.del_autoconn(srv.net_type, srv.nid, srv.hostaddr) def del_uuid(self, uuid): cmds = """ ignore_errors del_uuid %s quit""" % (uuid,) self.run(cmds) # disconnect all def disconnectAll(self, net): cmds = """ ignore_errors network %s disconnect quit""" % (net) self.run(cmds) def attach(self, type, name, uuid): cmds = """ attach %s %s %s quit""" % (type, name, uuid) self.run(cmds) def setup(self, name, setup = ""): cmds = """ cfg_device %s setup %s quit""" % (name, setup) self.run(cmds) # create a new device with lctl def newdev(self, type, name, uuid, setup = ""): self.attach(type, name, uuid); try: self.setup(name, setup) except CommandError, e: self.cleanup(name, uuid, 0) raise e # cleanup a device def cleanup(self, name, uuid, force, failover = 0): if failover: force = 1 cmds = """ ignore_errors cfg_device $%s cleanup %s %s detach quit""" % (name, ('', 'force')[force], ('', 'failover')[failover]) self.run(cmds) # create an lov def lov_setup(self, name, uuid, desc_uuid, mdsuuid, stripe_cnt, stripe_sz, stripe_off, pattern, devlist): cmds = """ attach lov %s %s lov_setup %s %d %d %d %s %s quit""" % (name, uuid, desc_uuid, stripe_cnt, stripe_sz, stripe_off, pattern, devlist) self.run(cmds) # create an lov def lov_setconfig(self, uuid, mdsuuid, stripe_cnt, stripe_sz, stripe_off, pattern, devlist): cmds = """ cfg_device $%s lov_setconfig %s %d %d %d %s %s quit""" % (mdsuuid, uuid, stripe_cnt, stripe_sz, 
stripe_off, pattern, devlist) self.run(cmds) # dump the log file def dump(self, dump_file): cmds = """ debug_kernel %s 1 quit""" % (dump_file) self.run(cmds) # get list of devices def device_list(self): devices = '/proc/fs/lustre/devices' ret = [] if os.access(devices, os.R_OK): try: fp = open(devices, 'r') ret = fp.readlines() fp.close() except IOError, e: log(e) return ret # get lustre version def lustre_version(self): rc, out = self.runcmd('version') return out # dump mount options def mount_option(self, profile, osc, mdc): cmds = """ mount_option %s %s %s quit""" % (profile, osc, mdc) self.run(cmds) # delete mount options def del_mount_option(self, profile): cmds = """ del_mount_option %s quit""" % (profile,) self.run(cmds) def set_timeout(self, timeout): cmds = """ set_timeout %s quit""" % (timeout,) self.run(cmds) # delete mount options def set_lustre_upcall(self, upcall): cmds = """ set_lustre_upcall %s quit""" % (upcall,) self.run(cmds) # ============================================================ # Various system-level functions # (ideally moved to their own module) # Run a command and return the output and status. # stderr is sent to /dev/null, could use popen3 to # save it if necessary def runcmd(cmd): debug ("+", cmd) if config.noexec: return (0, []) f = os.popen(cmd + ' 2>&1') out = f.readlines() ret = f.close() if ret: ret = ret >> 8 else: ret = 0 return (ret, out) def run(*args): cmd = string.join(map(str,args)) return runcmd(cmd) # Run a command in the background. 
def run_daemon(*args):
    """Run the space-joined args through the shell; return the exit code.

    Output is discarded.  Under config.noexec nothing is run and 0 is
    returned.
    """
    cmd = string.join(map(str,args))
    debug ("+", cmd)
    if config.noexec: return 0
    f = os.popen(cmd + ' 2>&1')
    ret = f.close()
    if ret:
        # close() returns the wait status; keep only the exit code
        ret = ret >> 8
    else:
        ret = 0
    return ret

# Determine full path to use for an external command
# searches <config.portals>/utils (when set) first, then
# dirname(argv[0]), then PATH
def find_prog(cmd):
    """Return the first executable path found for cmd, or '' if none."""
    # tolerate an unset PATH instead of failing with KeyError
    path = os.environ.get('PATH', '')
    if path:
        syspath = string.split(path, ':')
    else:
        syspath = []
    cmdpath = os.path.dirname(sys.argv[0])
    syspath.insert(0, cmdpath)
    if config.portals:
        syspath.insert(0, os.path.join(config.portals, 'utils/'))
    for d in syspath:
        prog = os.path.join(d,cmd)
        # directories also pass the X_OK test, so exclude them
        if os.access(prog, os.X_OK) and not os.path.isdir(prog):
            return prog
    return ''

# Recursively look for file starting at base dir
def do_find_file(base, mod):
    """Return the path of the first readable file mod under base, or None."""
    fullname = os.path.join(base, mod)
    if os.access(fullname, os.R_OK):
        return fullname
    try:
        entries = os.listdir(base)
    except OSError:
        # unreadable or vanished directory: nothing to find below it
        return None
    for d in entries:
        subdir = os.path.join(base,d)
        if os.path.isdir(subdir):
            module = do_find_file(subdir, mod)
            if module:
                return module
    return None

def find_module(src_dir, dev_dir, modname):
    """Return src_dir/dev_dir/modname.o if it is readable, else None."""
    mod = '%s.o' % (modname)
    module = src_dir +'/'+ dev_dir +'/'+ mod
    if os.access(module, os.R_OK):
        return module
    return None

# is the path a block device?
def is_block(path): s = () try: s = os.stat(path) except OSError: return 0 return stat.S_ISBLK(s[stat.ST_MODE]) # build fs according to type # fixme: dangerous def mkfs(dev, devsize, fstype, jsize, isize, mkfsoptions, isblock=1): block_cnt = '' jopt = '' iopt = '' if devsize: if devsize < 8000: panic("size of filesystem on '%s' must be larger than 8MB, but is set to %s"% (dev, devsize)) # devsize is in 1k, and fs block count is in 4k block_cnt = devsize/4 if fstype in ('ext3', 'extN'): # ext3 journal size is in megabytes if jsize == 0: if devsize == 0: if not is_block(dev): ret, out = runcmd("ls -l %s" %dev) devsize = int(string.split(out[0])[4]) / 1024 else: ret, out = runcmd("sfdisk -s %s" %dev) devsize = int(out[0]) if devsize > 1024 * 1024: jsize = ((devsize / 102400) * 4) if jsize > 400: jsize = 400 if jsize: jopt = "-J size=%d" %(jsize,) if isize: iopt = "-I %d" %(isize,) mkfs = 'mkfs.ext2 -j -b 4096 ' if not isblock or config.force: mkfs = mkfs + ' -F ' elif fstype == 'reiserfs': # reiserfs journal size is in blocks if jsize: jopt = "--journal_size %d" %(jsize,) mkfs = 'mkreiserfs -ff' else: panic('unsupported fs type: ', fstype) if config.mkfsoptions != None: mkfs = mkfs + ' ' + config.mkfsoptions if mkfsoptions != None: mkfs = mkfs + ' ' + mkfsoptions (ret, out) = run (mkfs, jopt, iopt, dev, block_cnt) if ret: panic("Unable to build fs:", dev, string.join(out)) # enable hash tree indexing on fsswe if fstype in ('ext3', 'extN'): htree = 'echo "feature FEATURE_C5" | debugfs -w' (ret, out) = run (htree, dev) if ret: panic("Unable to enable htree:", dev) # some systems use /dev/loopN, some /dev/loop/N def loop_base(): import re loop = '/dev/loop' if not os.access(loop + str(0), os.R_OK): loop = loop + '/' if not os.access(loop + str(0), os.R_OK): panic ("can't access loop devices") return loop # find loop device assigned to thefile def find_loop(file): loop = loop_base() for n in xrange(0, MAX_LOOP_DEVICES): dev = loop + str(n) if os.access(dev, os.R_OK): 
(stat, out) = run('losetup', dev) if out and stat == 0: m = re.search(r'\((.*)\)', out[0]) if m and file == m.group(1): return dev else: break return '' # create file if necessary and assign the first free loop device def init_loop(file, size, fstype, journal_size, inode_size, mkfsoptions, reformat): dev = find_loop(file) if dev: print 'WARNING file:', file, 'already mapped to', dev return dev if reformat or not os.access(file, os.R_OK | os.W_OK): if size < 8000: panic("size of loopback file '%s' must be larger than 8MB, but is set to %s" % (file,size)) (ret, out) = run("dd if=/dev/zero bs=1k count=0 seek=%d of=%s" %(size, file)) if ret: panic("Unable to create backing store:", file) mkfs(file, size, fstype, journal_size, inode_size, mkfsoptions, isblock=0) loop = loop_base() # find next free loop for n in xrange(0, MAX_LOOP_DEVICES): dev = loop + str(n) if os.access(dev, os.R_OK): (stat, out) = run('losetup', dev) if stat: run('losetup', dev, file) return dev else: print "out of loop devices" return '' print "out of loop devices" return '' # undo loop assignment def clean_loop(file): dev = find_loop(file) if dev: ret, out = run('losetup -d', dev) if ret: log('unable to clean loop device:', dev, 'for file:', file) logall(out) # determine if dev is formatted as a filesystem def need_format(fstype, dev): # FIXME don't know how to implement this return 0 # initialize a block device if needed def block_dev(dev, size, fstype, reformat, autoformat, journal_size, inode_size, mkfsoptions): if config.noexec: return dev if not is_block(dev): dev = init_loop(dev, size, fstype, journal_size, inode_size, mkfsoptions, reformat) elif reformat or (need_format(fstype, dev) and autoformat == 'yes'): mkfs(dev, size, fstype, journal_size, inode_size, mkfsoptions, isblock=0) # else: # panic("device:", dev, # "not prepared, and autoformat is not set.\n", # "Rerun with --reformat option to format ALL filesystems") return dev def if2addr(iface): """lookup IP address for an interface""" 
rc, out = run("/sbin/ifconfig", iface) if rc or not out: return None addr = string.split(out[1])[1] ip = string.split(addr, ':')[1] return ip def sys_get_elan_position_file(): procfiles = ["/proc/elan/device0/position", "/proc/qsnet/elan4/device0/position", "/proc/qsnet/elan3/device0/position"] for p in procfiles: if os.access(p, os.R_OK): return p return "" def sys_get_local_nid(net_type, wildcard, cluster_id): """Return the local nid.""" local = "" if sys_get_elan_position_file(): local = sys_get_local_address('elan', '*', cluster_id) else: local = sys_get_local_address(net_type, wildcard, cluster_id) return local def sys_get_local_address(net_type, wildcard, cluster_id): """Return the local address for the network type.""" local = "" if net_type in ('tcp',): if ':' in wildcard: iface, star = string.split(wildcard, ':') local = if2addr(iface) if not local: panic ("unable to determine ip for:", wildcard) else: host = socket.gethostname() local = socket.gethostbyname(host) elif net_type == 'elan': # awk '/NodeId/ { print $2 }' 'sys_get_elan_position_file()' f = sys_get_elan_position_file() if not f: panic ("unable to determine local Elan ID") try: fp = open(f, 'r') lines = fp.readlines() fp.close() for l in lines: a = string.split(l) if a[0] == 'NodeId': elan_id = a[1] break try: nid = my_int(cluster_id) + my_int(elan_id) local = "%d" % (nid) except ValueError, e: local = elan_id except IOError, e: log(e) elif net_type == 'gm': fixme("automatic local address for GM") elif net_type == 'scimac': scinode="/opt/scali/sbin/scinode" if os.path.exists(scinode): (rc,local) = run(scinode) else: panic (scinode, " not found on node with scimac networking") if rc: panic (scinode, " failed") local=string.rstrip(local[0]) return local def mod_loaded(modname): """Check if a module is already loaded. 
Look in /proc/modules for it.""" try: fp = open('/proc/modules') lines = fp.readlines() fp.close() # please forgive my tired fingers for this one ret = filter(lambda word, mod=modname: word == mod, map(lambda line: string.split(line)[0], lines)) return ret except Exception, e: return 0 # XXX: instead of device_list, ask for $name and see what we get def is_prepared(name): """Return true if a device exists for the name""" if config.lctl_dump: return 0 if (config.noexec or config.record) and config.cleanup: return 1 try: # expect this format: # 1 UP ldlm ldlm ldlm_UUID 2 out = lctl.device_list() for s in out: if name == string.split(s)[3]: return 1 except CommandError, e: e.dump() return 0 def is_network_prepared(): """If the any device exists, then assume that all networking has been configured""" out = lctl.device_list() return len(out) > 0 def fs_is_mounted(path): """Return true if path is a mounted lustre filesystem""" try: fp = open('/proc/mounts') lines = fp.readlines() fp.close() for l in lines: a = string.split(l) if a[1] == path and a[2] == 'lustre_lite': return 1 except IOError, e: log(e) return 0 class kmod: """Manage kernel modules""" def __init__(self, lustre_dir, portals_dir): self.lustre_dir = lustre_dir self.portals_dir = portals_dir self.kmodule_list = [] def add_portals_module(self, dev_dir, modname): """Append a module to list of modules to load.""" self.kmodule_list.append((self.portals_dir, dev_dir, modname)) def add_lustre_module(self, dev_dir, modname): """Append a module to list of modules to load.""" self.kmodule_list.append((self.lustre_dir, dev_dir, modname)) def load_module(self): """Load all the modules in the list in the order they appear.""" for src_dir, dev_dir, mod in self.kmodule_list: if mod_loaded(mod) and not config.noexec: continue log ('loading module:', mod, 'srcdir', src_dir, 'devdir', dev_dir) if src_dir: module = find_module(src_dir, dev_dir, mod) if not module: panic('module not found:', mod) (rc, out) = run('/sbin/insmod', 
module) if rc: raise CommandError('insmod', out, rc) else: (rc, out) = run('/sbin/modprobe', mod) if rc: raise CommandError('modprobe', out, rc) def cleanup_module(self): """Unload the modules in the list in reverse order.""" rev = self.kmodule_list rev.reverse() for src_dir, dev_dir, mod in rev: if not mod_loaded(mod) and not config.noexec: continue # debug hack if mod == 'portals' and config.dump: lctl.dump(config.dump) log('unloading module:', mod) (rc, out) = run('/sbin/rmmod', mod) if rc: log('! unable to unload module:', mod) logall(out) # ============================================================ # Classes to prepare and cleanup the various objects # class Module: """ Base class for the rest of the modules. The default cleanup method is defined here, as well as some utilitiy funcs. """ def __init__(self, module_name, db): self.db = db self.module_name = module_name self.name = self.db.getName() self.uuid = self.db.getUUID() self._server = None self._connected = 0 self.kmod = kmod(config.lustre, config.portals) def info(self, *args): msg = string.join(map(str,args)) print self.module_name + ":", self.name, self.uuid, msg def cleanup(self): """ default cleanup, used for most modules """ self.info() try: lctl.cleanup(self.name, self.uuid, config.force) except CommandError, e: log(self.module_name, "cleanup failed: ", self.name) e.dump() cleanup_error(e.rc) def add_portals_module(self, dev_dir, modname): """Append a module to list of modules to load.""" self.kmod.add_portals_module(dev_dir, modname) def add_lustre_module(self, dev_dir, modname): """Append a module to list of modules to load.""" self.kmod.add_lustre_module(dev_dir, modname) def load_module(self): """Load all the modules in the list in the order they appear.""" self.kmod.load_module() def cleanup_module(self): """Unload the modules in the list in reverse order.""" if self.safe_to_clean(): self.kmod.cleanup_module() def safe_to_clean(self): return 1 def safe_to_clean_modules(self): return 
self.safe_to_clean() class Network(Module): def __init__(self,db): Module.__init__(self, 'NETWORK', db) self.net_type = self.db.get_val('nettype') self.nid = self.db.get_val('nid', '*') self.cluster_id = self.db.get_val('clusterid', "0") self.port = self.db.get_val_int('port', 0) self.send_mem = self.db.get_val_int('sendmem', DEFAULT_TCPBUF) self.recv_mem = self.db.get_val_int('recvmem', DEFAULT_TCPBUF) self.irq_affinity = self.db.get_val_int('irqaffinity', 0) if '*' in self.nid: self.nid = sys_get_local_nid(self.net_type, self.nid, self.cluster_id) if not self.nid: panic("unable to set nid for", self.net_type, self.nid, cluster_id) self.generic_nid = 1 debug("nid:", self.nid) else: self.generic_nid = 0 self.nid_uuid = self.nid_to_uuid(self.nid) self.hostaddr = self.db.get_val('hostaddr', self.nid) if '*' in self.hostaddr: self.hostaddr = sys_get_local_address(self.net_type, self.hostaddr, self.cluster_id) if not self.hostaddr: panic("unable to set hostaddr for", self.net_type, self.hostaddr, self.cluster_id) debug("hostaddr:", self.hostaddr) self.add_portals_module("libcfs", 'portals') if node_needs_router(): self.add_portals_module("router", 'kptlrouter') if self.net_type == 'tcp': self.add_portals_module("knals/socknal", 'ksocknal') if self.net_type == 'elan': self.add_portals_module("knals/qswnal", 'kqswnal') if self.net_type == 'gm': self.add_portals_module("knals/gmnal", 'kgmnal') if self.net_type == 'scimac': self.add_portals_module("knals/scimacnal", 'kscimacnal') def nid_to_uuid(self, nid): return "NID_%s_UUID" %(nid,) def prepare(self): if is_network_prepared(): return self.info(self.net_type, self.nid, self.port) if not (config.record and self.generic_nid): lctl.network(self.net_type, self.nid) if self.net_type == 'tcp': sys_tweak_socknal() if self.net_type == 'elan': sys_optimize_elan() if self.port and node_is_router(): run_one_acceptor(self.port) self.connect_peer_gateways() def connect_peer_gateways(self): for router in self.db.lookup_class('node'): 
if router.get_val_int('router', 0): for netuuid in router.get_networks(): net = self.db.lookup(netuuid) gw = Network(net) if (gw.cluster_id == self.cluster_id and gw.net_type == self.net_type): if gw.nid != self.nid: lctl.connect(gw) def disconnect_peer_gateways(self): for router in self.db.lookup_class('node'): if router.get_val_int('router', 0): for netuuid in router.get_networks(): net = self.db.lookup(netuuid) gw = Network(net) if (gw.cluster_id == self.cluster_id and gw.net_type == self.net_type): if gw.nid != self.nid: try: lctl.disconnect(gw) except CommandError, e: print "disconnect failed: ", self.name e.dump() cleanup_error(e.rc) def safe_to_clean(self): return not is_network_prepared() def cleanup(self): self.info(self.net_type, self.nid, self.port) if self.port: stop_acceptor(self.port) if node_is_router(): self.disconnect_peer_gateways() class RouteTable(Module): def __init__(self,db): Module.__init__(self, 'ROUTES', db) def server_for_route(self, net_type, gw, gw_cluster_id, tgt_cluster_id, lo, hi): # only setup connections for tcp NALs srvdb = None if not net_type in ('tcp',): return None # connect to target if route is to single node and this node is the gw if lo == hi and local_interface(net_type, gw_cluster_id, gw): if not local_cluster(net_type, tgt_cluster_id): panic("target", lo, " not on the local cluster") srvdb = self.db.nid2server(lo, net_type, gw_cluster_id) # connect to gateway if this node is not the gw elif (local_cluster(net_type, gw_cluster_id) and not local_interface(net_type, gw_cluster_id, gw)): srvdb = self.db.nid2server(gw, net_type, gw_cluster_id) else: return None if not srvdb: panic("no server for nid", lo) return None return Network(srvdb) def prepare(self): if is_network_prepared(): return self.info() for net_type, gw, gw_cluster_id, tgt_cluster_id, lo, hi in self.db.get_route_tbl(): lctl.add_route(net_type, gw, lo, hi) srv = self.server_for_route(net_type, gw, gw_cluster_id, tgt_cluster_id, lo, hi) if srv: lctl.connect(srv) 
def safe_to_clean(self): return not is_network_prepared() def cleanup(self): if is_network_prepared(): # the network is still being used, don't clean it up return for net_type, gw, gw_cluster_id, tgt_cluster_id, lo, hi in self.db.get_route_tbl(): srv = self.server_for_route(net_type, gw, gw_cluster_id, tgt_cluster_id, lo, hi) if srv: try: lctl.disconnect(srv) except CommandError, e: print "disconnect failed: ", self.name e.dump() cleanup_error(e.rc) try: lctl.del_route(net_type, gw, lo, hi) except CommandError, e: print "del_route failed: ", self.name e.dump() cleanup_error(e.rc) class Management(Module): def __init__(self, db): Module.__init__(self, 'MGMT', db) self.add_lustre_module('lvfs', 'lvfs') self.add_lustre_module('obdclass', 'obdclass') self.add_lustre_module('ptlrpc', 'ptlrpc') self.add_lustre_module('mgmt', 'mgmt_svc') def prepare(self): if is_prepared(self.name): return self.info() lctl.newdev("mgmt", self.name, self.uuid) def safe_to_clean(self): return 1 def cleanup(self): if is_prepared(self.name): Module.cleanup(self) # This is only needed to load the modules; the LDLM device # is now created automatically. 
class LDLM(Module): def __init__(self,db): Module.__init__(self, 'LDLM', db) self.add_lustre_module('lvfs', 'lvfs') self.add_lustre_module('obdclass', 'obdclass') self.add_lustre_module('ptlrpc', 'ptlrpc') def prepare(self): return def cleanup(self): return class LOV(Module): def __init__(self, db, uuid, fs_name, name_override = None, config_only = None): Module.__init__(self, 'LOV', db) if name_override != None: self.name = "lov_%s" % name_override self.add_lustre_module('lov', 'lov') self.mds_uuid = self.db.get_first_ref('mds') self.stripe_sz = self.db.get_val_int('stripesize', 65536) self.stripe_off = self.db.get_val_int('stripeoffset', 0) self.pattern = self.db.get_val_int('stripepattern', 0) self.devlist = self.db.get_refs('obd') self.stripe_cnt = self.db.get_val_int('stripecount', len(self.devlist)) self.osclist = [] self.desc_uuid = self.uuid self.uuid = generate_client_uuid(self.name) self.fs_name = fs_name if config_only: self.config_only = 1 return self.config_only = None mds= self.db.lookup(self.mds_uuid) self.mds_name = mds.getName() for obd_uuid in self.devlist: obd = self.db.lookup(obd_uuid) osc = get_osc(obd, self.uuid, fs_name) if osc: self.osclist.append(osc) else: panic('osc not found:', obd_uuid) def prepare(self): if is_prepared(self.name): return if self.config_only: panic("Can't prepare config_only LOV ", self.name) for osc in self.osclist: try: # Only ignore connect failures with --force, which # isn't implemented here yet. 
osc.prepare(ignore_connect_failure=0) except CommandError, e: print "Error preparing OSC %s\n" % osc.uuid raise e self.info(self.mds_uuid, self.stripe_cnt, self.stripe_sz, self.stripe_off, self.pattern, self.devlist, self.mds_name) lctl.lov_setup(self.name, self.uuid, self.desc_uuid, self.mds_name, self.stripe_cnt, self.stripe_sz, self.stripe_off, self.pattern, string.join(self.devlist)) def cleanup(self): if is_prepared(self.name): Module.cleanup(self) if self.config_only: panic("Can't clean up config_only LOV ", self.name) for osc in self.osclist: osc.cleanup() def load_module(self): if self.config_only: panic("Can't load modules for config_only LOV ", self.name) for osc in self.osclist: osc.load_module() break Module.load_module(self) def cleanup_module(self): if self.config_only: panic("Can't cleanup modules for config_only LOV ", self.name) Module.cleanup_module(self) for osc in self.osclist: osc.cleanup_module() break class MDSDEV(Module): def __init__(self,db): Module.__init__(self, 'MDSDEV', db) self.devpath = self.db.get_val('devpath','') self.size = self.db.get_val_int('devsize', 0) self.journal_size = self.db.get_val_int('journalsize', 0) self.fstype = self.db.get_val('fstype', '') self.nspath = self.db.get_val('nspath', '') self.mkfsoptions = self.db.get_val('mkfsoptions', '') # overwrite the orignal MDSDEV name and uuid with the MDS name and uuid target_uuid = self.db.get_first_ref('target') mds = self.db.lookup(target_uuid) self.name = mds.getName() self.filesystem_uuids = mds.get_refs('filesystem') # FIXME: if fstype not set, then determine based on kernel version self.format = self.db.get_val('autoformat', "no") if mds.get_val('failover', 0): self.failover_mds = 'f' else: self.failover_mds = 'n' active_uuid = get_active_target(mds) if not active_uuid: panic("No target device found:", target_uuid) if active_uuid == self.uuid: self.active = 1 else: self.active = 0 if self.active and config.group and config.group != ost.get_val('group'): self.active = 
0 self.inode_size = self.db.get_val_int('inodesize', 0) if self.inode_size == 0: # find the LOV for this MDS lovconfig_uuid = mds.get_first_ref('lovconfig') if not lovconfig_uuid: panic("No LOV config found for MDS ", mds.name) lovconfig = mds.lookup(lovconfig_uuid) lov_uuid = lovconfig.get_first_ref('lov') if not lov_uuid: panic("No LOV found for lovconfig ", lovconfig.name) lov = LOV(self.db.lookup(lov_uuid), lov_uuid, 'FS_name', config_only = 1) # default stripe count controls default inode_size stripe_count = lov.stripe_cnt if stripe_count > 77: self.inode_size = 4096 elif stripe_count > 35: self.inode_size = 2048 elif stripe_count > 13: self.inode_size = 1024 elif stripe_count > 3: self.inode_size = 512 else: self.inode_size = 256 self.target_dev_uuid = self.uuid self.uuid = target_uuid # modules self.add_lustre_module('mdc', 'mdc') self.add_lustre_module('osc', 'osc') self.add_lustre_module('lov', 'lov') self.add_lustre_module('mds', 'mds') if self.fstype: self.add_lustre_module('lvfs', 'fsfilt_%s' % (self.fstype)) def load_module(self): if self.active: Module.load_module(self) def prepare(self): if is_prepared(self.name): return if not self.active: debug(self.uuid, "not active") return if config.reformat: # run write_conf automatically, if --reformat used self.write_conf() self.info(self.devpath, self.fstype, self.size, self.format) run_acceptors() # never reformat here blkdev = block_dev(self.devpath, self.size, self.fstype, 0, self.format, self.journal_size, self.inode_size, self.mkfsoptions) if not is_prepared('MDT'): lctl.newdev("mdt", 'MDT', 'MDT_UUID', setup ="") try: lctl.newdev("mds", self.name, self.uuid, setup ="%s %s %s" %(blkdev, self.fstype, self.name)) except CommandError, e: if e.rc == 2: panic("MDS is missing the config log. 
Need to run " + "lconf --write_conf.") else: raise e def write_conf(self): if is_prepared(self.name): return self.info(self.devpath, self.fstype, self.format) blkdev = block_dev(self.devpath, self.size, self.fstype, config.reformat, self.format, self.journal_size, self.inode_size, self.mkfsoptions) lctl.newdev("mds", self.name, self.uuid, setup ="%s %s" %(blkdev, self.fstype)) # record logs for the MDS lov for uuid in self.filesystem_uuids: log("recording clients for filesystem:", uuid) fs = self.db.lookup(uuid) obd_uuid = fs.get_first_ref('obd') client_uuid = generate_client_uuid(self.name) client = VOSC(self.db.lookup(obd_uuid), client_uuid, self.name, self.name) config.record = 1 lctl.record(self.name, self.name) client.prepare() lctl.mount_option(self.name, client.get_name(), "") lctl.end_record() config.cleanup = 1 lctl.record(self.name, self.name + '-clean') client.cleanup() lctl.del_mount_option(self.name) lctl.end_record() config.cleanup = 0 config.record = 0 # record logs for each client if config.ldapurl: config_options = "--ldapurl " + config.ldapurl + " --config " + config.config else: config_options = CONFIG_FILE for node_db in self.db.lookup_class('node'): client_name = node_db.getName() for prof_uuid in node_db.get_refs('profile'): prof_db = node_db.lookup(prof_uuid) # refactor this into a funtion to test "clientness" # of a node. 
for ref_class, ref_uuid in prof_db.get_all_refs(): if ref_class in ('mountpoint','echoclient'): debug("recording", client_name) old_noexec = config.noexec config.noexec = 0 noexec_opt = ('', '-n') ret, out = run (sys.argv[0], noexec_opt[old_noexec == 1], " -v --record --nomod", "--record_log", client_name, "--record_device", self.name, "--node", client_name, config_options) if config.verbose: for s in out: log("record> ", string.strip(s)) ret, out = run (sys.argv[0], noexec_opt[old_noexec == 1], "--cleanup -v --record --nomod", "--record_log", client_name + "-clean", "--record_device", self.name, "--node", client_name, config_options) if config.verbose: for s in out: log("record> ", string.strip(s)) config.noexec = old_noexec try: lctl.cleanup(self.name, self.uuid, 0, 0) except CommandError, e: log(self.module_name, "cleanup failed: ", self.name) e.dump() cleanup_error(e.rc) Module.cleanup(self) clean_loop(self.devpath) def msd_remaining(self): out = lctl.device_list() for s in out: if string.split(s)[2] in ('mds',): return 1 def safe_to_clean(self): return self.active def safe_to_clean_modules(self): return not self.msd_remaining() def cleanup(self): if not self.active: debug(self.uuid, "not active") return self.info() if is_prepared(self.name): try: lctl.cleanup(self.name, self.uuid, config.force, config.failover) except CommandError, e: log(self.module_name, "cleanup failed: ", self.name) e.dump() cleanup_error(e.rc) Module.cleanup(self) if not self.msd_remaining() and is_prepared('MDT'): try: lctl.cleanup("MDT", "MDT_UUID", config.force, config.failover) except CommandError, e: print "cleanup failed: ", self.name e.dump() cleanup_error(e.rc) clean_loop(self.devpath) class OSD(Module): def __init__(self, db): Module.__init__(self, 'OSD', db) self.osdtype = self.db.get_val('osdtype') self.devpath = self.db.get_val('devpath', '') self.size = self.db.get_val_int('devsize', 0) self.journal_size = self.db.get_val_int('journalsize', 0) self.inode_size = 
self.db.get_val_int('inodesize', 0) self.mkfsoptions = self.db.get_val('mkfsoptions', '') self.fstype = self.db.get_val('fstype', '') self.nspath = self.db.get_val('nspath', '') target_uuid = self.db.get_first_ref('target') ost = self.db.lookup(target_uuid) self.name = ost.getName() self.format = self.db.get_val('autoformat', 'yes') if ost.get_val('failover', 0): self.failover_ost = 'f' else: self.failover_ost = 'n' active_uuid = get_active_target(ost) if not active_uuid: panic("No target device found:", target_uuid) if active_uuid == self.uuid: self.active = 1 else: self.active = 0 if self.active and config.group and config.group != ost.get_val('group'): self.active = 0 self.target_dev_uuid = self.uuid self.uuid = target_uuid # modules self.add_lustre_module('ost', 'ost') # FIXME: should we default to ext3 here? if self.fstype: self.add_lustre_module('lvfs' , 'fsfilt_%s' % (self.fstype)) self.add_lustre_module(self.osdtype, self.osdtype) def load_module(self): if self.active: Module.load_module(self) # need to check /proc/mounts and /etc/mtab before # formatting anything. # FIXME: check if device is already formatted. 
def prepare(self): if is_prepared(self.name): return if not self.active: debug(self.uuid, "not active") return self.info(self.osdtype, self.devpath, self.size, self.fstype, self.format, self.journal_size, self.inode_size) run_acceptors() if self.osdtype == 'obdecho': blkdev = '' else: blkdev = block_dev(self.devpath, self.size, self.fstype, config.reformat, self.format, self.journal_size, self.inode_size, self.mkfsoptions) lctl.newdev(self.osdtype, self.name, self.uuid, setup ="%s %s %s" %(blkdev, self.fstype, self.failover_ost)) if not is_prepared('OSS'): lctl.newdev("ost", 'OSS', 'OSS_UUID', setup ="") def osd_remaining(self): out = lctl.device_list() for s in out: if string.split(s)[2] in ('obdfilter', 'obdecho'): return 1 def safe_to_clean(self): return self.active def safe_to_clean_modules(self): return not self.osd_remaining() def cleanup(self): if not self.active: debug(self.uuid, "not active") return if is_prepared(self.name): self.info() try: lctl.cleanup(self.name, self.uuid, config.force, config.failover) except CommandError, e: log(self.module_name, "cleanup failed: ", self.name) e.dump() cleanup_error(e.rc) if not self.osd_remaining() and is_prepared('OSS'): try: lctl.cleanup("OSS", "OSS_UUID", config.force, config.failover) except CommandError, e: print "cleanup failed: ", self.name e.dump() cleanup_error(e.rc) if not self.osdtype == 'obdecho': clean_loop(self.devpath) def mgmt_uuid_for_fs(mtpt_name): if not mtpt_name: return '' mtpt_db = toplevel.lookup_name(mtpt_name) fs_uuid = mtpt_db.get_first_ref('filesystem') fs = toplevel.lookup(fs_uuid) if not fs: return '' return fs.get_first_ref('mgmt') # Generic client module, used by OSC and MDC class Client(Module): def __init__(self, tgtdb, uuid, module, fs_name, self_name=None, module_dir=None): self.target_name = tgtdb.getName() self.target_uuid = tgtdb.getUUID() self.db = tgtdb self.tgt_dev_uuid = get_active_target(tgtdb) if not self.tgt_dev_uuid: panic("No target device found for target:", 
self.target_name) self.kmod = kmod(config.lustre, config.portals) self._server = None self._connected = 0 self.module = module self.module_name = string.upper(module) if not self_name: self.name = '%s_%s_%s_%s' % (self.module_name, socket.gethostname(), self.target_name, fs_name) else: self.name = self_name self.uuid = uuid self.lookup_server(self.tgt_dev_uuid) mgmt_uuid = mgmt_uuid_for_fs(fs_name) if mgmt_uuid: self.mgmt_name = mgmtcli_name_for_uuid(mgmt_uuid) else: self.mgmt_name = '' self.fs_name = fs_name if not module_dir: module_dir = module self.add_lustre_module(module_dir, module) def lookup_server(self, srv_uuid): """ Lookup a server's network information """ self._server_nets = get_ost_net(self.db, srv_uuid) if len(self._server_nets) == 0: panic ("Unable to find a server for:", srv_uuid) def get_servers(self): return self._server_nets def prepare(self, ignore_connect_failure = 0): self.info(self.target_uuid) if is_prepared(self.name): self.cleanup() try: srv = choose_local_server(self.get_servers()) if srv: lctl.connect(srv) else: routes = find_route(self.get_servers()) if len(routes) == 0: panic ("no route to", self.target_uuid) for (srv, r) in routes: lctl.add_route_host(r[0], srv.nid_uuid, r[1], r[3]) except CommandError, e: if not ignore_connect_failure: raise e if srv: if self.target_uuid in config.inactive and self.permits_inactive(): debug("%s inactive" % self.target_uuid) inactive_p = "inactive" else: debug("%s active" % self.target_uuid) inactive_p = "" lctl.newdev(self.module, self.name, self.uuid, setup ="%s %s %s %s" % (self.target_uuid, srv.nid_uuid, inactive_p, self.mgmt_name)) def cleanup(self): if is_prepared(self.name): Module.cleanup(self) try: srv = choose_local_server(self.get_servers()) if srv: lctl.disconnect(srv) else: for (srv, r) in find_route(self.get_servers()): lctl.del_route_host(r[0], srv.nid_uuid, r[1], r[3]) except CommandError, e: log(self.module_name, "cleanup failed: ", self.name) e.dump() cleanup_error(e.rc) class 
MDC(Client): def __init__(self, db, uuid, fs_name): Client.__init__(self, db, uuid, 'mdc', fs_name) def permits_inactive(self): return 0 class OSC(Client): def __init__(self, db, uuid, fs_name): Client.__init__(self, db, uuid, 'osc', fs_name) def permits_inactive(self): return 1 def mgmtcli_name_for_uuid(uuid): return 'MGMTCLI_%s' % uuid class ManagementClient(Client): def __init__(self, db, uuid): Client.__init__(self, db, uuid, 'mgmt_cli', '', self_name = mgmtcli_name_for_uuid(db.getUUID()), module_dir = 'mgmt') class COBD(Module): def __init__(self, db): Module.__init__(self, 'COBD', db) self.real_uuid = self.db.get_first_ref('realobd') self.cache_uuid = self.db.get_first_ref('cacheobd') self.add_lustre_module('cobd' , 'cobd') # need to check /proc/mounts and /etc/mtab before # formatting anything. # FIXME: check if device is already formatted. def prepare(self): if is_prepared(self.name): return self.info(self.real_uuid, self.cache_uuid) lctl.newdev("cobd", self.name, self.uuid, setup ="%s %s" %(self.real_uuid, self.cache_uuid)) # virtual interface for OSC and LOV class VOSC(Module): def __init__(self, db, uuid, fs_name, name_override = None): Module.__init__(self, 'VOSC', db) if db.get_class() == 'lov': self.osc = LOV(db, uuid, fs_name, name_override) else: self.osc = get_osc(db, uuid, fs_name) def get_uuid(self): return self.osc.uuid def get_name(self): return self.osc.name def prepare(self): self.osc.prepare() def cleanup(self): self.osc.cleanup() def load_module(self): self.osc.load_module() def cleanup_module(self): self.osc.cleanup_module() class ECHO_CLIENT(Module): def __init__(self,db): Module.__init__(self, 'ECHO_CLIENT', db) self.add_lustre_module('obdecho', 'obdecho') self.obd_uuid = self.db.get_first_ref('obd') obd = self.db.lookup(self.obd_uuid) self.uuid = generate_client_uuid(self.name) self.osc = VOSC(obd, self.uuid, self.name) def prepare(self): if is_prepared(self.name): return run_acceptors() self.osc.prepare() # XXX This is so cheating. 
-p self.info(self.obd_uuid) lctl.newdev("echo_client", self.name, self.uuid, setup = self.osc.get_name()) def cleanup(self): if is_prepared(self.name): Module.cleanup(self) self.osc.cleanup() def load_module(self): self.osc.load_module() Module.load_module(self) def cleanup_module(self): Module.cleanup_module(self) self.osc.cleanup_module() def generate_client_uuid(name): client_uuid = '%05x_%.19s_%05x%05x' % (int(random.random() * 1048576), name, int(random.random() * 1048576), int(random.random() * 1048576)) return client_uuid[:36] class Mountpoint(Module): def __init__(self,db): Module.__init__(self, 'MTPT', db) self.path = self.db.get_val('path') self.fs_uuid = self.db.get_first_ref('filesystem') fs = self.db.lookup(self.fs_uuid) self.mds_uuid = fs.get_first_ref('mds') self.obd_uuid = fs.get_first_ref('obd') self.mgmt_uuid = fs.get_first_ref('mgmt') obd = self.db.lookup(self.obd_uuid) client_uuid = generate_client_uuid(self.name) self.vosc = VOSC(obd, client_uuid, self.name) self.mdc = get_mdc(db, client_uuid, self.name, self.mds_uuid) self.add_lustre_module('mdc', 'mdc') self.add_lustre_module('llite', 'llite') if self.mgmt_uuid: self.mgmtcli = ManagementClient(db.lookup(self.mgmt_uuid), client_uuid) else: self.mgmtcli = None def prepare(self): if fs_is_mounted(self.path): log(self.path, "already mounted.") return run_acceptors() if self.mgmtcli: self.mgmtcli.prepare() self.vosc.prepare() self.mdc.prepare() mdc_name = self.mdc.name self.info(self.path, self.mds_uuid, self.obd_uuid) if config.record or config.lctl_dump: lctl.mount_option(local_node_name, self.vosc.get_name(), mdc_name) return cmd = "mount -t lustre_lite -o osc=%s,mdc=%s %s %s" % \ (self.vosc.get_name(), mdc_name, config.config, self.path) run("mkdir", self.path) ret, val = run(cmd) if ret: self.mdc.cleanup() self.vosc.cleanup() panic("mount failed:", self.path, ":", string.join(val)) def cleanup(self): self.info(self.path, self.mds_uuid,self.obd_uuid) if config.record or config.lctl_dump: 
lctl.del_mount_option(local_node_name) else: if fs_is_mounted(self.path): if config.force: (rc, out) = run("umount", "-f", self.path) else: (rc, out) = run("umount", self.path) if rc: raise CommandError('umount', out, rc) if fs_is_mounted(self.path): panic("fs is still mounted:", self.path) self.mdc.cleanup() self.vosc.cleanup() if self.mgmtcli: self.mgmtcli.cleanup() def load_module(self): if self.mgmtcli: self.mgmtcli.load_module() self.vosc.load_module() Module.load_module(self) def cleanup_module(self): Module.cleanup_module(self) self.vosc.cleanup_module() if self.mgmtcli: self.mgmtcli.cleanup_module() # ============================================================ # misc query functions def get_ost_net(self, osd_uuid): srv_list = [] if not osd_uuid: return srv_list osd = self.lookup(osd_uuid) node_uuid = osd.get_first_ref('node') node = self.lookup(node_uuid) if not node: panic("unable to find node for osd_uuid:", osd_uuid, " node_ref:", node_uuid) for net_uuid in node.get_networks(): db = node.lookup(net_uuid) srv_list.append(Network(db)) return srv_list # the order of iniitailization is based on level. def getServiceLevel(self): type = self.get_class() ret=0; if type in ('network',): ret = 5 elif type in ('routetbl',): ret = 6 elif type in ('ldlm',): ret = 20 elif type in ('mgmt',): ret = 25 elif type in ('osd', 'cobd'): ret = 30 elif type in ('mdsdev',): ret = 40 elif type in ('mountpoint', 'echoclient'): ret = 70 else: panic("Unknown type: ", type) if ret < config.minlevel or ret > config.maxlevel: ret = 0 return ret # # return list of services in a profile. 
# list is a list of tuples
# [(level, db_object),]
def getServices(self):
    """Return the services referenced by profile db 'self', sorted by level.

    Services whose level is 0 (outside the configured level range) are
    left out; a reference that cannot be resolved causes a panic.
    """
    list = []
    for ref_class, ref_uuid in self.get_all_refs():
        servdb = self.lookup(ref_uuid)
        if servdb:
            level = getServiceLevel(servdb)
            if level > 0:
                list.append((level, servdb))
        else:
            panic('service not found: ' + ref_uuid)

    list.sort()
    return list


############################################################
# MDC UUID hack -
# FIXME: clean this mess up!
#
# OSC is no longer in the xml, so we have to fake it.
# this is getting ugly and begging for another refactoring
def get_osc(ost_db, uuid, fs_name):
    """Build an OSC for the given OST db."""
    osc = OSC(ost_db, uuid, fs_name)
    return osc

def get_mdc(db, uuid, fs_name, mds_uuid):
    """Build an MDC for the MDS named by mds_uuid; panics if it is unknown."""
    mds_db = db.lookup(mds_uuid);
    if not mds_db:
        panic("no mds:", mds_uuid)
    mdc = MDC(mds_db, uuid, fs_name)
    return mdc

############################################################
# routing ("rooting")

# list of (nettype, cluster_id, nid)
local_clusters = []

def find_local_clusters(node_db):
    """Register this node's networks in local_clusters and its acceptors.

    A network with a port > 0 gets an AcceptorHandler; two networks on the
    same port cause a panic.
    """
    global local_clusters
    for netuuid in node_db.get_networks():
        net = node_db.lookup(netuuid)
        srv = Network(net)
        debug("add_local", netuuid)
        local_clusters.append((srv.net_type, srv.cluster_id, srv.nid))
        if srv.port > 0:
            if acceptors.has_key(srv.port):
                panic("duplicate port:", srv.port)
            acceptors[srv.port] = AcceptorHandler(srv.port, srv.net_type,
                                                  srv.send_mem, srv.recv_mem,
                                                  srv.irq_affinity)

# This node is a gateway.
is_router = 0
def node_is_router():
    return is_router

# If there are any routers found in the config, then this will be true
# and all nodes will load kptlrouter.
needs_router = 0
def node_needs_router():
    return needs_router or is_router

# list of (nettype, gw, tgt_cluster_id, lo, hi)
# Currently, these local routes are only added to kptlrouter route
# table if they are needed to connect to a specific server.  This
# should be changed so all available routes are loaded, and the
# ptlrouter can make all the decisions.
local_routes = []

def find_local_routes(lustre):
    """ Scan the lustre config looking for routers . Build list of
    routes. """
    global local_routes, needs_router
    local_routes = []
    list = lustre.lookup_class('node')
    for router in list:
        if router.get_val_int('router', 0):
            needs_router = 1
            for (local_type, local_cluster_id, local_nid) in local_clusters:
                # find the router's interface on one of our own networks
                gw = None
                for netuuid in router.get_networks():
                    db = router.lookup(netuuid)
                    if (local_type == db.get_val('nettype') and
                        local_cluster_id == db.get_val('clusterid')):
                        gw = db.get_val('nid')
                        break
                if gw:
                    debug("find_local_routes: gw is", gw)
                    for route in router.get_local_routes(local_type, gw):
                        local_routes.append(route)
    debug("find_local_routes:", local_routes)


def choose_local_server(srv_list):
    """Return the first server on one of our clusters, or None."""
    for srv in srv_list:
        if local_cluster(srv.net_type, srv.cluster_id):
            return srv

def local_cluster(net_type, cluster_id):
    """Return 1 if (net_type, cluster_id) is one of this node's clusters."""
    for cluster in local_clusters:
        if net_type == cluster[0] and cluster_id == cluster[1]:
            return 1
    return 0

def local_interface(net_type, cluster_id, nid):
    """Return 1 if (net_type, cluster_id, nid) is one of this node's interfaces."""
    for cluster in local_clusters:
        if (net_type == cluster[0] and cluster_id == cluster[1]
            and nid == cluster[2]):
            return 1
    return 0

def find_route(srv_list):
    """Return a list of (server, route) pairs usable to reach srv_list.

    A route matches a server when the server's nid lies within the
    route's lo..hi range and the route targets the server's cluster.
    The result is empty when nothing matches.
    """
    result = []
    # The former 'frm_type = local_clusters[0][0]' was never used and
    # raised IndexError when no local cluster was registered, hiding the
    # caller's own "no route" handling.
    for srv in srv_list:
        debug("find_route: srv:", srv.nid, "type: ", srv.net_type)
        to_type = srv.net_type
        to = srv.nid
        cluster_id = srv.cluster_id
        debug ('looking for route to', to_type, to)
        for r in local_routes:
            debug("find_route: ", r)
            if (r[3] <= to and to <= r[4]) and cluster_id == r[2]:
                result.append((srv, r))
    return result

def get_active_target(db):
    """Return the uuid of the device currently serving target db.

    A --select entry for the target name takes precedence over the
    target's 'active' reference.
    """
    target_uuid = db.getUUID()
    target_name = db.getName()
    node_name = get_select(target_name)
    if node_name:
        tgt_dev_uuid = db.get_node_tgt_dev(node_name, target_uuid)
    else:
        tgt_dev_uuid = db.get_first_ref('active')
    return tgt_dev_uuid

def get_server_by_nid_uuid(db, nid_uuid):
    """Return the Network whose nid_uuid matches, or None."""
    for n in db.lookup_class("network"):
        net = Network(n)
        if net.nid_uuid == nid_uuid:
            return net


############################################################
# lconf level logic
# Start a service.
def newService(db): type = db.get_class() debug('Service:', type, db.getName(), db.getUUID()) n = None if type == 'ldlm': n = LDLM(db) elif type == 'lov': n = LOV(db, "YOU_SHOULD_NEVER_SEE_THIS_UUID") elif type == 'network': n = Network(db) elif type == 'routetbl': n = RouteTable(db) elif type == 'osd': n = OSD(db) elif type == 'cobd': n = COBD(db) elif type == 'mdsdev': n = MDSDEV(db) elif type == 'mountpoint': n = Mountpoint(db) elif type == 'echoclient': n = ECHO_CLIENT(db) elif type == 'mgmt': n = Management(db) else: panic ("unknown service type:", type) return n # # Prepare the system to run lustre using a particular profile # in a the configuration. # * load & the modules # * setup networking for the current node # * make sure partitions are in place and prepared # * initialize devices with lctl # Levels is important, and needs to be enforced. def for_each_profile(db, prof_list, operation): for prof_uuid in prof_list: prof_db = db.lookup(prof_uuid) if not prof_db: panic("profile:", profile, "not found.") services = getServices(prof_db) operation(services) def doWriteconf(services): if config.nosetup: return for s in services: if s[1].get_class() == 'mdsdev': n = newService(s[1]) n.write_conf() def doSetup(services): if config.nosetup: return for s in services: n = newService(s[1]) n.prepare() def doModules(services): if config.nomod: return for s in services: n = newService(s[1]) n.load_module() def doCleanup(services): if config.nosetup: return services.reverse() for s in services: n = newService(s[1]) if n.safe_to_clean(): n.cleanup() def doUnloadModules(services): if config.nomod: return services.reverse() for s in services: n = newService(s[1]) if n.safe_to_clean_modules(): n.cleanup_module() # # Load profile for def doHost(lustreDB, hosts): global is_router, local_node_name node_db = None for h in hosts: node_db = lustreDB.lookup_name(h, 'node') if node_db: break if not node_db: panic('No host entry found.') local_node_name = node_db.get_val('name', 0) 
is_router = node_db.get_val_int('router', 0) lustre_upcall = node_db.get_val('lustreUpcall', '') portals_upcall = node_db.get_val('portalsUpcall', '') timeout = node_db.get_val_int('timeout', 0) ptldebug = node_db.get_val('ptldebug', '') subsystem = node_db.get_val('subsystem', '') find_local_clusters(node_db) if not is_router: find_local_routes(lustreDB) # Two step process: (1) load modules, (2) setup lustre # if not cleaning, load modules first. prof_list = node_db.get_refs('profile') if config.write_conf: for_each_profile(node_db, prof_list, doModules) sys_make_devices() for_each_profile(node_db, prof_list, doWriteconf) for_each_profile(node_db, prof_list, doUnloadModules) elif config.recover: if not (config.tgt_uuid and config.client_uuid and config.conn_uuid): raise Lustre.LconfError( "--recovery requires --tgt_uuid " + "--client_uuid --conn_uuid ") doRecovery(lustreDB, lctl, config.tgt_uuid, config.client_uuid, config.conn_uuid) elif config.cleanup: if config.force: # the command line can override this value timeout = 5 # ugly hack, only need to run lctl commands for --dump if config.lctl_dump or config.record: for_each_profile(node_db, prof_list, doCleanup) return sys_set_timeout(timeout) sys_set_ptldebug(ptldebug) sys_set_subsystem(subsystem) sys_set_lustre_upcall(lustre_upcall) sys_set_portals_upcall(portals_upcall) for_each_profile(node_db, prof_list, doCleanup) for_each_profile(node_db, prof_list, doUnloadModules) else: # ugly hack, only need to run lctl commands for --dump if config.lctl_dump or config.record: sys_set_timeout(timeout) sys_set_lustre_upcall(lustre_upcall) for_each_profile(node_db, prof_list, doSetup) return sys_make_devices() sys_set_netmem_max('/proc/sys/net/core/rmem_max', MAXTCPBUF) sys_set_netmem_max('/proc/sys/net/core/wmem_max', MAXTCPBUF) for_each_profile(node_db, prof_list, doModules) sys_set_debug_path() sys_set_ptldebug(ptldebug) sys_set_subsystem(subsystem) script = config.gdb_script run(lctl.lctl, ' modules >', script) if 
config.gdb: log ("The GDB module script is in", script) # pause, so user has time to break and # load the script time.sleep(5) sys_set_timeout(timeout) sys_set_lustre_upcall(lustre_upcall) sys_set_portals_upcall(portals_upcall) for_each_profile(node_db, prof_list, doSetup) def doRecovery(db, lctl, tgt_uuid, client_uuid, nid_uuid): tgt = db.lookup(tgt_uuid) if not tgt: raise Lustre.LconfError("doRecovery: "+ tgt_uuid +" not found.") new_uuid = get_active_target(tgt) if not new_uuid: raise Lustre.LconfError("doRecovery: no active target found for: " + tgt_uuid) net = choose_local_server(get_ost_net(db, new_uuid)) if not net: raise Lustre.LconfError("Unable to find a connection to:" + new_uuid) log("Reconnecting", tgt_uuid, " to ", net.nid_uuid); try: oldnet = get_server_by_nid_uuid(db, nid_uuid) if oldnet: lctl.disconnect(oldnet) except CommandError, e: log("recover: disconnect", nid_uuid, "failed: ") e.dump() try: lctl.connect(net) except CommandError, e: log("recover: connect failed") e.dump() lctl.recover(client_uuid, net.nid_uuid) def setupModulePath(cmd, portals_dir = PORTALS_DIR): base = os.path.dirname(cmd) if development_mode(): if not config.lustre: config.lustre = (os.path.join(base, "..")) # normalize the portals dir, using command line arg if set if config.portals: portals_dir = config.portals dir = os.path.join(config.lustre, portals_dir) config.portals = dir debug('config.portals', config.portals) elif config.lustre and config.portals: # production mode # if --lustre and --portals, normalize portals # can ignore POTRALS_DIR here, since it is probly useless here config.portals = os.path.join(config.lustre, config.portals) debug('config.portals B', config.portals) def sysctl(path, val): debug("+ sysctl", path, val) if config.noexec: return try: fp = open(os.path.join('/proc/sys', path), 'w') fp.write(str(val)) fp.close() except IOError, e: panic(str(e)) def sys_set_debug_path(): sysctl('portals/debug_path', config.debug_path) def 
sys_set_lustre_upcall(upcall): # the command overrides the value in the node config if config.lustre_upcall: upcall = config.lustre_upcall elif config.upcall: upcall = config.upcall if upcall: lctl.set_lustre_upcall(upcall) def sys_set_portals_upcall(upcall): # the command overrides the value in the node config if config.portals_upcall: upcall = config.portals_upcall elif config.upcall: upcall = config.upcall if upcall: sysctl('portals/upcall', upcall) def sys_set_timeout(timeout): # the command overrides the value in the node config if config.timeout and config.timeout > 0: timeout = config.timeout if timeout != None and timeout > 0: lctl.set_timeout(timeout) def sys_tweak_socknal (): if config.single_socket: sysctl("socknal/typed", 0) def sys_optimize_elan (): procfiles = ["/proc/elan/config/eventint_punt_loops", "/proc/qsnet/elan3/config/eventint_punt_loops", "/proc/qsnet/elan4/config/elan4_mainint_punt_loops"] for p in procfiles: if os.access(p, os.R_OK): run ("echo 0 > " + p) def sys_set_ptldebug(ptldebug): if config.ptldebug: ptldebug = config.ptldebug if ptldebug: try: val = eval(ptldebug, ptldebug_names) val = "0x%x" % (val) sysctl('portals/debug', val) except NameError, e: panic(str(e)) def sys_set_subsystem(subsystem): if config.subsystem: subsystem = config.subsystem if subsystem: try: val = eval(subsystem, subsystem_names) val = "0x%x" % (val) sysctl('portals/subsystem_debug', val) except NameError, e: panic(str(e)) def sys_set_netmem_max(path, max): debug("setting", path, "to at least", max) if config.noexec: return fp = open(path) str = fp.readline() fp.close() cur = int(str) if max > cur: fp = open(path, 'w') fp.write('%d\n' %(max)) fp.close() def sys_make_devices(): if not os.access('/dev/portals', os.R_OK): run('mknod /dev/portals c 10 240') if not os.access('/dev/obd', os.R_OK): run('mknod /dev/obd c 10 241') # Add dir to the global PATH, if not already there. 
def add_to_path(new_dir): syspath = string.split(os.environ['PATH'], ':') if new_dir in syspath: return os.environ['PATH'] = os.environ['PATH'] + ':' + new_dir def default_debug_path(): path = '/tmp/lustre-log' if os.path.isdir('/r'): return '/r' + path else: return path def default_gdb_script(): script = '/tmp/ogdb' if os.path.isdir('/r'): return '/r' + script else: return script DEFAULT_PATH = ('/sbin', '/usr/sbin', '/bin', '/usr/bin') # ensure basic elements are in the system path def sanitise_path(): for dir in DEFAULT_PATH: add_to_path(dir) # global hack for the --select handling tgt_select = {} def init_select(args): # args = [service=nodeA,service2=nodeB service3=nodeC] global tgt_select for arg in args: list = string.split(arg, ',') for entry in list: srv, node = string.split(entry, '=') tgt_select[srv] = node def get_select(srv): if tgt_select.has_key(srv): return tgt_select[srv] return None FLAG = Lustre.Options.FLAG PARAM = Lustre.Options.PARAM INTPARAM = Lustre.Options.INTPARAM PARAMLIST = Lustre.Options.PARAMLIST lconf_options = [ ('verbose,v', "Print system commands as they are run"), ('ldapurl',"LDAP server URL, eg. ldap://localhost", PARAM), ('config', "Cluster config name used for LDAP query", PARAM), ('select', "service=nodeA,service2=nodeB ", PARAMLIST), ('node', "Load config for ", PARAM), ('cleanup,d', "Cleans up config. (Shutdown)"), ('force,f', "Forced unmounting and/or obd detach during cleanup", FLAG, 0), ('single_socket', "socknal option: only use one socket instead of bundle", FLAG, 0), ('failover',"""Used to shut down without saving state. This will allow this node to "give up" a service to a another node for failover purposes. This will not be a clean shutdown.""", FLAG, 0), ('gdb', """Prints message after creating gdb module script and sleeps for 5 seconds."""), ('noexec,n', """Prints the commands and steps that will be run for a config without executing them. 
This can used to check if a config file is doing what it should be doing"""), ('nomod', "Skip load/unload module step."), ('nosetup', "Skip device setup/cleanup step."), ('reformat', "Reformat all devices (without question)"), ('mkfsoptions', "Additional options for the mk*fs command line", PARAM), ('dump', "Dump the kernel debug log to file before portals is unloaded", PARAM), ('write_conf', "Save all the client config information on mds."), ('record', "Write config information on mds."), ('record_log', "Name of config record log.", PARAM), ('record_device', "MDS device name that will record the config commands", PARAM), ('minlevel', "Minimum level of services to configure/cleanup", INTPARAM, 0), ('maxlevel', """Maximum level of services to configure/cleanup Levels are aproximatly like: 10 - netwrk 20 - device, ldlm 30 - osd, mdd 40 - mds, ost 70 - mountpoint, echo_client, osc, mdc, lov""", INTPARAM, 100), ('lustre', """Base directory of lustre sources. This parameter will cause lconf to load modules from a source tree.""", PARAM), ('portals', """Portals source directory. If this is a relative path, then it is assumed to be relative to lustre. 
""", PARAM), ('timeout', "Set recovery timeout", INTPARAM), ('upcall', "Set both portals and lustre upcall script", PARAM), ('lustre_upcall', "Set lustre upcall script", PARAM), ('portals_upcall', "Set portals upcall script", PARAM), ('lctl_dump', "Save lctl ioctls to the dumpfile argument", PARAM), ('ptldebug', "Set the portals debug level", PARAM), ('subsystem', "Set the portals debug subsystem", PARAM), ('gdb_script', "Fullname of gdb debug script", PARAM, default_gdb_script()), ('debug_path', "Path to save debug dumps", PARAM, default_debug_path()), # Client recovery options ('recover', "Recover a device"), ('group', "The group of devices to configure or cleanup", PARAM), ('tgt_uuid', "The failed target (required for recovery)", PARAM), ('client_uuid', "The failed client (required for recovery)", PARAM), ('conn_uuid', "The failed connection (required for recovery)", PARAM), ('inactive', """The name of an inactive service, to be ignored during mounting (currently OST-only). Can be repeated.""", PARAMLIST), ] def main(): global lctl, config, toplevel, CONFIG_FILE # in the upcall this is set to SIG_IGN signal.signal(signal.SIGCHLD, signal.SIG_DFL) cl = Lustre.Options("lconf", "config.xml", lconf_options) try: config, args = cl.parse(sys.argv[1:]) except Lustre.OptionError, e: print e sys.exit(1) setupModulePath(sys.argv[0]) host = socket.gethostname() # the PRNG is normally seeded with time(), which is not so good for starting # time-synchronized clusters input = open('/dev/urandom', 'r') if not input: print 'Unable to open /dev/urandom!' sys.exit(1) seed = input.read(32) input.close() random.seed(seed) sanitise_path() init_select(config.select) if len(args) > 0: if not os.access(args[0], os.R_OK): print 'File not found or readable:', args[0] sys.exit(1) try: dom = xml.dom.minidom.parse(args[0]) except Exception: panic("%s does not appear to be a config file." % (args[0])) sys.exit(1) # make sure to die here, even in debug mode. 
CONFIG_FILE = args[0] db = Lustre.LustreDB_XML(dom.documentElement, dom.documentElement) if not config.config: config.config = os.path.basename(args[0])# use full path? if config.config[-4:] == '.xml': config.config = config.config[:-4] elif config.ldapurl: if not config.config: panic("--ldapurl requires --config name") dn = "config=%s,fs=lustre" % (config.config) db = Lustre.LustreDB_LDAP('', {}, base=dn, url = config.ldapurl) elif config.ptldebug or config.subsystem: sys_set_ptldebug(None) sys_set_subsystem(None) sys.exit(0) else: print 'Missing config file or ldap URL.' print 'see lconf --help for command summary' sys.exit(1) toplevel = db ver = db.get_version() if not ver: panic("No version found in config data, please recreate.") if ver != Lustre.CONFIG_VERSION: panic("Config version", ver, "does not match lconf version", Lustre.CONFIG_VERSION) node_list = [] if config.node: node_list.append(config.node) else: if len(host) > 0: node_list.append(host) node_list.append('localhost') debug("configuring for host: ", node_list) if len(host) > 0: config.debug_path = config.debug_path + '-' + host config.gdb_script = config.gdb_script + '-' + host lctl = LCTLInterface('lctl') if config.lctl_dump: lctl.use_save_file(config.lctl_dump) if config.record: if not (config.record_device and config.record_log): panic("When recording, both --record_log and --record_device must be specified.") lctl.record(config.record_device, config.record_log) doHost(db, node_list) if config.record: lctl.end_record() if __name__ == "__main__": try: main() except Lustre.LconfError, e: print e # traceback.print_exc(file=sys.stdout) sys.exit(1) except CommandError, e: e.dump() sys.exit(e.rc) if first_cleanup_error: sys.exit(first_cleanup_error)