#
# Based in part on the XML obdctl modifications done by Brian Behlendorf
-import sys, getopt
-import string, os, stat, popen2, socket, time, random
+import sys, getopt, types
+import string, os, stat, popen2, socket, time, random, fcntl, select
import re, exceptions
import xml.dom.minidom
+if sys.version[0] == '1':
+ from FCNTL import F_GETFL, F_SETFL
+else:
+ from fcntl import F_GETFL, F_SETFL
+
# Global parameters
-TCP_ACCEPTOR = ''
MAXTCPBUF = 1048576
DEFAULT_TCPBUF = 1048576
#
print """usage: lconf config.xml
config.xml Lustre configuration in xml format.
---get <url> URL to fetch a config file
+--ldapurl LDAP server URL, eg. ldap://localhost
+--config Cluster config name used for LDAP query
--node <nodename> Load config for <nodename>
+--select service=nodeA,service2=nodeB U
-d | --cleanup Cleans up config. (Shutdown)
-f | --force Forced unmounting and/or obd detach during cleanup
-v | --verbose Print system commands as they are run
Levels are aproximatly like:
10 - network
20 - device, ldlm
- 30 - obd, mdd
+ 30 - osd, mdd
40 - mds, ost
50 - mdc, osc
- 60 - lov, lovconfig
+ 60 - lov
70 - mountpoint, echo_client
--lustre=src_dir Base directory of lustre sources. This parameter will cause lconf
to load modules from a source tree.
self._portals_dir = ''
self._minlevel = 0
self._maxlevel = 100
+ self._timeout = 0
+ self._recovery_upcall = ''
+ self._ldapurl = ''
+ self._config_name = ''
+ self._select = {}
+ self._lctl_dump = ''
def verbose(self, flag = None):
if flag: self._verbose = flag
if val: self._node = val
return self._node
- def url(self, val = None):
- if val: self._url = val
- return self._url
-
def gdb_script(self):
if os.path.isdir('/r'):
return '/r' + self._gdb_script
def dump_file(self, val = None):
if val: self._dump_file = val
return self._dump_file
-
def minlevel(self, val = None):
if val: self._minlevel = int(val)
return self._minlevel
if val: self._lustre_dir = val
return self._lustre_dir
+    def timeout(self, val = None):
+        """Return the timeout, first replacing it with val when val is
+        truthy (a falsy val such as 0 leaves the stored value alone)."""
+        if val:
+            self._timeout = val
+        return self._timeout
+
+    def recovery_upcall(self, val = None):
+        """Return the recovery upcall, first replacing it with val when
+        val is truthy."""
+        if val:
+            self._recovery_upcall = val
+        return self._recovery_upcall
+
+    def ldapurl(self, val = None):
+        """Return the LDAP server URL, first replacing it with val when
+        val is truthy."""
+        if val:
+            self._ldapurl = val
+        return self._ldapurl
+
+    def config_name(self, val = None):
+        """Return the cluster config name, first replacing it with val
+        when val is truthy."""
+        if val:
+            self._config_name = val
+        return self._config_name
+
+    def init_select(self, arg):
+        """Record service -> node choices from a --select argument.
+
+        arg has the form "service=nodeA,service2=nodeB".  Each pair is
+        stored in self._select keyed by the service name.  Empty entries
+        (an empty arg or a trailing comma) are skipped, and only the
+        first '=' of an entry separates the service from the node.  An
+        entry without any '=' still raises ValueError.
+        """
+        for entry in string.split(arg, ','):
+            if not entry:
+                continue
+            srv, node = string.split(entry, '=', 1)
+            self._select[srv] = node
+
+    def select(self, srv):
+        """Return the node recorded for service srv by init_select(),
+        or None when no choice was recorded."""
+        return self._select.get(srv)
+
+    def lctl_dump(self, val = None):
+        """Return the lctl dump setting, first replacing it with val
+        when val is truthy."""
+        if val:
+            self._lctl_dump = val
+        return self._lctl_dump
+
config = Config()
# ============================================================
+# handle daemons, like the acceptor
+class DaemonHandler:
+    """ Manage starting and stopping a daemon.
+
+    Assumes the daemon manages its own pid file.  Subclasses must
+    provide pidfile(), returning the path of that pid file, and
+    command_line(), returning the arguments appended to the program
+    path when the daemon is started.
+    """
+
+    def __init__(self, cmd):
+        self.command = cmd
+        # Full path of the program; looked up lazily by start().
+        self.path = ""
+
+    def start(self):
+        """Start the daemon unless it is already running.
+
+        Raises CommandError when the command returns a non-zero status.
+        """
+        if self.running():
+            log(self.command, "already running.")
+            # Do not launch a second copy on top of the live one.
+            return
+        if not self.path:
+            self.path = find_prog(self.command)
+            if not self.path:
+                panic(self.command, "not found.")
+        ret, out = runcmd(self.path + ' ' + self.command_line())
+        if ret:
+            raise CommandError(self.path, out, ret)
+
+    def stop(self):
+        """Signal the process named in the pid file, if it is alive."""
+        if not self.running():
+            return
+        pid = self.read_pidfile()
+        try:
+            log("killing process", pid)
+            os.kill(pid, 15)    # 15 is SIGTERM
+            #time.sleep(1) # let daemon die
+        except OSError, e:
+            log("unable to kill", self.command, e)
+        # Checked immediately after the signal, with no grace period.
+        if self.running():
+            log("unable to kill", self.command)
+
+    def running(self):
+        """Return 1 if the pid file names a process that can be
+        signalled, else 0.  A pid file whose process cannot be
+        signalled is treated as stale and removed."""
+        pid = self.read_pidfile()
+        if pid:
+            try:
+                os.kill(pid, 0)     # signal 0 only probes the process
+            except OSError:
+                self.clean_pidfile()
+            else:
+                return 1
+        return 0
+
+    def read_pidfile(self):
+        """Return the pid stored in the pid file, or 0 when the file is
+        missing, unreadable, or does not contain a number."""
+        try:
+            fp = open(self.pidfile(), 'r')
+        except IOError:
+            return 0
+        # Nested try blocks keep this valid on old interpreters that
+        # cannot combine except and finally in one statement.
+        try:
+            try:
+                return int(fp.read())
+            except (IOError, ValueError):
+                return 0
+        finally:
+            fp.close()
+
+    def clean_pidfile(self):
+        """ Remove a stale pidfile """
+        log("removing stale pidfile:", self.pidfile())
+        try:
+            os.unlink(self.pidfile())
+        except OSError, e:
+            log(self.pidfile(), e)
+
+class AcceptorHandler(DaemonHandler):
+    """DaemonHandler for the "acceptor" program listening on one port."""
+
+    def __init__(self, port, net_type, send_mem, recv_mem, irq_aff, nid_xchg):
+        DaemonHandler.__init__(self, "acceptor")
+        self.port = port
+        self.send_mem = send_mem
+        self.recv_mem = recv_mem
+
+        # Collect the optional switches, each with its leading blank,
+        # then glue them together without a separator.
+        opts = []
+        if net_type == 'toe':
+            opts.append(' -N 4')
+        if irq_aff:
+            opts.append(' -i')
+        if nid_xchg:
+            opts.append(' -x')
+        self.flags = string.join(opts, '')
+
+    def pidfile(self):
+        """Return the pid file path, which includes the port number."""
+        return "/var/run/%s-%d.pid" % (self.command, self.port)
+
+    def command_line(self):
+        """Return the argument string passed to the acceptor program."""
+        words = ['-s', str(self.send_mem), '-r', str(self.recv_mem),
+                 str(self.flags), str(self.port)]
+        return string.join(words)
+
+acceptors = {}
+
+# start the acceptors
+def run_acceptors():
+    """Start every registered acceptor that is not already running."""
+    for daemon in acceptors.values():
+        if daemon.running():
+            continue
+        daemon.start()
+
+def stop_acceptor(port):
+    """Stop the acceptor registered for port, if one is registered
+    and it is running."""
+    try:
+        daemon = acceptors[port]
+    except KeyError:
+        return
+    if daemon.running():
+        daemon.stop()
+
+
+# ============================================================
# handle lctl interface
class LCTLInterface:
"""
Initialize close by finding the lctl binary.
"""
self.lctl = find_prog(cmd)
+ self.save_file = ''
if not self.lctl:
if config.noexec():
debug('! lctl not found')
else:
raise CommandError('lctl', "unable to find lctl binary.")
+ def use_save_file(self, file):
+ # Remember a file name.  While it is set, run() puts a
+ # "dump <file>" line in front of every command script it sends.
+ self.save_file = file
+
+    def set_nonblock(self, fd):
+        """Turn on O_NDELAY for file descriptor fd, keeping its other
+        status flags."""
+        current = fcntl.fcntl(fd, F_GETFL)
+        wanted = current | os.O_NDELAY
+        fcntl.fcntl(fd, F_SETFL, wanted)
+
def run(self, cmds):
"""
run lctl
should modify command line to accept multiple commands, or
create complex command line options
"""
+ # cmds is written to lctl's stdin.  Returns (rc, stdout text);
+ # raises CommandError when lctl exits non-zero or writes anything
+ # to stderr.  With config.noexec() nothing runs and (0, []) is
+ # returned.
- debug("+", self.lctl, cmds)
+ cmd_line = self.lctl
+ if self.save_file:
+ cmds = '\n dump ' + self.save_file + cmds
+
+ debug("+", cmd_line, cmds)
if config.noexec(): return (0, [])
- p = popen2.Popen3(self.lctl, 1)
- p.tochild.write(cmds + "\n")
- p.tochild.close()
- out = p.fromchild.readlines()
- err = p.childerr.readlines()
- ret = p.wait()
+
+ child = popen2.Popen3(cmd_line, 1) # Capture stdout and stderr from command
+ child.tochild.write(cmds + "\n")
+ child.tochild.close()
+
+ # From "Python Cookbook" from O'Reilly
+ outfile = child.fromchild
+ outfd = outfile.fileno()
+ self.set_nonblock(outfd)
+ errfile = child.childerr
+ errfd = errfile.fileno()
+ self.set_nonblock(errfd)
+
+ outdata = errdata = ''
+ # Descriptors not yet at EOF.  One is dropped as soon as it reports
+ # EOF; otherwise select() would keep returning it as readable and
+ # the loop would spin until the other pipe closed as well.
+ pending = [outfd, errfd]
+ while pending:
+ ready = select.select(pending,[],[]) # Wait for input
+ if outfd in ready[0]:
+ outchunk = outfile.read()
+ if outchunk == '': pending.remove(outfd)
+ outdata = outdata + outchunk
+ if errfd in ready[0]:
+ errchunk = errfile.read()
+ if errchunk == '': pending.remove(errfd)
+ errdata = errdata + errchunk
+ # end of "borrowed" code
+
+ ret = child.wait()
if os.WIFEXITED(ret):
rc = os.WEXITSTATUS(ret)
else:
rc = 0
- if rc or len(err):
- raise CommandError(self.lctl, err, rc)
- return rc, out
+ if rc or len(errdata):
+ raise CommandError(self.lctl, errdata, rc)
+ return rc, outdata
def runcmd(self, *args):
"""
cmds = """
network %s
mynid %s
- add_uuid self %s
- quit""" % (net, nid, nid)
- else:
- cmds = """
- network %s
- add_uuid self %s
- quit""" % (net, nid)
-
- self.run(cmds)
+ quit """ % (net, nid)
+ self.run(cmds)
# create a new connection
- def connect(self, net, nid, port, servuuid, send_mem, recv_mem):
- if net in ('tcp', 'toe'):
- cmds = """
+ def connect(self, srv):
+ cmds = "\n add_uuid %s %s %s" % (srv.uuid, srv.nid, srv.net_type)
+ if srv.net_type in ('tcp', 'toe') and not config.lctl_dump():
+ flags = ''
+ if srv.irq_affinity:
+ flags = flags + 'i'
+ if srv.nid_exchange:
+ flags = flags + 'x'
+ cmds = """%s
network %s
- add_uuid %s %s
send_mem %d
recv_mem %d
- connect %s %d
- quit""" % (net, servuuid, nid, send_mem, recv_mem, nid, port, )
- else:
- cmds = """
- network %s
- add_uuid %s %s
- connect %s %d
- quit""" % (net, servuuid, nid, nid, port, )
-
+ connect %s %d %s""" % (cmds, srv.net_type,
+ srv.send_mem,
+ srv.recv_mem,
+ srv.hostaddr, srv.port, flags )
+
+ cmds = cmds + "\n quit"
self.run(cmds)
# add a route to a range
cmds = """
network %s
add_route %s %s %s
- quit """ % (net, gw, lo, hi)
+ quit """ % (net,
+ gw, lo, hi)
self.run(cmds)
def add_route_host(self, net, uuid, gw, tgt):
cmds = """
network %s
- add_uuid %s %s
+ add_uuid %s %s %s
add_route %s %s
- quit """ % (net, uuid, tgt, gw, tgt)
+ quit """ % (net,
+ uuid, tgt, net,
+ gw, tgt)
self.run(cmds)
# add a route to a range
cmds = """
ignore_errors
network %s
- del_uuid self
disconnect
quit""" % (net)
self.run(cmds)
cmds = """
ignore_errors
device $%s
- cleanup
- detach %s
+ cleanup %s
+ detach
quit""" % (name, ('', 'force')[config.force()])
self.run(cmds)
# Run a command and return the output and status.
# stderr is sent to /dev/null, could use popen3 to
# save it if necessary
-def run(*args):
- cmd = string.join(map(str,args))
+def runcmd(cmd):
debug ("+", cmd)
if config.noexec(): return (0, [])
f = os.popen(cmd + ' 2>&1')
ret = 0
return (ret, out)
+def run(*args):
+    """Join the arguments (converted with str) with single blanks and
+    hand the resulting command line to runcmd(); returns its result."""
+    words = map(str, args)
+    return runcmd(string.join(words))
+
# Run a command in the background.
def run_daemon(*args):
cmd = string.join(map(str,args))
cmdpath = os.path.dirname(sys.argv[0])
syspath.insert(0, cmdpath);
if config.portals_dir():
- syspath.insert(0, os.path.join(cmdpath, config.portals_dir()+'/linux/utils/'))
+ syspath.insert(0, os.path.join(config.portals_dir()+'/linux/utils/'))
for d in syspath:
prog = os.path.join(d,cmd)
- debug(prog)
if os.access(prog, os.X_OK):
return prog
return ''
# build fs according to type
# fixme: dangerous
-def mkfs(fstype, dev):
+def mkfs(dev, devsize, fstype):
+ block_cnt = ''
+ if devsize:
+ # devsize is in 1k, and fs block count is in 4k
+ block_cnt = devsize/4
+
if(fstype in ('ext3', 'extN')):
- mkfs = 'mkfs.ext2 -j -b 4096'
+ mkfs = 'mkfs.ext2 -j -b 4096 -F '
elif (fstype == 'reiserfs'):
- mkfs = 'mkfs.reiserfs -f'
+ mkfs = 'mkreiserfs -ff'
else:
print 'unsupported fs type: ', fstype
- if not is_block(dev):
- if(fstype in ('ext3', 'extN')):
- force = '-F'
- elif (fstype == 'reiserfs'):
- force = ''
- else:
- print 'unsupported fs type: ', fstype
- else:
- force = ''
- (ret, out) = run (mkfs, force, dev)
+
+ (ret, out) = run (mkfs, dev, block_cnt)
if ret:
panic("Unable to build fs:", dev)
# enable hash tree indexing on fsswe
return dev
if config.reformat() or not os.access(file, os.R_OK | os.W_OK):
if size < 8000:
- error(file, "size must be larger than 8MB")
- run("dd if=/dev/zero bs=1k count=0 seek=%d of=%s" %(size, file))
+ panic(file, "size must be larger than 8MB, currently set to:", size)
+ (ret, out) = run("dd if=/dev/zero bs=1k count=0 seek=%d of=%s" %(size,
+ file))
+ if ret:
+ panic("Unable to create backing store:", file)
+
loop = loop_base()
# find next free loop
for n in xrange(0, MAX_LOOP_DEVICES):
if not is_block(dev):
dev = init_loop(dev, size, fstype)
if config.reformat() or (need_format(fstype, dev) and format == 'yes'):
- mkfs(fstype, dev)
+ mkfs(dev, size, fstype)
# else:
# panic("device:", dev,
ip = string.split(addr, ':')[1]
return ip
+def get_local_nid(net_type, wildcard):
+    """Return the local nid.  When an elan position file is readable
+    the elan address is used; otherwise the local address for
+    net_type is looked up with the given wildcard."""
+    if os.access('/proc/elan/device0/position', os.R_OK):
+        return get_local_address('elan', '*')
+    return get_local_address(net_type, wildcard)
+
def get_local_address(net_type, wildcard):
"""Return the local address for the network type."""
local = ""
"""Return true if a device exists for the uuid"""
# expect this format:
# 1 UP ldlm ldlm ldlm_UUID 2
+ if config.lctl_dump():
+ return 0
try:
out = lctl.device_list()
for s in out:
except CommandError, e:
e.dump()
return 0
+
+def is_network_prepared():
+    """If the PTLRPC device exists, then assume that all networking
+    has been configured.
+
+    Returns 1 when the device list contains an entry whose fifth
+    column is RPCDEV_UUID, else 0.  Always returns 0 when
+    config.lctl_dump() is set, and when listing the devices fails
+    (the CommandError is dumped, not raised).
+    """
+    if config.lctl_dump():
+        return 0
+    try:
+        out = lctl.device_list()
+        for s in out:
+            fields = string.split(s)
+            # Blank or short lines have no uuid column; skip them
+            # instead of failing with IndexError.
+            if len(fields) > 4 and fields[4] == 'RPCDEV_UUID':
+                return 1
+    except CommandError, e:
+        e.dump()
+    return 0
+
def fs_is_mounted(path):
"""Return true if path is a mounted lustre filesystem"""
""" Base class for the rest of the modules. The default cleanup method is
defined here, as well as some utilitiy funcs.
"""
- def __init__(self, module_name, dom_node):
- self.dom_node = dom_node
+ def __init__(self, module_name, db):
+ self.db = db
self.module_name = module_name
- self.name = get_attr(dom_node, 'name')
- self.uuid = get_attr(dom_node, 'uuid')
+ self.name = self.db.getName()
+ self.uuid = self.db.getUUID()
self.kmodule_list = []
self._server = None
self._connected = 0
msg = string.join(map(str,args))
print self.module_name + ":", self.name, self.uuid, msg
-
- def lookup_server(self, srv_uuid):
- """ Lookup a server's network information """
- net = get_ost_net(self.dom_node.parentNode, srv_uuid)
- if not net:
- panic ("Unable to find a server for:", srv_uuid)
- self._server = Network(net)
-
- def get_server(self):
- return self._server
-
def cleanup(self):
""" default cleanup, used for most modules """
self.info()
- srv = self.get_server()
- if srv and local_net(srv):
- try:
- lctl.disconnect(srv.net_type, srv.nid, srv.port, srv.uuid)
- except CommandError, e:
- log(self.module_name, "disconnect failed: ", self.name)
- e.dump()
- cleanup_error(e.rc)
try:
lctl.cleanup(self.name, self.uuid)
except CommandError, e:
log(self.module_name, "cleanup failed: ", self.name)
e.dump()
cleanup_error(e.rc)
-
+
def add_portals_module(self, dev_dir, modname):
"""Append a module to list of modules to load."""
self.kmodule_list.append((config.portals_dir(), dev_dir, modname))
log('! unable to unload module:', mod)
logall(out)
-
class Network(Module):
- def __init__(self,dom_node):
- Module.__init__(self, 'NETWORK', dom_node)
- self.net_type = get_attr(dom_node,'type')
- self.nid = get_text(dom_node, 'server', '*')
- self.port = get_text_int(dom_node, 'port', 0)
- self.send_mem = get_text_int(dom_node, 'send_mem', DEFAULT_TCPBUF)
- self.recv_mem = get_text_int(dom_node, 'recv_mem', DEFAULT_TCPBUF)
+ def __init__(self,db):
+ # Read this network's settings from db.  A '*' in nid or hostaddr
+ # is replaced by an address looked up on the local machine; the
+ # portals and lustre modules needed for net_type are then queued.
+ Module.__init__(self, 'NETWORK', db)
+ self.net_type = self.db.get_val('nettype')
+ self.nid = self.db.get_val('nid', '*')
+ self.port = self.db.get_val_int('port', 0)
+ self.send_mem = self.db.get_val_int('sendmem', DEFAULT_TCPBUF)
+ self.recv_mem = self.db.get_val_int('recvmem', DEFAULT_TCPBUF)
+ self.irq_affinity = self.db.get_val_int('irqaffinity', 0)
+ self.nid_exchange = self.db.get_val_int('nidexchange', 0)
+
if '*' in self.nid:
- self.nid = get_local_address(self.net_type, self.nid)
+ self.nid = get_local_nid(self.net_type, self.nid)
if not self.nid:
panic("unable to set nid for", self.net_type, self.nid)
debug("nid:", self.nid)
+ # hostaddr defaults to the (already resolved) nid
+ self.hostaddr = self.db.get_val('hostaddr', self.nid)
+ if '*' in self.hostaddr:
+ self.hostaddr = get_local_address(self.net_type, self.hostaddr)
+ # check the address just looked up, not the nid validated above
+ if not self.hostaddr:
+ panic("unable to set hostaddr for", self.net_type, self.hostaddr)
+ debug("hostaddr:", self.hostaddr)
+ # debug ( "hostaddr ", self.hostaddr, "net_type", self.net_type)
+
self.add_portals_module("linux/oslib", 'portals')
if node_needs_router():
self.add_portals_module("linux/router", 'kptlrouter')
if self.net_type == 'tcp':
self.add_portals_module("linux/socknal", 'ksocknal')
if self.net_type == 'toe':
- self.add_portals_odule("/linux/toenal", 'ktoenal')
+ self.add_portals_module("/linux/toenal", 'ktoenal')
if self.net_type == 'elan':
self.add_portals_module("/linux/rqswnal", 'kqswnal')
if self.net_type == 'gm':
self.add_portals_module("/linux/gmnal", 'kgmnal')
self.add_lustre_module('obdclass', 'obdclass')
- self.add_lustre_module('ptlrpc', 'ptlrpc')
def prepare(self):
+ if is_network_prepared():
+ return
self.info(self.net_type, self.nid, self.port)
- if self.net_type in ('tcp', 'toe'):
- nal_id = '' # default is socknal
- if self.net_type == 'toe':
- nal_id = '-N 4'
- ret, out = run(TCP_ACCEPTOR, '-s', self.send_mem, '-r', self.recv_mem, nal_id, self.port)
- if ret:
- raise CommandError(TCP_ACCEPTOR, out, ret)
- ret = self.dom_node.getElementsByTagName('route_tbl')
- for a in ret:
- for r in a.getElementsByTagName('route'):
- net_type = get_attr(r, 'type')
- gw = get_attr(r, 'gw')
- lo = get_attr(r, 'lo')
- hi = get_attr(r,'hi', '')
- lctl.add_route(net_type, gw, lo, hi)
- if net_type in ('tcp', 'toe') and net_type == self.net_type and hi == '':
- srv = nid2server(self.dom_node.parentNode.parentNode, lo)
- if not srv:
- panic("no server for nid", lo)
- else:
- lctl.connect(srv.net_type, srv.nid, srv.port, srv.uuid, srv.send_mem, srv.recv_mem)
-
-
lctl.network(self.net_type, self.nid)
- lctl.newdev(attach = "ptlrpc RPCDEV RPCDEV_UUID")
def cleanup(self):
self.info(self.net_type, self.nid, self.port)
- ret = self.dom_node.getElementsByTagName('route_tbl')
- for a in ret:
- for r in a.getElementsByTagName('route'):
- lo = get_attr(r, 'lo')
- hi = get_attr(r,'hi', '')
- if self.net_type in ('tcp', 'toe') and hi == '':
- srv = nid2server(self.dom_node.parentNode.parentNode, lo)
- if not srv:
- panic("no server for nid", lo)
- else:
- try:
- lctl.disconnect(srv.net_type, srv.nid, srv.port, srv.uuid)
- except CommandError, e:
- print "disconnect failed: ", self.name
- e.dump()
- cleanup_error(e.rc)
- try:
- lctl.del_route(self.net_type, self.nid, lo, hi)
- except CommandError, e:
- print "del_route failed: ", self.name
- e.dump()
- cleanup_error(e.rc)
-
- try:
- lctl.cleanup("RPCDEV", "RPCDEV_UUID")
- except CommandError, e:
- print "cleanup failed: ", self.name
- e.dump()
- cleanup_error(e.rc)
+ if self.net_type in ('tcp', 'toe'):
+ stop_acceptor(self.port)
try:
lctl.disconnectAll(self.net_type)
except CommandError, e:
print "disconnectAll failed: ", self.name
e.dump()
cleanup_error(e.rc)
- if self.net_type in ('tcp', 'toe'):
- # yikes, this ugly! need to save pid in /var/something
- run("killall acceptor")
+
+# Adds and removes the routes listed in this node's route table.
+class Router(Module):
+ def __init__(self,db):
+ Module.__init__(self, 'ROUTER', db)
+ # Add every route from the route table; does nothing when
+ # is_network_prepared() reports the network is already set up.
+ def prepare(self):
+ if is_network_prepared():
+ return
+ self.info()
+ for net_type, gw, lo, hi in self.db.get_route_tbl():
+ lctl.add_route(net_type, gw, lo, hi)
+ # A tcp/toe route with an empty hi names a single nid (lo); the
+ # server owning that nid is looked up and connected to.
+ # local_net_type() is defined elsewhere; presumably it tests
+ # whether this node has a network of that type -- TODO confirm.
+ if net_type in ('tcp', 'toe') and local_net_type(net_type) and hi == '':
+ srvdb = self.db.nid2server(lo, net_type)
+
+ if not srvdb:
+ panic("no server for nid", lo)
+ else:
+ srv = Network(srvdb)
+ lctl.connect(srv)
+ # Undo prepare(): disconnect from single-nid tcp/toe servers and
+ # delete the routes.  Failures are printed, dumped and passed to
+ # cleanup_error() instead of being raised.
+ def cleanup(self):
+ for net_type, gw, lo, hi in self.db.get_route_tbl():
+ if net_type in ('tcp', 'toe') and local_net_type(net_type) and hi == '':
+ srvdb = self.db.nid2server(lo, net_type)
+ if not srvdb:
+ panic("no server for nid", lo)
+ else:
+ srv = Network(srvdb)
+ try:
+ lctl.disconnect(srv.net_type, srv.nid, srv.port, srv.uuid)
+ except CommandError, e:
+ print "disconnect failed: ", self.name
+ e.dump()
+ cleanup_error(e.rc)
+ try:
+ lctl.del_route(net_type, gw, lo, hi)
+ except CommandError, e:
+ print "del_route failed: ", self.name
+ e.dump()
+ cleanup_error(e.rc)
class LDLM(Module):
- def __init__(self,dom_node):
- Module.__init__(self, 'LDLM', dom_node)
+ def __init__(self,db):
+ Module.__init__(self, 'LDLM', db)
self.add_lustre_module('ldlm', 'ldlm')
def prepare(self):
if is_prepared(self.uuid):
return
self.info()
- lctl.newdev(attach="ldlm %s %s" % (self.name, self.uuid),
- setup ="")
+ lctl.newdev(attach="ldlm %s %s" % (self.name, self.uuid))
+ def cleanup(self):
+ if is_prepared(self.uuid):
+ Module.cleanup(self)
+
+class PTLRPC(Module):
+    """Module that attaches and cleans up the ptlrpc device."""
+
+    def __init__(self,db):
+        Module.__init__(self, 'PTLRPC', db)
+        self.add_lustre_module('ptlrpc', 'ptlrpc')
+
+    def prepare(self):
+        """Attach the ptlrpc device unless it already exists."""
+        if not is_prepared(self.uuid):
+            self.info()
+            attach_args = "ptlrpc %s %s" % (self.name, self.uuid)
+            lctl.newdev(attach=attach_args)
+
+    def cleanup(self):
+        """Run the default cleanup, but only when the device exists."""
+        if not is_prepared(self.uuid):
+            return
+        Module.cleanup(self)
class LOV(Module):
- def __init__(self,dom_node):
- Module.__init__(self, 'LOV', dom_node)
- self.mds_uuid = get_first_ref(dom_node, 'mds')
- mds= lookup(dom_node.parentNode, self.mds_uuid)
- self.mds_name = getName(mds)
- devs = dom_node.getElementsByTagName('devices')
- if len(devs) > 0:
- dev_node = devs[0]
- self.stripe_sz = get_attr_int(dev_node, 'stripesize', 65536)
- self.stripe_off = get_attr_int(dev_node, 'stripeoffset', 0)
- self.pattern = get_attr_int(dev_node, 'pattern', 0)
- self.devlist = get_all_refs(dev_node, 'osc')
- self.stripe_cnt = get_attr_int(dev_node, 'stripecount', len(self.devlist))
+    def __init__(self,db):
+        """Read the mds reference and striping values from db and look
+        up an osc (via get_osc) for every obd the lov refers to; panics
+        when one cannot be found."""
+        Module.__init__(self, 'LOV', db)
         self.add_lustre_module('mdc', 'mdc')
         self.add_lustre_module('lov', 'lov')
-
+        self.mds_uuid = self.db.get_first_ref('mds')
+        mds= self.db.lookup(self.mds_uuid)
+        self.mds_name = mds.getName()
+        self.stripe_sz = self.db.get_val_int('stripesize', 65536)
+        self.stripe_off = self.db.get_val_int('stripeoffset', 0)
+        self.pattern = self.db.get_val_int('stripepattern', 0)
+        self.devlist = self.db.get_refs('obd')
+        # stripe count defaults to the number of referenced obds
+        self.stripe_cnt = self.db.get_val_int('stripecount', len(self.devlist))
+        self.osclist = []
+        # Set for real by prepare(); defined here so the attribute
+        # exists for readers that run before prepare() does.
+        self.mdc_uuid = ''
+        for obd_uuid in self.devlist:
+            obd = self.db.lookup(obd_uuid)
+            osc = get_osc(obd, self.name)
+            if osc:
+                self.osclist.append(osc)
+            else:
+                panic('osc not found:', obd_uuid)
+
def prepare(self):
if is_prepared(self.uuid):
return
- for osc_uuid in self.devlist:
- osc = lookup(self.dom_node.parentNode, osc_uuid)
- if osc:
- n = OSC(osc)
- try:
- # Ignore connection failures, because the LOV will DTRT with
- # an unconnected OSC.
- n.prepare(ignore_connect_failure=1)
- except CommandError:
- print "Error preparing OSC %s (inactive)\n" % osc_uuid
- else:
- panic('osc not found:', osc_uuid)
- mdc_uuid = prepare_mdc(self.dom_node.parentNode, self.mds_uuid)
+ for osc in self.osclist:
+ try:
+ # Ignore connection failures, because the LOV will DTRT with
+ # an unconnected OSC.
+ osc.prepare(ignore_connect_failure=1)
+ except CommandError:
+ print "Error preparing OSC %s (inactive)\n" % osc.uuid
+ self.mdc_uuid = prepare_mdc(self.db, self.name, self.mds_uuid)
self.info(self.mds_uuid, self.stripe_cnt, self.stripe_sz,
self.stripe_off, self.pattern, self.devlist, self.mds_name)
lctl.newdev(attach="lov %s %s" % (self.name, self.uuid),
- setup ="%s" % (mdc_uuid))
+ setup ="%s" % (self.mdc_uuid))
def cleanup(self):
- if not is_prepared(self.uuid):
- return
- for osc_uuid in self.devlist:
- osc = lookup(self.dom_node.parentNode, osc_uuid)
- if osc:
- n = OSC(osc)
- n.cleanup()
- else:
- panic('osc not found:', osc_uuid)
- Module.cleanup(self)
- cleanup_mdc(self.dom_node.parentNode, self.mds_uuid)
-
+ if is_prepared(self.uuid):
+ Module.cleanup(self)
+ for osc in self.osclist:
+ osc.cleanup()
+ cleanup_mdc(self.db, self.name, self.mds_uuid)
def load_module(self):
- for osc_uuid in self.devlist:
- osc = lookup(self.dom_node.parentNode, osc_uuid)
- if osc:
- n = OSC(osc)
- n.load_module()
- break
- else:
- panic('osc not found:', osc_uuid)
+ for osc in self.osclist:
+ osc.load_module()
+ break
Module.load_module(self)
-
def cleanup_module(self):
Module.cleanup_module(self)
- for osc_uuid in self.devlist:
- osc = lookup(self.dom_node.parentNode, osc_uuid)
- if osc:
- n = OSC(osc)
- n.cleanup_module()
- break
- else:
- panic('osc not found:', osc_uuid)
+ for osc in self.osclist:
+ osc.cleanup_module()
+ break
class LOVConfig(Module):
- def __init__(self,dom_node):
- Module.__init__(self, 'LOVConfig', dom_node)
- self.lov_uuid = get_first_ref(dom_node, 'lov')
- l = lookup(dom_node.parentNode, self.lov_uuid)
+ def __init__(self,db):
+ Module.__init__(self, 'LOVConfig', db)
+
+ self.lov_uuid = self.db.get_first_ref('lov')
+ l = self.db.lookup(self.lov_uuid)
self.lov = LOV(l)
def prepare(self):
#nothing to do here
pass
-
-class MDS(Module):
- def __init__(self,dom_node):
- Module.__init__(self, 'MDS', dom_node)
- self.devname, self.size = get_device(dom_node)
- self.fstype = get_text(dom_node, 'fstype')
+class MDSDEV(Module):
+ def __init__(self,db):
+ Module.__init__(self, 'MDSDEV', db)
+ self.devpath = self.db.get_val('devpath','')
+ self.size = self.db.get_val_int('devsize', 0)
+ self.fstype = self.db.get_val('fstype', '')
+ # overwrite the original MDSDEV name and uuid with the MDS name and uuid
+ target_uuid = self.db.get_first_ref('target')
+ mds = self.db.lookup(target_uuid)
+ self.name = mds.getName()
+ self.lovconfig_uuids = mds.get_refs('lovconfig')
# FIXME: if fstype not set, then determine based on kernel version
- self.format = get_text(dom_node, 'autoformat', "no")
+ self.format = self.db.get_val('autoformat', "no")
+
+ active_uuid = mds.get_active_target()
+ if not active_uuid:
+ panic("No target device found:", target_uuid)
+ if active_uuid == self.uuid:
+ self.active = 1
+ else:
+ self.active = 0
+ self.target_dev_uuid = self.uuid
+ self.uuid = target_uuid
+ # modules
if self.fstype == 'extN':
self.add_lustre_module('extN', 'extN')
self.add_lustre_module('mds', 'mds')
- self.add_lustre_module('obdclass', 'fsfilt_%s'%(self.fstype))
+ if self.fstype:
+ self.add_lustre_module('obdclass', 'fsfilt_%s' % (self.fstype))
+
+ def load_module(self):
+ if self.active:
+ Module.load_module(self)
def prepare(self):
if is_prepared(self.uuid):
return
- self.info(self.devname, self.fstype, self.format)
- blkdev = block_dev(self.devname, self.size, self.fstype, self.format)
+ if not self.active:
+ debug(self.uuid, "not active")
+ return
+ self.info(self.devpath, self.fstype, self.format)
+ run_acceptors()
+ blkdev = block_dev(self.devpath, self.size, self.fstype, self.format)
if not is_prepared('MDT_UUID'):
lctl.newdev(attach="mdt %s %s" % ('MDT', 'MDT_UUID'),
setup ="")
lctl.newdev(attach="mds %s %s" % (self.name, self.uuid),
setup ="%s %s" %(blkdev, self.fstype))
+ for uuid in self.lovconfig_uuids:
+ db = self.db.lookup(uuid)
+ lovconfig = LOVConfig(db)
+ lovconfig.prepare()
+
def cleanup(self):
if is_prepared('MDT_UUID'):
try:
print "cleanup failed: ", self.name
e.dump()
cleanup_error(e.rc)
- if not is_prepared(self.uuid):
- return
- Module.cleanup(self)
- clean_loop(self.devname)
-
-# Very unusual case, as there is no MDC element in the XML anymore
-# Builds itself from an MDS node
-class MDC(Module):
- def __init__(self,dom_node):
- self.mds = MDS(dom_node)
- self.dom_node = dom_node
- self.module_name = 'MDC'
- self.kmodule_list = []
- self._server = None
- self._connected = 0
-
- host = socket.gethostname()
- self.name = 'MDC_%s' % (self.mds.name)
- self.uuid = '%s_%05x_%05x' % (self.name, int(random.random() * 1048576),
- int(random.random() * 1048576))
-
- self.lookup_server(self.mds.uuid)
- self.add_lustre_module('mdc', 'mdc')
-
- def prepare(self):
if is_prepared(self.uuid):
- return
- self.info(self.mds.uuid)
- srv = self.get_server()
- lctl.connect(srv.net_type, srv.nid, srv.port, srv.uuid, srv.send_mem, srv.recv_mem)
- lctl.newdev(attach="mdc %s %s" % (self.name, self.uuid),
- setup ="%s %s" %(self.mds.uuid, srv.uuid))
-
-class OBD(Module):
- def __init__(self, dom_node):
- Module.__init__(self, 'OBD', dom_node)
- self.obdtype = get_attr(dom_node, 'type')
- self.devname, self.size = get_device(dom_node)
- self.fstype = get_text(dom_node, 'fstype')
+ Module.cleanup(self)
+ clean_loop(self.devpath)
+
+class OSD(Module):
+ def __init__(self, db):
+ Module.__init__(self, 'OSD', db)
+ self.osdtype = self.db.get_val('osdtype')
+ self.devpath = self.db.get_val('devpath', '')
+ self.size = self.db.get_val_int('devsize', 0)
+ self.fstype = self.db.get_val('fstype', '')
+ target_uuid = self.db.get_first_ref('target')
+ ost = self.db.lookup(target_uuid)
+ self.name = ost.getName()
# FIXME: if fstype not set, then determine based on kernel version
- self.format = get_text(dom_node, 'autoformat', 'yes')
+ self.format = self.db.get_val('autoformat', 'yes')
if self.fstype == 'extN':
self.add_lustre_module('extN', 'extN')
- self.add_lustre_module(self.obdtype, self.obdtype)
+
+ active_uuid = ost.get_active_target()
+ if not active_uuid:
+ panic("No target device found:", target_uuid)
+ if active_uuid == self.uuid:
+ self.active = 1
+ else:
+ self.active = 0
+ self.target_dev_uuid = self.uuid
+ self.uuid = target_uuid
+ # modules
+ self.add_lustre_module('ost', 'ost')
+ self.add_lustre_module(self.osdtype, self.osdtype)
if self.fstype:
self.add_lustre_module('obdclass' , 'fsfilt_%s' % (self.fstype))
+ def load_module(self):
+ if self.active:
+ Module.load_module(self)
+
# need to check /proc/mounts and /etc/mtab before
# formatting anything.
# FIXME: check if device is already formatted.
def prepare(self):
if is_prepared(self.uuid):
return
- self.info(self.obdtype, self.devname, self.size, self.fstype, self.format)
- if self.obdtype == 'obdecho':
+ if not self.active:
+ debug(self.uuid, "not active")
+ return
+ self.info(self.osdtype, self.devpath, self.size, self.fstype, self.format)
+ run_acceptors()
+ if self.osdtype == 'obdecho':
blkdev = ''
else:
- blkdev = block_dev(self.devname, self.size, self.fstype, self.format)
- lctl.newdev(attach="%s %s %s" % (self.obdtype, self.name, self.uuid),
+ blkdev = block_dev(self.devpath, self.size, self.fstype, self.format)
+ lctl.newdev(attach="%s %s %s" % (self.osdtype, self.name, self.uuid),
setup ="%s %s" %(blkdev, self.fstype))
+ if not is_prepared('OSS_UUID'):
+ lctl.newdev(attach="ost %s %s" % ('OSS', 'OSS_UUID'),
+ setup ="")
+
def cleanup(self):
- if not is_prepared(self.uuid):
+ if is_prepared('OSS_UUID'):
+ try:
+ lctl.cleanup("OSS", "OSS_UUID")
+ except CommandError, e:
+ print "cleanup failed: ", self.name
+ e.dump()
+ cleanup_error(e.rc)
+ if is_prepared(self.uuid):
+ Module.cleanup(self)
+ if not self.osdtype == 'obdecho':
+ clean_loop(self.devpath)
+
+# Generic client module, used by OSC and MDC
+class Client(Module):
+    """Generic client module, used by OSC and MDC.
+
+    Builds a client device named <MODULE>_<owner>_<target name> with a
+    randomly generated uuid, for the target described by tgtdb.
+    """
+    def __init__(self, tgtdb, module, owner):
+        self.target_name = tgtdb.getName()
+        self.target_uuid = tgtdb.getUUID()
+        self.db = tgtdb
+
+        self.tgt_dev_uuid = tgtdb.get_active_target()
+        if not self.tgt_dev_uuid:
+            panic("No target device found for target:", self.target_name)
+
+        # Module.__init__ is not called here, so the fields it would
+        # normally create are set up by hand.
+        self.kmodule_list = []
+        self._server = None
+        self._connected = 0
+
+        self.module = module
+        self.module_name = string.upper(module)
+        self.name = '%s_%s_%s' % (self.module_name, owner, self.target_name)
+        self.uuid = '%05x%05x_%.14s_%05x%05x' % (int(random.random() * 1048576),
+                                                 int(random.random() * 1048576),self.name,
+                                                 int(random.random() * 1048576),
+                                                 int(random.random() * 1048576))
+        # keep the generated uuid to at most 36 characters
+        self.uuid = self.uuid[0:36]
+        self.lookup_server(self.tgt_dev_uuid)
+        self.add_lustre_module(module, module)
+
+    def lookup_server(self, srv_uuid):
+        """ Lookup a server's network information """
+        self._server_nets = self.db.get_ost_net(srv_uuid)
+        if len(self._server_nets) == 0:
+            panic ("Unable to find a server for:", srv_uuid)
+
+    def get_servers(self):
+        """Return the server networks found by lookup_server()."""
+        return self._server_nets
+
+    def prepare(self, ignore_connect_failure = 0):
+        """Connect (or route) to the target's server and create the
+        client device.
+
+        A CommandError from the connect/route step is re-raised unless
+        ignore_connect_failure is true, in which case the device is
+        still created as long as a server was found.
+        """
+        if is_prepared(self.uuid):
             return
+        self.info(self.target_uuid)
+        # Bound up front so the check below is safe even when the
+        # server lookup itself raises.
+        srv = None
+        try:
+            srv = local_net(self.get_servers())
+            if srv:
+                lctl.connect(srv)
+            else:
+                srv, r = find_route(self.get_servers())
+                if srv:
+                    lctl.add_route_host(r[0], srv.uuid, r[1], r[2])
+                else:
+                    panic ("no route to", self.target_uuid)
+        except CommandError:
+            if not ignore_connect_failure:
+                raise
+        if srv:
+            lctl.newdev(attach="%s %s %s" % (self.module, self.name, self.uuid),
+                        setup ="%s %s" %(self.target_uuid, srv.uuid))
+
+    def cleanup(self):
+        """Clean up the device, then drop the connection or the host
+        route that prepare() set up.  Failures are logged and passed to
+        cleanup_error() instead of being raised."""
         Module.cleanup(self)
- if not self.obdtype == 'obdecho':
- clean_loop(self.devname)
+        srv = local_net(self.get_servers())
+        if srv:
+            try:
+                lctl.disconnect(srv.net_type, srv.nid, srv.port, srv.uuid)
+            except CommandError, e:
+                log(self.module_name, "disconnect failed: ", self.name)
+                e.dump()
+                cleanup_error(e.rc)
+        else:
+            self.info(self.target_uuid)
+            srv, r = find_route(self.get_servers())
+            if srv:
+                try:
+                    lctl.del_route_host(r[0], srv.uuid, r[1], r[2])
+                except CommandError, e:
+                    print "del_route failed: ", self.name
+                    e.dump()
+                    cleanup_error(e.rc)
+
+
+
+# Client that uses the 'mdc' module for the target described by db.
+class MDC(Client):
+ def __init__(self, db, owner):
+ Client.__init__(self, db, 'mdc', owner)
+# Client that uses the 'osc' module for the target described by db.
+class OSC(Client):
+ def __init__(self, db, owner):
+ Client.__init__(self, db, 'osc', owner)
+
+
class COBD(Module):
- def __init__(self, dom_node):
- Module.__init__(self, 'COBD', dom_node)
- self.real_uuid = get_first_ref(dom_node, 'real_obd')
- self.cache_uuid = get_first_ref(dom_node, 'cache_obd')
+ def __init__(self, db):
+ Module.__init__(self, 'COBD', db)
+ self.real_uuid = self.db.get_first_ref('realobd')
+ self.cache_uuid = self.db.get_first_ref('cacheobd')
self.add_lustre_module('cobd' , 'cobd')
# need to check /proc/mounts and /etc/mtab before
lctl.newdev(attach="cobd %s %s" % (self.name, self.uuid),
setup ="%s %s" %(self.real_uuid, self.cache_uuid))
-class OST(Module):
- def __init__(self,dom_node):
- Module.__init__(self, 'OST', dom_node)
- self.obd_uuid = get_first_ref(dom_node, 'obd')
- self.add_lustre_module('ost', 'ost')
-
- def prepare(self):
- if is_prepared(self.uuid):
- return
- self.info(self.obd_uuid)
- lctl.newdev(attach="ost %s %s" % (self.name, self.uuid),
- setup ="%s" % (self.obd_uuid))
-
# virtual interface for OSC and LOV
class VOSC(Module):
- def __init__(self,dom_node):
- Module.__init__(self, 'VOSC', dom_node)
- if dom_node.nodeName == 'lov':
- self.osc = LOV(dom_node)
+ def __init__(self,db, owner):
+ Module.__init__(self, 'VOSC', db)
+ if db.get_class() == 'lov':
+ self.osc = LOV(db)
else:
- self.osc = OSC(dom_node)
+ self.osc = get_osc(db, owner)
def get_uuid(self):
    """Return the uuid of the wrapped OSC/LOV object."""
    wrapped = self.osc
    return wrapped.uuid
def prepare(self):
self.osc.prepare()
def cleanup(self):
self.osc.load_module()
def cleanup_module(self):
self.osc.cleanup_module()
-
def need_mdc(self):
    """Return true unless the underlying db object is of class 'lov'."""
    db_class = self.db.get_class()
    return db_class != 'lov'
def get_mdc_uuid(self):
    """Return the wrapped LOV's mdc_uuid, or '' when the underlying db
    object is not of class 'lov'."""
    if self.db.get_class() != 'lov':
        return ''
    return self.osc.mdc_uuid
-class OSC(Module):
- def __init__(self,dom_node):
- Module.__init__(self, 'OSC', dom_node)
- self.obd_uuid = get_first_ref(dom_node, 'obd')
- self.ost_uuid = get_first_ref(dom_node, 'ost')
- self.lookup_server(self.ost_uuid)
- self.add_lustre_module('osc', 'osc')
-
- def prepare(self, ignore_connect_failure = 0):
- if is_prepared(self.uuid):
- return
- self.info(self.obd_uuid, self.ost_uuid)
- srv = self.get_server()
- try:
- if local_net(srv):
- lctl.connect(srv.net_type, srv.nid, srv.port, srv.uuid, srv.send_mem, srv.recv_mem)
- else:
- r = find_route(srv)
- if r:
- lctl.add_route_host(r[0], srv.uuid, r[1], r[2])
- else:
- panic ("no route to", srv.nid)
- except CommandError:
- if (ignore_connect_failure == 0):
- pass
-
- lctl.newdev(attach="osc %s %s" % (self.name, self.uuid),
- setup ="%s %s" %(self.obd_uuid, srv.uuid))
-
- def cleanup(self):
- if not is_prepared(self.uuid):
- return
- srv = self.get_server()
- if local_net(srv):
- Module.cleanup(self)
- else:
- self.info(self.obd_uuid, self.ost_uuid)
- r = find_route(srv)
- if r:
- try:
- lctl.del_route_host(r[0], srv.uuid, r[1], r[2])
- except CommandError, e:
- print "del_route failed: ", self.name
- e.dump()
- cleanup_error(e.rc)
- Module.cleanup(self)
-
class ECHO_CLIENT(Module):
def __init__(self,db):
    """Register the obdecho module and build the VOSC for the object
    referenced by this entry's first 'obd' ref."""
    Module.__init__(self, 'ECHO_CLIENT', db)
    self.add_lustre_module('obdecho', 'obdecho')
    self.obd_uuid = self.db.get_first_ref('obd')
    target_db = self.db.lookup(self.obd_uuid)
    self.osc = VOSC(target_db, self.name)
def prepare(self):
    """Prepare the wrapped OSC/LOV, then create the echo_client device
    on top of it.  Does nothing when already prepared."""
    if not is_prepared(self.uuid):
        self.osc.prepare() # XXX This is so cheating. -p
        self.info(self.obd_uuid)
        attach_args = "echo_client %s %s" % (self.name, self.uuid)
        lctl.newdev(attach=attach_args,
                    setup = self.osc.get_uuid())
def cleanup(self):
    """Clean up this device when it is prepared, then always clean up
    the wrapped OSC/LOV."""
    prepared = is_prepared(self.uuid)
    if prepared:
        Module.cleanup(self)
    self.osc.cleanup()
def load_module(self):
class Mountpoint(Module):
def __init__(self,db):
    """Read the mount path and the mds/obd refs from DB and build the
    VOSC for the referenced obd.  The mdc module is registered only
    when the VOSC reports that it needs one; llite always is."""
    Module.__init__(self, 'MTPT', db)
    self.path = self.db.get_val('path')
    self.mds_uuid = self.db.get_first_ref('mds')
    self.obd_uuid = self.db.get_first_ref('obd')
    target_db = self.db.lookup(self.obd_uuid)
    self.vosc = VOSC(target_db, self.name)
    wants_mdc = self.vosc.need_mdc()
    if wants_mdc:
        self.add_lustre_module('mdc', 'mdc')
    self.add_lustre_module('llite', 'llite')

+
def prepare(self):
    """Prepare the VOSC and its MDC, then mount the filesystem at
    self.path.  panic() is called when no MDC uuid can be determined or
    when the mount command reports failure."""
    self.vosc.prepare()
    if not self.vosc.need_mdc():
        mdc_uuid = self.vosc.get_mdc_uuid()
    else:
        mdc_uuid = prepare_mdc(self.db, self.name, self.mds_uuid)
    if not mdc_uuid:
        panic("Unable to determine MDC UUID. Probably need to cleanup before re-mounting.")
    self.info(self.path, self.mds_uuid, self.obd_uuid)
    mount_args = (self.vosc.get_uuid(), mdc_uuid, self.path)
    cmd = "mount -t lustre_lite -o osc=%s,mdc=%s none %s" % mount_args
    run("mkdir", self.path)
    status, output = run(cmd)
    if status:
        panic("mount failed:", self.path)
def cleanup(self):
- self.info(self.path, self.mds_uuid,self.lov_uuid)
+ self.info(self.path, self.mds_uuid,self.obd_uuid)
if fs_is_mounted(self.path):
if config.force():
(rc, out) = run("umount", "-f", self.path)
if fs_is_mounted(self.path):
panic("fs is still mounted:", self.path)
- l = lookup(self.dom_node.parentNode, self.lov_uuid)
- self.osc.cleanup()
- cleanup_mdc(self.dom_node.parentNode, self.mds_uuid)
+ self.vosc.cleanup()
+ if self.vosc.need_mdc():
+ cleanup_mdc(self.db, self.name, self.mds_uuid)
def load_module(self):
    """Load the VOSC's modules first, then this mountpoint's own."""
    vosc = self.vosc
    vosc.load_module()
    Module.load_module(self)
def cleanup_module(self):
    """Unload this mountpoint's own modules first, then the VOSC's."""
    Module.cleanup_module(self)
    vosc = self.vosc
    vosc.cleanup_module()
# ============================================================
# XML processing and query
-# TODO: Change query funcs to use XPath, which is muc cleaner
-# Or not. Originally both lconf and lmc used XPath, but it was many
-# orders of magnitute slower, and lmc was unusable. - robert
-
-def get_device(obd):
- list = obd.getElementsByTagName('device')
- if len(list) > 0:
- dev = list[0]
- dev.normalize();
- size = get_attr_int(dev, 'size', 0)
- return dev.firstChild.data, size
- return '', 0
-
-# Get the text content from the first matching child
-# If there is no content (or it is all whitespace), return
-# the default
-def get_text(dom_node, tag, default=""):
- list = dom_node.getElementsByTagName(tag)
- if len(list) > 0:
- dom_node = list[0]
- dom_node.normalize()
- if dom_node.firstChild:
- txt = string.strip(dom_node.firstChild.data)
- if txt:
- return txt
- return default
-
-def get_text_int(dom_node, tag, default=0):
- list = dom_node.getElementsByTagName(tag)
- n = default
- if len(list) > 0:
- dom_node = list[0]
- dom_node.normalize()
- if dom_node.firstChild:
- txt = string.strip(dom_node.firstChild.data)
- if txt:
- try:
- n = int(txt)
- except ValueError:
- panic("text value is not integer:", txt)
- return n
-def get_attr(dom_node, attr, default=""):
- v = dom_node.getAttribute(attr)
- if v:
- return v
- return default
class LustreDB:
    """Backend-neutral accessor for one object of a Lustre configuration.

    The methods here are written in terms of hooks that a concrete
    backend must provide: _lookup_by_uuid, _lookup_by_name,
    _lookup_by_class, _get_val, _get_class, _get_refs, _get_all_refs,
    getName and getUUID.
    """

    def lookup(self, uuid):
        """ lookup returns a new LustreDB instance"""
        return self._lookup_by_uuid(uuid)

    def lookup_name(self, name, class_name = ""):
        """ lookup returns a new LustreDB instance"""
        return self._lookup_by_name(name, class_name)

    def lookup_class(self, class_name):
        """ lookup returns a list of LustreDB instances of CLASS_NAME"""
        return self._lookup_by_class(class_name)

    def get_val(self, tag, default=None):
        """Return the value stored under TAG.

        When the backend has no (or an empty) value, DEFAULT is
        returned if one was given; otherwise the miss is reported via
        debug() and None is returned.
        """
        v = self._get_val(tag)
        if v:
            return v
        if default is not None:
            return default
        debug("LustreDB", self.getName(), " no value for:", tag)
        return None

    def get_class(self):
        """Return the class (service type) name of this object."""
        return self._get_class()

    def get_val_int(self, tag, default=0):
        """Return the value under TAG as an int, or DEFAULT when it is
        missing or empty.  panic() is called for non-integer text."""
        txt = self._get_val(tag)
        try:
            if txt:
                return int(txt)
            return default
        except ValueError:
            panic("text value is not integer:", txt)

    def get_first_ref(self, tag):
        """ Get the first uuidref of the type TAG. Only
        one is expected.  Returns the uuid, or None when there is none."""
        uuids = self._get_refs(tag)
        if len(uuids) > 0:
            return uuids[0]
        return None

    def get_refs(self, tag):
        """ Get all the refs of type TAG.  Returns list of uuids. """
        uuids = self._get_refs(tag)
        return uuids

    def get_all_refs(self):
        """ Get all the refs.  Returns list of (ref_class, ref_uuid). """
        uuids = self._get_all_refs()
        return uuids

    def get_ost_net(self, osd_uuid):
        """Return a list of Network objects for the node that owns the
        device OSD_UUID.

        An empty list is returned when OSD_UUID is empty.  panic() is
        called when the device itself, or the node it references,
        cannot be looked up.
        """
        srv_list = []
        if not osd_uuid:
            return srv_list
        osd = self.lookup(osd_uuid)
        if not osd:
            # Report the dangling reference instead of failing later
            # with an AttributeError on None.
            panic("unable to find osd for osd_uuid:", osd_uuid)
        node_uuid = osd.get_first_ref('node')
        node = self.lookup(node_uuid)
        if not node:
            panic("unable to find node for osd_uuid:", osd_uuid,
                  " node_ref:", node_uuid)
        for net_uuid in node.get_networks():
            db = node.lookup(net_uuid)
            srv_list.append(Network(db))
        return srv_list

    def nid2server(self, nid, net_type):
        """Return the 'network' entry whose nid and nettype match, or
        None when there is none."""
        netlist = self.lookup_class('network')
        for net_db in netlist:
            if net_db.get_val('nid') == nid and net_db.get_val('nettype') == net_type:
                return net_db
        return None

    # The class name of an object is its service type.
    # FIXME: this should check that the object really is a service.
    #
    # Determine what "level" a particular service is at.
    # The order of initialization is based on level.
    def getServiceLevel(self):
        """Return the start-up level for this object's class.

        0 is returned for an unrecognised class and for a level outside
        the range config.minlevel() .. config.maxlevel().
        """
        svc_class = self.get_class()
        ret = 0
        if svc_class in ('network',):
            ret = 5
        elif svc_class in ('routetbl',):
            ret = 6
        elif svc_class in ('ptlrpc',):
            ret = 7
        elif svc_class in ('device', 'ldlm'):
            ret = 20
        elif svc_class in ('osd', 'mdd', 'cobd'):
            ret = 30
        elif svc_class in ('mdsdev','ost'):
            ret = 40
        elif svc_class in ('mdc','osc'):
            ret = 50
        elif svc_class in ('lov',):
            ret = 60
        elif svc_class in ('mountpoint', 'echoclient'):
            ret = 70

        if ret < config.minlevel() or ret > config.maxlevel():
            ret = 0
        return ret

    #
    # return list of services in a profile. list is a list of tuples
    # [(level, db_object),]
    def getServices(self):
        """Return the sorted list of (level, db_object) for every ref of
        this profile whose level is non-zero.  panic() is called for a
        ref that cannot be looked up."""
        services = []
        for ref_class, ref_uuid in self.get_all_refs():
            servdb = self.lookup(ref_uuid)
            if servdb:
                level = servdb.getServiceLevel()
                if level > 0:
                    services.append((level, servdb))
            else:
                panic('service not found: ' + ref_uuid)

        services.sort()
        return services

    # Find the target_device for target on a node
    # node->profiles->device_refs->target
    def get_target_device(self, target_uuid, node_name):
        """Return the uuid of the device on node NODE_NAME whose first
        'target' ref is TARGET_UUID, or None when there is none."""
        node_db = self.lookup_name(node_name)
        if not node_db:
            return None
        prof_list = node_db.get_refs('profile')
        for prof_uuid in prof_list:
            prof_db = node_db.lookup(prof_uuid)
            ref_list = prof_db.get_all_refs()
            for ref in ref_list:
                dev = self.lookup(ref[1])
                if dev and dev.get_first_ref('target') == target_uuid:
                    return ref[1]
        return None

    def get_active_target(self):
        """Return the uuid of the device currently serving this target.

        When config.select() names a node for this target, the device
        on that node is used; otherwise the first 'active' ref is.
        """
        target_uuid = self.getUUID()
        target_name = self.getName()
        node_name = config.select(target_name)
        if node_name:
            tgt_dev_uuid = self.get_target_device(target_uuid, node_name)
        else:
            tgt_dev_uuid = self.get_first_ref('active')
        return tgt_dev_uuid


    # get all network uuids for this node
    def get_networks(self):
        """Return the 'network' ref uuids of every profile referenced by
        this node, in profile order."""
        ret = []
        prof_list = self.get_refs('profile')
        for prof_uuid in prof_list:
            prof_db = self.lookup(prof_uuid)
            net_list = prof_db.get_refs('network')
            #debug("get_networks():", prof_uuid, net_list)
            for net_uuid in net_list:
                ret.append(net_uuid)
        return ret

+
class LustreDB_XML(LustreDB):
    """LustreDB backend on top of an xml.dom.minidom tree.

    dom_node is the element this instance describes; root_node is the
    element under which uuid, name and class lookups search.
    """

    def __init__(self, dom, root_node):
        # init xmlfile
        self.dom_node = dom
        self.root_node = root_node

    def xmltext(self, dom_node, tag):
        """Return the stripped text of the first TAG element below
        DOM_NODE, or None when it is missing or blank."""
        matches = dom_node.getElementsByTagName(tag)
        if len(matches) > 0:
            dom_node = matches[0]
            dom_node.normalize()
            if dom_node.firstChild:
                txt = string.strip(dom_node.firstChild.data)
                if txt:
                    return txt
        return None

    def xmlattr(self, dom_node, attr):
        """Return attribute ATTR of DOM_NODE."""
        return dom_node.getAttribute(attr)

    def _get_val(self, tag):
        """a value could be an attribute of the current node
        or the text value in a child node"""
        ret = self.xmlattr(self.dom_node, tag)
        if not ret:
            ret = self.xmltext(self.dom_node, tag)
        return ret

    def _get_class(self):
        """Return the element name of the current node."""
        return self.dom_node.nodeName

    #
    # [(ref_class, ref_uuid),]
    def _get_all_refs(self):
        """Return a sorted list of (element name, uuidref) for every
        child element of the current node."""
        refs = []
        for n in self.dom_node.childNodes:
            if n.nodeType == n.ELEMENT_NODE:
                ref_uuid = self.xml_get_ref(n)
                ref_class = n.nodeName
                refs.append((ref_class, ref_uuid))

        refs.sort()
        return refs

    def _get_refs(self, tag):
        """ Get all the refs of type TAG.  Returns list of uuids. """
        uuids = []
        refname = '%s_ref' % tag
        reflist = self.dom_node.getElementsByTagName(refname)
        for r in reflist:
            uuids.append(self.xml_get_ref(r))
        return uuids

    def xmllookup_by_uuid(self, dom_node, uuid):
        """Depth-first search below DOM_NODE for the element whose
        'uuid' attribute equals UUID; None when not found."""
        for n in dom_node.childNodes:
            if n.nodeType == n.ELEMENT_NODE:
                if self.xml_get_uuid(n) == uuid:
                    return n
                found = self.xmllookup_by_uuid(n, uuid)
                if found:
                    return found
        return None

    def _lookup_by_uuid(self, uuid):
        dom = self.xmllookup_by_uuid(self.root_node, uuid)
        if dom:
            return LustreDB_XML(dom, self.root_node)
        return None

    def xmllookup_by_name(self, dom_node, name, class_name = ""):
        """Depth-first search below DOM_NODE for the element whose
        'name' attribute equals NAME.

        When CLASS_NAME is non-empty only elements with that tag name
        match, so an unrelated element that happens to share the name
        is skipped.  Returns None when nothing matches.
        """
        for n in dom_node.childNodes:
            if n.nodeType == n.ELEMENT_NODE:
                if self.xml_get_name(n) == name and \
                   (not class_name or n.nodeName == class_name):
                    return n
                found = self.xmllookup_by_name(n, name, class_name)
                if found:
                    return found
        return None

    def _lookup_by_name(self, name, class_name):
        dom = self.xmllookup_by_name(self.root_node, name, class_name)
        if dom:
            return LustreDB_XML(dom, self.root_node)
        return None

    def xmllookup_by_class(self, dom_node, class_name):
        return dom_node.getElementsByTagName(class_name)

    def _lookup_by_class(self, class_name):
        ret = []
        domlist = self.xmllookup_by_class(self.root_node, class_name)
        for node in domlist:
            ret.append(LustreDB_XML(node, self.root_node))
        return ret

    def xml_get_name(self, n):
        return n.getAttribute('name')

    def getName(self):
        return self.xml_get_name(self.dom_node)

    def xml_get_ref(self, n):
        return n.getAttribute('uuidref')

    def xml_get_uuid(self, dom_node):
        return dom_node.getAttribute('uuid')

    def getUUID(self):
        return self.xml_get_uuid(self.dom_node)

    def get_routes(self, type, gw):
        """ Return the routes as a list of tuples of the form:
        [(type, gw, lo, hi),]
        Only 'route' entries whose own type differs from TYPE are
        included."""
        res = []
        tbl = self.dom_node.getElementsByTagName('routetbl')
        for t in tbl:
            routes = t.getElementsByTagName('route')
            for r in routes:
                net_type = self.xmlattr(r, 'type')
                if type != net_type:
                    lo = self.xmlattr(r, 'lo')
                    hi = self.xmlattr(r, 'hi')
                    res.append((type, gw, lo, hi))
        return res

    def get_route_tbl(self):
        """Return [(type, gw, lo, hi),] for every 'route' element below
        the current node."""
        ret = []
        for r in self.dom_node.getElementsByTagName('route'):
            net_type = self.xmlattr(r, 'type')
            gw = self.xmlattr(r, 'gw')
            lo = self.xmlattr(r, 'lo')
            hi = self.xmlattr(r, 'hi')
            ret.append((net_type, gw, lo, hi))
        return ret


+
+
+# ================================================================
+# LDAP Support
+class LustreDB_LDAP(LustreDB):
+ def __init__(self, name, attrs,
+ base = "fs=lustre",
+ parent = None,
+ url = "ldap://localhost",
+ user = "cn=Manager, fs=lustre",
+ pw = "secret"
+ ):
+ self._name = name
+ self._attrs = attrs
+ self._base = base
+ self._parent = parent
+ self._url = url
+ self._user = user
+ self._pw = pw
+ if parent:
+ self.l = parent.l
+ self._base = parent._base
+ else:
+ self.open()
+
+ def open(self):
+ import ldap
+ try:
+ self.l = ldap.initialize(self._url)
+ # Set LDAP protocol version used
+ self.l.protocol_version=ldap.VERSION3
+ # user and pw only needed if modifying db
+ self.l.bind_s("", "", ldap.AUTH_SIMPLE);
+ except ldap.LDAPError, e:
+ panic(e)
+ # FIXME, do something useful here
+
+ def close(self):
+ self.l.unbind_s()
+
+ def ldap_search(self, filter):
+ """Return list of uuids matching the filter."""
+ import ldap
+ dn = self._base
+ ret = []
+ uuids = []
+ try:
+ for name, attrs in self.l.search_s(dn, ldap.SCOPE_ONELEVEL,
+ filter, ["uuid"]):
+ for v in attrs['uuid']:
+ uuids.append(v)
+ except ldap.NO_SUCH_OBJECT, e:
+ pass
+ except ldap.LDAPError, e:
+ print e # FIXME: die here?
+ if len(uuids) > 0:
+ for uuid in uuids:
+ ret.append(self._lookup_by_uuid(uuid))
+ return ret
+
+ def _lookup_by_name(self, name, class_name):
+ list = self.ldap_search("lustreName=%s" %(name))
+ if len(list) == 1:
+ return list[0]
+ return []
+
+ def _lookup_by_class(self, class_name):
+ return self.ldap_search("objectclass=%s" %(string.upper(class_name)))
+
+ def _lookup_by_uuid(self, uuid):
+ import ldap
+ dn = "uuid=%s,%s" % (uuid, self._base)
+ ret = None
+ try:
+ for name, attrs in self.l.search_s(dn, ldap.SCOPE_BASE,
+ "objectclass=*"):
+ ret = LustreDB_LDAP(name, attrs, parent = self)
+
+ except ldap.NO_SUCH_OBJECT, e:
+ debug("NO_SUCH_OBJECT:", uuid)
+ pass # just return empty list
+ except ldap.LDAPError, e:
+ print e # FIXME: die here?
+ return ret
+
+
+ def _get_val(self, k):
+ ret = None
+ if self._attrs.has_key(k):
+ v = self._attrs[k]
+ if type(v) == types.ListType:
+ ret = str(v[0])
else:
- n = lookup(n, uuid)
- if n: return n
- return None
-
-# Get name attribute of dom_node
-def getName(dom_node):
- return dom_node.getAttribute('name')
+ ret = str(v)
+ return ret
-def getRef(dom_node):
- return dom_node.getAttribute('uuidref')
+ def _get_class(self):
+ return string.lower(self._attrs['objectClass'][0])
-# Get name attribute of dom_node
-def getUUID(dom_node):
- return dom_node.getAttribute('uuid')
+ #
+ # [(ref_class, ref_uuid),]
+ def _get_all_refs(self):
+ list = []
+ for k in self._attrs.keys():
+ if re.search('.*Ref', k):
+ for uuid in self._attrs[k]:
+ list.append((k, uuid))
+ return list
-# the tag name is the service type
-# fixme: this should do some checks to make sure the dom_node is a service
-def getServiceType(dom_node):
- return dom_node.nodeName
+ def _get_refs(self, tag):
+ """ Get all the refs of type TAG. Returns list of uuids. """
+ uuids = []
+ refname = '%sRef' % tag
+ if self._attrs.has_key(refname):
+ return self._attrs[refname]
+ return []
-#
-# determine what "level" a particular node is at.
-# the order of iniitailization is based on level.
-def getServiceLevel(dom_node):
- type = getServiceType(dom_node)
- ret=0;
- if type in ('network',):
- ret = 10
- elif type in ('device', 'ldlm'):
- ret = 20
- elif type in ('obd', 'mdd', 'cobd'):
- ret = 30
- elif type in ('mds','ost'):
- ret = 40
- elif type in ('mdc','osc'):
- ret = 50
- elif type in ('lov', 'lovconfig'):
- ret = 60
- elif type in ('mountpoint', 'echo_client'):
- ret = 70
-
- if ret < config.minlevel() or ret > config.maxlevel():
- ret = 0
- return ret
+ def getName(self):
+ return self._get_val('lustreName')
-#
-# return list of services in a profile. list is a list of tuples
-# [(level, dom_node),]
-def getServices(lustreNode, profileNode):
- list = []
- for n in profileNode.childNodes:
- if n.nodeType == n.ELEMENT_NODE:
- servNode = lookup(lustreNode, getRef(n))
- if not servNode:
- print n
- panic('service not found: ' + getRef(n))
- level = getServiceLevel(servNode)
- if level > 0:
- list.append((level, servNode))
- list.sort()
- return list
-
-def getByName(lustreNode, name, tag):
- ndList = lustreNode.getElementsByTagName(tag)
- for nd in ndList:
- if getName(nd) == name:
- return nd
- return None
-
+ def getUUID(self):
+ return self._get_val('uuid')
+
+ def get_route_tbl(self):
+ return []
############################################################
# MDC UUID hack -
# FIXME: clean this mess up!
#
-saved_mdc = {}
-def prepare_mdc(dom_node, mds_uuid):
- global saved_mdc
- mds_node = lookup(dom_node, mds_uuid);
- if not mds_node:
+# OSC is no longer in the xml, so we have to fake it.
+# this is getting ugly and begging for another refactoring
def get_osc(ost_db, owner):
    """Build the OSC client object for OST_DB on behalf of OWNER."""
    return OSC(ost_db, owner)
+
def get_mdc(db, owner, mds_uuid):
    """Build the MDC client object for the mds MDS_UUID on behalf of
    OWNER; panic() is called when the mds cannot be looked up."""
    mds_db = db.lookup(mds_uuid)
    if not mds_db:
        panic("no mds:", mds_uuid)
    return MDC(mds_db, owner)
+
def prepare_mdc(db, owner, mds_uuid):
    """Create and prepare the MDC for MDS_UUID; return the MDC's uuid."""
    client = get_mdc(db, owner, mds_uuid)
    client.prepare()
    return client.uuid
-def cleanup_mdc(dom_node, mds_uuid):
- global saved_mdc
- mds_node = lookup(dom_node, mds_uuid);
- if not mds_node:
- panic("no mds:", mds_uuid)
- if not saved_mdc.has_key(mds_uuid):
- mdc = MDC(mds_node)
- mdc.cleanup()
- saved_mdc[mds_uuid] = mdc.uuid
def cleanup_mdc(db, owner, mds_uuid):
    """Create the MDC for MDS_UUID and run its cleanup()."""
    client = get_mdc(db, owner, mds_uuid)
    client.cleanup()
############################################################
local_node = []
router_flag = 0
-def init_node(dom_node):
- global local_node, router_flag
- netlist = dom_node.getElementsByTagName('network')
- for dom_net in netlist:
- type = get_attr(dom_net, 'type')
- gw = get_text(dom_net, 'server')
- local_node.append((type, gw))
def add_local_interfaces(node_db):
    """Record every network of NODE_DB in local_node as a (net_type, nid)
    pair and create an AcceptorHandler for each tcp/toe network.

    panic() is called when a network's port already has an acceptor.
    """
    global local_node
    for netuuid in node_db.get_networks():
        srv = Network(node_db.lookup(netuuid))
        debug("add_local", netuuid)
        local_node.append((srv.net_type, srv.nid))
        port = srv.port
        if acceptors.has_key(port):
            panic("duplicate port:", port)
        if srv.net_type in ('tcp', 'toe'):
            handler = AcceptorHandler(port, srv.net_type,
                                      srv.send_mem, srv.recv_mem,
                                      srv.irq_affinity,
                                      srv.nid_exchange)
            acceptors[port] = handler
def node_needs_router():
    """Return the module-level router_flag."""
    flag = router_flag
    return flag
-def get_routes(type, gw, dom_net):
- """ Return the routes as a list of tuples of the form:
- [(type, gw, lo, hi),]"""
- res = []
- tbl = dom_net.getElementsByTagName('route_tbl')
- for t in tbl:
- routes = t.getElementsByTagName('route')
- for r in routes:
- lo = get_attr(r, 'lo')
- hi = get_attr(r, 'hi', '')
- res.append((type, gw, lo, hi))
- return res
-
-
def init_route_config(lustre):
    """ Scan the lustre config looking for routers.  Build list of
    routes.

    For every router node and every local interface, the router's nid
    on the interface's network type is used as the gateway for the
    routes returned by the router's get_routes().  Sets router_flag
    when at least one router node exists.
    """
    global routes, router_flag
    routes = []
    for node_db in lustre.lookup_class('node'):
        if not node_db.get_val_int('router', 0):
            continue
        router_flag = 1
        #debug("init_route_config: found router", node_db.getName())
        for (local_type, local_nid) in local_node:
            #debug("init_route_config:", local_type, local_nid)
            gw = None
            for netuuid in node_db.get_networks():
                net_db = node_db.lookup(netuuid)
                if local_type == net_db.get_val('nettype'):
                    gw = net_db.get_val('nid')
                    break
            #debug("init_route_config: gw is", gw)
            if gw:
                for route in node_db.get_routes(local_type, gw):
                    routes.append(route)
    debug("init_route_config routes:", routes)


+
+
def local_net(srv_list):
    """Return the first server in SRV_LIST whose net_type equals the
    type of a local interface, or None.  Local interfaces are tried in
    the order they appear in local_node."""
    global local_node
    for local_iface in local_node:
        local_type = local_iface[0]
        for candidate in srv_list:
            if candidate.net_type == local_type:
                return candidate
    return None
-def local_net(net):
def local_net_type(net_type):
    """Return 1 when a local interface of type NET_TYPE exists, else 0."""
    global local_node
    found = 0
    for local_iface in local_node:
        if local_iface[0] == net_type:
            found = 1
            break
    return found
-def find_route(net):
def find_route(srv_list):
    """Find a configured route to one of the servers in SRV_LIST.

    Each server's hostaddr is compared with the third field of every
    entry in the global routes list.  Returns (srv, route) for the
    first match and (None, None) when nothing matches.

    local_node is not consulted, so the search also works (and simply
    finds nothing or something) when no local interface is registered.
    """
    global routes
    for srv in srv_list:
        #debug("find_route: srv:", srv.hostaddr, "type: ", srv.net_type)
        to = srv.hostaddr
        for r in routes:
            #debug("find_route: ", r)
            if r[2] == to:
                return srv, r
    return None,None
-
-
############################################################
# lconf level logic
# Start a service.
-def startService(dom_node, module_flag):
- type = getServiceType(dom_node)
- debug('Service:', type, getName(dom_node), getUUID(dom_node))
- # there must be a more dynamic way of doing this...
def newService(db):
    """Build the service object that matches DB's class.

    panic() is called for an unknown class; None is returned in that
    case if panic() comes back.
    """
    svc_class = db.get_class()
    debug('Service:', svc_class, db.getName(), db.getUUID())
    if svc_class == 'ldlm':
        return LDLM(db)
    if svc_class == 'ptlrpc':
        return PTLRPC(db)
    if svc_class == 'lov':
        return LOV(db)
    if svc_class == 'network':
        return Network(db)
    if svc_class == 'routetbl':
        return Router(db)
    if svc_class == 'osd':
        return OSD(db)
    if svc_class == 'cobd':
        return COBD(db)
    if svc_class == 'mdsdev':
        return MDSDEV(db)
    if svc_class == 'mountpoint':
        return Mountpoint(db)
    if svc_class == 'echoclient':
        return ECHO_CLIENT(db)
    panic ("unknown service type:", svc_class)
    return None
#
# Prepare the system to run lustre using a particular profile
# * make sure partitions are in place and prepared
# * initialize devices with lctl
# Levels are important, and need to be enforced.
-def startProfile(lustreNode, profileNode, module_flag):
- if not profileNode:
- panic("profile:", profile, "not found.")
- services = getServices(lustreNode, profileNode)
- if config.cleanup():
- services.reverse()
def for_each_profile(db, prof_list, operation):
    """Call OPERATION with the service list of every profile in
    PROF_LIST.

    Each uuid is looked up through DB; panic() is called, naming the
    uuid, when a profile cannot be found.  OPERATION receives the list
    returned by the profile's getServices().
    """
    for prof_uuid in prof_list:
        prof_db = db.lookup(prof_uuid)
        if not prof_db:
            panic("profile:", prof_uuid, "not found.")
        services = prof_db.getServices()
        operation(services)

+
def doSetup(services):
    """Prepare every service in SERVICES, in list order, unless
    config.nosetup() is set."""
    if not config.nosetup():
        for level, svc_db in services:
            newService(svc_db).prepare()

+
def doModules(services):
    """Load the modules of every service in SERVICES, in list order,
    unless config.nomod() is set."""
    if not config.nomod():
        for level, svc_db in services:
            newService(svc_db).load_module()

+
def doCleanup(services):
    """Clean up every service in SERVICES in reverse order, unless
    config.nosetup() is set.  SERVICES is reversed in place."""
    if not config.nosetup():
        services.reverse()
        for level, svc_db in services:
            newService(svc_db).cleanup()
def doUnloadModules(services):
    """Unload the modules of every service in SERVICES in reverse
    order, unless config.nomod() is set.  SERVICES is reversed in
    place."""
    if not config.nomod():
        services.reverse()
        for level, svc_db in services:
            newService(svc_db).cleanup_module()
#
# Load profile for
-def doHost(lustreNode, hosts):
def doHost(lustreDB, hosts):
    """Set up or clean up Lustre on this machine.

    The first name in HOSTS that has a node entry in LUSTREDB selects
    the node.  Depending on config.cleanup() its profiles are either
    cleaned up and their modules unloaded, or their modules are loaded
    and the services set up.  With config.lctl_dump() only the lctl
    step (setup or cleanup) is run.
    """
    global routes
    global router_flag
    node_db = None
    for host in hosts:
        node_db = lustreDB.lookup_name(host, 'node')
        if node_db:
            break
    else:
        # the loop ran out without finding a node entry
        print('No host entry found.')
        return

    router_flag = node_db.get_val_int('router', 0)
    recovery_upcall = node_db.get_val('recovery_upcall', '')
    timeout = node_db.get_val_int('timeout', 0)

    add_local_interfaces(node_db)
    if not router_flag:
        init_route_config(lustreDB)

    prof_list = node_db.get_refs('profile')

    if not config.cleanup():
        # Two step process: (1) load modules, (2) setup lustre

        # ugly hack, only need to run lctl commands for --dump
        if config.lctl_dump():
            for_each_profile(node_db, prof_list, doSetup)
            return

        for_each_profile(node_db, prof_list, doModules)

        sys_set_debug_path()
        script = config.gdb_script()
        run(lctl.lctl, ' modules >', script)
        if config.gdb():
            log ("The GDB module script is in", script)
            # pause, so user has time to break and
            # load the script
            time.sleep(5)
        sys_set_timeout(timeout)
        sys_set_recovery_upcall(recovery_upcall)

        for_each_profile(node_db, prof_list, doSetup)
        return

    # Cleanup: undo the setup first, then unload the modules.
    if config.force():
        # the command line can override this value
        timeout = 5
    # ugly hack, only need to run lctl commands for --dump
    if config.lctl_dump():
        for_each_profile(node_db, prof_list, doCleanup)
        return

    sys_set_timeout(timeout)
    sys_set_recovery_upcall(recovery_upcall)

    for_each_profile(node_db, prof_list, doCleanup)
    for_each_profile(node_db, prof_list, doUnloadModules)
############################################################
# Command line processing
long_opts = ["ldap", "reformat", "lustre=", "verbose", "gdb",
"portals=", "makeldiff", "cleanup", "noexec",
"help", "node=", "nomod", "nosetup",
- "dump=", "force", "minlevel=", "maxlevel="]
+ "dump=", "force", "minlevel=", "maxlevel=",
+ "timeout=", "recovery_upcall=",
+ "ldapurl=", "config=", "select=", "lctl_dump="]
opts = []
args = []
config.verbose(1)
if o in ("-n", "--noexec"):
config.noexec(1)
- config.verbose(1)
if o == "--portals":
config.portals_dir(a)
if o == "--lustre":
config.dump_file(a)
if o in ("-f", "--force"):
config.force(1)
- if o in ("--minlevel",):
+ if o == "--minlevel":
config.minlevel(a)
- if o in ("--maxlevel",):
+ if o == "--maxlevel":
config.maxlevel(a)
+ if o == "--timeout":
+ config.timeout(a)
+ if o == "--recovery_upcall":
+ config.recovery_upcall(a)
+ if o == "--ldapurl":
+ config.ldapurl(a)
+ if o == "--config":
+ config.config_name(a)
+ if o == "--select":
+ config.init_select(a)
+ if o == "--lctl_dump":
+ config.lctl_dump(a)
+
return args
def fetch(url):
dir = os.path.join(config.lustre_dir(), dir)
config.portals_dir(dir)
-def sys_set_debug_path():
- debug("debug path: ", config.debug_path())
+def sysctl(path, val):
if config.noexec():
return
try:
- fp = open('/proc/sys/portals/debug_path', 'w')
- fp.write(config.debug_path())
+ fp = open(os.path.join('/proc/sys', path), 'w')
+ fp.write(str(val))
fp.close()
except IOError, e:
print e
-
-#/proc/sys/net/core/rmem_max
-#/proc/sys/net/core/wmem_max
+
+
def sys_set_debug_path():
    """Write config.debug_path() to the portals/debug_path sysctl."""
    debug("debug path: ", config.debug_path())
    debug_path = config.debug_path()
    sysctl('portals/debug_path', debug_path)
+
def sys_set_recovery_upcall(upcall):
    """Write the recovery upcall to the lustre/recovery_upcall sysctl.

    A value given on the command line overrides UPCALL (the value from
    the node config).  Nothing is written when neither is set.
    """
    override = config.recovery_upcall()
    if override:
        upcall = override
    if not upcall:
        return
    debug("setting recovery_upcall:", upcall)
    sysctl('lustre/recovery_upcall', upcall)
+
def sys_set_timeout(timeout):
    """Write the timeout to the lustre/timeout sysctl.

    A positive value given on the command line overrides TIMEOUT (the
    value from the node config).  Nothing is written unless the
    resulting value is positive.
    """
    override = config.timeout()
    if override > 0:
        timeout = override
    if not (timeout > 0):
        return
    debug("setting timeout:", timeout)
    sysctl('lustre/timeout', timeout)
+
def sys_set_ptldebug(ptldebug):
    """Write the debug setting to the portals/debug sysctl.

    A value given on the command line overrides PTLDEBUG (the value
    from the node config).  The sysctl is always written.
    """
    override = config.ptldebug()
    if override:
        ptldebug = override
    sysctl('portals/debug', ptldebug)

+
def sys_set_netmem_max(path, max):
debug("setting", path, "to at least", max)
if config.noexec():
# Shutdown does steps in reverse
#
def main():
- global TCP_ACCEPTOR, lctl, MAXTCPBUF
+ global lctl, MAXTCPBUF
host = socket.gethostname()
if not os.access(args[0], os.R_OK):
print 'File not found or readable:', args[0]
sys.exit(1)
- dom = xml.dom.minidom.parse(args[0])
- elif config.url():
- xmldata = fetch(config.url())
- dom = xml.dom.minidom.parseString(xmldata)
+ try:
+ dom = xml.dom.minidom.parse(args[0])
+ except Exception:
+ panic("%s does not appear to be a config file." % (args[0]))
+ sys.exit(1) # make sure to die here, even in debug mode.
+ db = LustreDB_XML(dom.documentElement, dom.documentElement)
+ elif config.ldapurl():
+ if not config.config_name():
+ panic("--ldapurl requires --config name")
+ dn = "config=%s,fs=lustre" % (config.config_name())
+ db = LustreDB_LDAP('', {}, base=dn, url = config.ldapurl())
else:
usage()
setupModulePath(sys.argv[0])
- TCP_ACCEPTOR = find_prog('acceptor')
- if not TCP_ACCEPTOR:
- if config.noexec():
- TCP_ACCEPTOR = 'acceptor'
- debug('! acceptor not found')
- else:
- panic('acceptor not found')
-
lctl = LCTLInterface('lctl')
+ if config.lctl_dump():
+ lctl.use_save_file(config.lctl_dump())
+ else:
+ sys_make_devices()
+ sys_set_netmem_max('/proc/sys/net/core/rmem_max', MAXTCPBUF)
+ sys_set_netmem_max('/proc/sys/net/core/wmem_max', MAXTCPBUF)
- sys_make_devices()
- sys_set_netmem_max('/proc/sys/net/core/rmem_max', MAXTCPBUF)
- sys_set_netmem_max('/proc/sys/net/core/wmem_max', MAXTCPBUF)
- doHost(dom.documentElement, node_list)
+ doHost(db, node_list)
if __name__ == "__main__":
try:
if first_cleanup_error:
sys.exit(first_cleanup_error)
-
+