from fcntl import F_GETFL, F_SETFL
# Global parameters
-TCP_ACCEPTOR = ''
MAXTCPBUF = 1048576
DEFAULT_TCPBUF = 1048576
#
self._ldapurl = ''
self._config_name = ''
self._select = {}
+ self._lctl_dump = ''
def verbose(self, flag = None):
if flag: self._verbose = flag
return self._select[srv]
return None
+ def lctl_dump(self, val = None):
+ if val: self._lctl_dump = val
+ return self._lctl_dump
+
config = Config()
# ============================================================
+# handle daemons, like the acceptor
class DaemonHandler:
    """Manage starting and stopping a daemon.

    The daemon is assumed to manage its own pid file.  This base class
    calls self.pidfile() (path of that pid file) and self.command_line()
    (argument string appended to the executable path) but does not define
    them; subclasses have to.
    """

    def __init__(self, cmd):
        # Name of the executable; resolved to a full path on first start().
        self.command = cmd
        self.path = ""

    def start(self):
        """Launch the daemon unless it is already running.

        Raises CommandError when the command returns a non-zero status.
        """
        if self.running():
            log(self.command, "already running.")
            # Do not launch a second copy next to the live one.
            return
        if not self.path:
            self.path = find_prog(self.command)
            if not self.path:
                panic(self.command, "not found.")
        ret, out = runcmd(self.path + ' ' + self.command_line())
        if ret:
            raise CommandError(self.path, out, ret)

    def stop(self):
        """Send signal 15 (SIGTERM) to the daemon if it is running.

        Failures are logged, never raised.
        """
        if not self.running():
            return
        pid = self.read_pidfile()
        if not pid:
            # The pid file vanished between the two reads.  os.kill(0, ...)
            # would signal our own process group, so stop here.
            return
        try:
            log("killing process", pid)
            os.kill(pid, 15)
        except OSError:
            # sys.exc_info() keeps this valid for old and new interpreters.
            log("unable to kill", self.command, sys.exc_info()[1])
        # There is no delay after the signal, so this check can still see
        # a process that is in the middle of exiting.
        if self.running():
            log("unable to kill", self.command)

    def running(self):
        """Return 1 if the pid from the pid file can be signalled, else 0.

        When os.kill(pid, 0) raises OSError the pid file is treated as
        stale and removed.
        """
        pid = self.read_pidfile()
        if pid:
            try:
                os.kill(pid, 0)
            except OSError:
                self.clean_pidfile()
            else:
                return 1
        return 0

    def read_pidfile(self):
        """Return the pid stored in the pid file, or 0 if there is none.

        0 is returned when the file cannot be opened or read, when its
        content is not an integer, and when the integer is not positive.
        Non-positive values must never reach os.kill(), where they address
        process groups or every process instead of a single one.
        """
        try:
            fp = open(self.pidfile(), 'r')
        except IOError:
            return 0
        # Nested try blocks: the combined try/except/finally form is not
        # available on older Python 2 interpreters.
        try:
            try:
                pid = int(fp.read())
            except (IOError, ValueError):
                return 0
        finally:
            fp.close()
        if pid <= 0:
            return 0
        return pid

    def clean_pidfile(self):
        """ Remove a stale pidfile """
        log("removing stale pidfile:", self.pidfile())
        try:
            os.unlink(self.pidfile())
        except OSError:
            log(self.pidfile(), sys.exc_info()[1])
+
class AcceptorHandler(DaemonHandler):
    """DaemonHandler for the "acceptor" command serving a single port."""

    def __init__(self, port, net_type, send_mem, recv_mem, irq_aff, nid_xchg):
        DaemonHandler.__init__(self, "acceptor")
        self.port = port
        self.send_mem = send_mem
        self.recv_mem = recv_mem

        # Every enabled option contributes one " -x" style fragment; the
        # order of the fragments is fixed.
        fragments = []
        if net_type == 'toe':
            fragments.append(' -N 4')
        if irq_aff:
            fragments.append(' -i')
        if nid_xchg:
            fragments.append(' -x')
        self.flags = ''.join(fragments)

    def pidfile(self):
        """Return the pid file path built from the command name and port."""
        return "/var/run/%s-%d.pid" % (self.command, self.port)

    def command_line(self):
        """Return the argument string: buffer sizes, flags, then the port."""
        words = ('-s', self.send_mem, '-r', self.recv_mem, self.flags, self.port)
        return ' '.join(map(str, words))
+
# Acceptor daemon handlers, keyed by port.
acceptors = {}


# start the acceptors
def run_acceptors():
    """Start every registered acceptor that is not running yet."""
    for daemon in acceptors.values():
        if daemon.running():
            continue
        daemon.start()
+
def stop_acceptor(port):
    """Stop the acceptor registered for `port`, if any, when it is running."""
    try:
        daemon = acceptors[port]
    except KeyError:
        # Nothing registered for this port.
        return
    if daemon.running():
        daemon.stop()
+
+
+# ============================================================
# handle lctl interface
class LCTLInterface:
"""
Initialize close by finding the lctl binary.
"""
self.lctl = find_prog(cmd)
+ self.save_file = ''
if not self.lctl:
if config.noexec():
debug('! lctl not found')
else:
raise CommandError('lctl', "unable to find lctl binary.")
+ def use_save_file(self, file):
+ self.save_file = file
+
def set_nonblock(self, fd):
fl = fcntl.fcntl(fd, F_GETFL)
fcntl.fcntl(fd, F_SETFL, fl | os.O_NDELAY)
should modify command line to accept multiple commands, or
create complex command line options
"""
- debug("+", self.lctl, cmds)
+ cmd_line = self.lctl
+ if self.save_file:
+ cmds = '\n dump ' + self.save_file + cmds
+
+ debug("+", cmd_line, cmds)
if config.noexec(): return (0, [])
- child = popen2.Popen3(self.lctl, 1) # Capture stdout and stderr from command
+ child = popen2.Popen3(cmd_line, 1) # Capture stdout and stderr from command
child.tochild.write(cmds + "\n")
child.tochild.close()
cmds = """
network %s
mynid %s
- add_uuid self %s
- quit""" % (net, nid, nid)
- else:
- cmds = """
- network %s
- add_uuid self %s
- quit""" % (net, nid)
-
- self.run(cmds)
+ quit """ % (net, nid)
+ self.run(cmds)
# create a new connection
- def connect(self, net, nid, port, servuuid, send_mem, recv_mem):
- if net in ('tcp', 'toe'):
- cmds = """
+ def connect(self, srv):
+ cmds = "\n add_uuid %s %s %s" % (srv.uuid, srv.nid, srv.net_type)
+ if srv.net_type in ('tcp', 'toe') and not config.lctl_dump():
+ flags = ''
+ if srv.irq_affinity:
+ flags = flags + 'i'
+ if srv.nid_exchange:
+ flags = flags + 'x'
+ cmds = """%s
network %s
- add_uuid %s %s
send_mem %d
recv_mem %d
- connect %s %d
- quit""" % (net, servuuid, nid, send_mem, recv_mem, nid, port, )
- else:
- cmds = """
- network %s
- add_uuid %s %s
- connect %s %d
- quit""" % (net, servuuid, nid, nid, port, )
-
+ connect %s %d %s""" % (cmds, srv.net_type,
+ srv.send_mem,
+ srv.recv_mem,
+ srv.hostaddr, srv.port, flags )
+
+ cmds = cmds + "\n quit"
self.run(cmds)
# add a route to a range
cmds = """
network %s
add_route %s %s %s
- quit """ % (net, gw, lo, hi)
+ quit """ % (net,
+ gw, lo, hi)
self.run(cmds)
def add_route_host(self, net, uuid, gw, tgt):
cmds = """
network %s
- add_uuid %s %s
+ add_uuid %s %s %s
add_route %s %s
- quit """ % (net, uuid, tgt, gw, tgt)
+ quit """ % (net,
+ uuid, tgt, net,
+ gw, tgt)
self.run(cmds)
# add a route to a range
cmds = """
ignore_errors
network %s
- del_uuid self
disconnect
quit""" % (net)
self.run(cmds)
# Run a command and return the output and status.
# stderr is sent to /dev/null, could use popen3 to
# save it if necessary
-def run(*args):
- cmd = string.join(map(str,args))
+def runcmd(cmd):
debug ("+", cmd)
if config.noexec(): return (0, [])
f = os.popen(cmd + ' 2>&1')
ret = 0
return (ret, out)
def run(*args):
    """Build a command from the arguments and pass it to runcmd().

    Each argument is converted with str() and the pieces are joined with
    single spaces.  The result of runcmd() is returned unchanged.
    """
    words = [str(a) for a in args]
    return runcmd(' '.join(words))
+
# Run a command in the background.
def run_daemon(*args):
cmd = string.join(map(str,args))
cmdpath = os.path.dirname(sys.argv[0])
syspath.insert(0, cmdpath);
if config.portals_dir():
- syspath.insert(0, os.path.join(cmdpath, config.portals_dir()+'/linux/utils/'))
+ syspath.insert(0, os.path.join(config.portals_dir()+'/linux/utils/'))
for d in syspath:
prog = os.path.join(d,cmd)
if os.access(prog, os.X_OK):
ip = string.split(addr, ':')[1]
return ip
def get_local_nid(net_type, wildcard):
    """Return the local nid.

    When /proc/elan/device0/position is readable the elan address is
    returned; otherwise the address for `net_type` and `wildcard` is.
    """
    if os.access('/proc/elan/device0/position', os.R_OK):
        return get_local_address('elan', '*')
    return get_local_address(net_type, wildcard)
+
def get_local_address(net_type, wildcard):
"""Return the local address for the network type."""
local = ""
"""Return true if a device exists for the uuid"""
# expect this format:
# 1 UP ldlm ldlm ldlm_UUID 2
+ if config.lctl_dump():
+ return 0
try:
out = lctl.device_list()
for s in out:
except CommandError, e:
e.dump()
return 0
+
def is_network_prepared():
    """Return 1 if networking looks configured already, otherwise 0.

    If a device with uuid RPCDEV_UUID exists, then assume that all
    networking has been configured.  The answer is always 0 when the
    lctl_dump option is set, and 0 when listing the devices fails (the
    CommandError is dumped, not raised).
    """
    if config.lctl_dump():
        return 0
    try:
        out = lctl.device_list()
    except CommandError:
        # sys.exc_info() keeps this valid for old and new interpreters.
        sys.exc_info()[1].dump()
        return 0
    for s in out:
        # Lines are expected to look like "1 UP ldlm ldlm ldlm_UUID 2",
        # with the uuid in the fifth field.  Shorter lines are skipped
        # instead of raising IndexError.
        fields = s.split()
        if len(fields) > 4 and fields[4] == 'RPCDEV_UUID':
            return 1
    return 0
+
def fs_is_mounted(path):
"""Return true if path is a mounted lustre filesystem"""
msg = string.join(map(str,args))
print self.module_name + ":", self.name, self.uuid, msg
- def lookup_server(self, srv_uuid):
- """ Lookup a server's network information """
- net = self.db.get_ost_net(srv_uuid)
- if not net:
- panic ("Unable to find a server for:", srv_uuid)
- self._server = Network(net)
-
- def get_server(self):
- return self._server
-
def cleanup(self):
""" default cleanup, used for most modules """
self.info()
- srv = self.get_server()
- if srv and local_net(srv):
- try:
- lctl.disconnect(srv.net_type, srv.nid, srv.port, srv.uuid)
- except CommandError, e:
- log(self.module_name, "disconnect failed: ", self.name)
- e.dump()
- cleanup_error(e.rc)
try:
lctl.cleanup(self.name, self.uuid)
except CommandError, e:
log(self.module_name, "cleanup failed: ", self.name)
e.dump()
cleanup_error(e.rc)
-
+
def add_portals_module(self, dev_dir, modname):
"""Append a module to list of modules to load."""
self.kmodule_list.append((config.portals_dir(), dev_dir, modname))
log('! unable to unload module:', mod)
logall(out)
-
class Network(Module):
def __init__(self,db):
Module.__init__(self, 'NETWORK', db)
self.net_type = self.db.get_val('nettype')
self.nid = self.db.get_val('nid', '*')
self.port = self.db.get_val_int('port', 0)
- self.send_mem = self.db.get_val_int('send_mem', DEFAULT_TCPBUF)
- self.recv_mem = self.db.get_val_int('recv_mem', DEFAULT_TCPBUF)
+ self.send_mem = self.db.get_val_int('sendmem', DEFAULT_TCPBUF)
+ self.recv_mem = self.db.get_val_int('recvmem', DEFAULT_TCPBUF)
+ self.irq_affinity = self.db.get_val_int('irqaffinity', 0)
+ self.nid_exchange = self.db.get_val_int('nidexchange', 0)
+
if '*' in self.nid:
- self.nid = get_local_address(self.net_type, self.nid)
+ self.nid = get_local_nid(self.net_type, self.nid)
if not self.nid:
panic("unable to set nid for", self.net_type, self.nid)
debug("nid:", self.nid)
+
+ self.hostaddr = self.db.get_val('hostaddr', self.nid)
+ if '*' in self.hostaddr:
+ self.hostaddr = get_local_address(self.net_type, self.hostaddr)
+ if not self.nid:
+ panic("unable to set nid for", self.net_type, self.hostaddr)
+ debug("hostaddr:", self.hostaddr)
+ # debug ( "hostaddr ", self.hostaddr, "net_type", self.net_type)
+
self.add_portals_module("linux/oslib", 'portals')
if node_needs_router():
self.add_portals_module("linux/router", 'kptlrouter')
if self.net_type == 'gm':
self.add_portals_module("/linux/gmnal", 'kgmnal')
self.add_lustre_module('obdclass', 'obdclass')
- self.add_lustre_module('ptlrpc', 'ptlrpc')
def prepare(self):
+ if is_network_prepared():
+ return
+ self.info(self.net_type, self.nid, self.port)
+ lctl.network(self.net_type, self.nid)
+
+ def cleanup(self):
self.info(self.net_type, self.nid, self.port)
if self.net_type in ('tcp', 'toe'):
- nal_id = '' # default is socknal
- if self.net_type == 'toe':
- nal_id = '-N 4'
- ret, out = run(TCP_ACCEPTOR, '-s', self.send_mem, '-r', self.recv_mem, nal_id, self.port)
- if ret:
- raise CommandError(TCP_ACCEPTOR, out, ret)
+ stop_acceptor(self.port)
+ try:
+ lctl.disconnectAll(self.net_type)
+ except CommandError, e:
+ print "disconnectAll failed: ", self.name
+ e.dump()
+ cleanup_error(e.rc)
+
+class Router(Module):
+ def __init__(self,db):
+ Module.__init__(self, 'ROUTER', db)
+ def prepare(self):
+ if is_network_prepared():
+ return
+ self.info()
for net_type, gw, lo, hi in self.db.get_route_tbl():
lctl.add_route(net_type, gw, lo, hi)
- if net_type in ('tcp', 'toe') and net_type == self.net_type and hi == '':
- srvdb = self.db.nid2server(lo)
+ if net_type in ('tcp', 'toe') and local_net_type(net_type) and hi == '':
+ srvdb = self.db.nid2server(lo, net_type)
+
if not srvdb:
panic("no server for nid", lo)
else:
srv = Network(srvdb)
- lctl.connect(srv.net_type, srv.nid, srv.port, srv.uuid, srv.send_mem, srv.recv_mem)
-
-
- lctl.network(self.net_type, self.nid)
- if not is_prepared("RPCDEV_UUID"):
- lctl.newdev(attach = "ptlrpc RPCDEV RPCDEV_UUID")
-
+ lctl.connect(srv)
def cleanup(self):
- self.info(self.net_type, self.nid, self.port)
for net_type, gw, lo, hi in self.db.get_route_tbl():
- if self.net_type in ('tcp', 'toe') and hi == '':
- srvdb = self.db.nid2server(lo)
+ if net_type in ('tcp', 'toe') and local_net_type(net_type) and hi == '':
+ srvdb = self.db.nid2server(lo, net_type)
if not srvdb:
panic("no server for nid", lo)
else:
e.dump()
cleanup_error(e.rc)
try:
- lctl.del_route(self.net_type, self.nid, lo, hi)
+ lctl.del_route(net_type, gw, lo, hi)
except CommandError, e:
print "del_route failed: ", self.name
e.dump()
cleanup_error(e.rc)
-
- try:
- if is_prepared("RPCDEV_UUID"):
- lctl.cleanup("RPCDEV", "RPCDEV_UUID")
- except CommandError, e:
- print "cleanup failed: RPCDEV"
- e.dump()
- cleanup_error(e.rc)
- try:
- lctl.disconnectAll(self.net_type)
- except CommandError, e:
- print "disconnectAll failed: ", self.name
- e.dump()
- cleanup_error(e.rc)
- if self.net_type in ('tcp', 'toe'):
- # yikes, this ugly! need to save pid in /var/something
- run("killall acceptor")
class LDLM(Module):
def __init__(self,db):
if is_prepared(self.uuid):
return
self.info()
- lctl.newdev(attach="ldlm %s %s" % (self.name, self.uuid),
- setup ="")
+ lctl.newdev(attach="ldlm %s %s" % (self.name, self.uuid))
+ def cleanup(self):
+ if is_prepared(self.uuid):
+ Module.cleanup(self)
+
class PTLRPC(Module):
    """Service module that attaches and cleans up the ptlrpc device."""

    def __init__(self, db):
        Module.__init__(self, 'PTLRPC', db)
        self.add_lustre_module('ptlrpc', 'ptlrpc')

    def prepare(self):
        """Attach the ptlrpc device unless it is already prepared."""
        if not is_prepared(self.uuid):
            self.info()
            attach_cmd = "ptlrpc %s %s" % (self.name, self.uuid)
            lctl.newdev(attach=attach_cmd)

    def cleanup(self):
        """Run the default Module cleanup only if the device is prepared."""
        if not is_prepared(self.uuid):
            return
        Module.cleanup(self)
class LOV(Module):
def __init__(self,db):
self.devlist = self.db.get_refs('obd')
self.stripe_cnt = self.db.get_val_int('stripecount', len(self.devlist))
self.osclist = []
+ self.mdc_uudi = ''
for obd_uuid in self.devlist:
obd = self.db.lookup(obd_uuid)
osc = get_osc(obd, self.name)
class MDSDEV(Module):
def __init__(self,db):
Module.__init__(self, 'MDSDEV', db)
- self.devname = self.db.get_val('devpath','')
+ self.devpath = self.db.get_val('devpath','')
self.size = self.db.get_val_int('devsize', 0)
self.fstype = self.db.get_val('fstype', '')
# overwrite the original MDSDEV name and uuid with the MDS name and uuid
- self.uuid = self.db.get_first_ref('target')
- mds = self.db.lookup(self.uuid)
+ target_uuid = self.db.get_first_ref('target')
+ mds = self.db.lookup(target_uuid)
self.name = mds.getName()
self.lovconfig_uuids = mds.get_refs('lovconfig')
# FIXME: if fstype not set, then determine based on kernel version
self.format = self.db.get_val('autoformat', "no")
+
+ active_uuid = mds.get_active_target()
+ if not active_uuid:
+ panic("No target device found:", target_uuid)
+ if active_uuid == self.uuid:
+ self.active = 1
+ else:
+ self.active = 0
+ self.target_dev_uuid = self.uuid
+ self.uuid = target_uuid
+ # modules
if self.fstype == 'extN':
self.add_lustre_module('extN', 'extN')
self.add_lustre_module('mds', 'mds')
if self.fstype:
self.add_lustre_module('obdclass', 'fsfilt_%s' % (self.fstype))
+
+ def load_module(self):
+ if self.active:
+ Module.load_module(self)
def prepare(self):
if is_prepared(self.uuid):
return
- self.info(self.devname, self.fstype, self.format)
- blkdev = block_dev(self.devname, self.size, self.fstype, self.format)
+ if not self.active:
+ debug(self.uuid, "not active")
+ return
+ self.info(self.devpath, self.fstype, self.format)
+ run_acceptors()
+ blkdev = block_dev(self.devpath, self.size, self.fstype, self.format)
if not is_prepared('MDT_UUID'):
lctl.newdev(attach="mdt %s %s" % ('MDT', 'MDT_UUID'),
setup ="")
cleanup_error(e.rc)
if is_prepared(self.uuid):
Module.cleanup(self)
- clean_loop(self.devname)
+ clean_loop(self.devpath)
class OSD(Module):
def __init__(self, db):
Module.__init__(self, 'OSD', db)
self.osdtype = self.db.get_val('osdtype')
- self.devname = self.db.get_val('devpath', '')
+ self.devpath = self.db.get_val('devpath', '')
self.size = self.db.get_val_int('devsize', 0)
self.fstype = self.db.get_val('fstype', '')
- self.uuid = self.db.get_first_ref('target')
- ost = self.db.lookup(self.uuid)
+ target_uuid = self.db.get_first_ref('target')
+ ost = self.db.lookup(target_uuid)
self.name = ost.getName()
# FIXME: if fstype not set, then determine based on kernel version
self.format = self.db.get_val('autoformat', 'yes')
if self.fstype == 'extN':
self.add_lustre_module('extN', 'extN')
+
+ active_uuid = ost.get_active_target()
+ if not active_uuid:
+ panic("No target device found:", target_uuid)
+ if active_uuid == self.uuid:
+ self.active = 1
+ else:
+ self.active = 0
+ self.target_dev_uuid = self.uuid
+ self.uuid = target_uuid
+ # modules
self.add_lustre_module('ost', 'ost')
self.add_lustre_module(self.osdtype, self.osdtype)
if self.fstype:
self.add_lustre_module('obdclass' , 'fsfilt_%s' % (self.fstype))
+ def load_module(self):
+ if self.active:
+ Module.load_module(self)
+
# need to check /proc/mounts and /etc/mtab before
# formatting anything.
# FIXME: check if device is already formatted.
def prepare(self):
if is_prepared(self.uuid):
return
- self.info(self.osdtype, self.devname, self.size, self.fstype, self.format)
+ if not self.active:
+ debug(self.uuid, "not active")
+ return
+ self.info(self.osdtype, self.devpath, self.size, self.fstype, self.format)
+ run_acceptors()
if self.osdtype == 'obdecho':
blkdev = ''
else:
- blkdev = block_dev(self.devname, self.size, self.fstype, self.format)
+ blkdev = block_dev(self.devpath, self.size, self.fstype, self.format)
lctl.newdev(attach="%s %s %s" % (self.osdtype, self.name, self.uuid),
setup ="%s %s" %(blkdev, self.fstype))
if not is_prepared('OSS_UUID'):
if is_prepared(self.uuid):
Module.cleanup(self)
if not self.osdtype == 'obdecho':
- clean_loop(self.devname)
+ clean_loop(self.devpath)
# Generic client module, used by OSC and MDC
class Client(Module):
- def __init__(self, db, module, owner, target_name, target_uuid):
- self.target_name = target_name
- self.target_uuid = target_uuid
- self.db = db
- node_name = config.select(target_name)
- if node_name:
- self.tgt_dev_uuid = self.db.get_target_device(node_name, target_uuid)
- else:
- self.tgt_dev_uuid = db.get_first_ref('active')
+ def __init__(self, tgtdb, module, owner):
+ self.target_name = tgtdb.getName()
+ self.target_uuid = tgtdb.getUUID()
+ self.db = tgtdb
+
+ self.tgt_dev_uuid = tgtdb.get_active_target()
if not self.tgt_dev_uuid:
- panic("No target device found for target:", target_name)
+ panic("No target device found for target:", self.target_name)
+
self.kmodule_list = []
self._server = None
self._connected = 0
self.module = module
self.module_name = string.upper(module)
- self.name = '%s_%s_%s' % (self.module_name, owner, target_name)
- self.uuid = '%05x_%s_%05x' % (int(random.random() * 1048576), self.name,
- int(random.random() * 1048576))
+ self.name = '%s_%s_%s' % (self.module_name, owner, self.target_name)
+ self.uuid = '%05x%05x_%.14s_%05x%05x' % (int(random.random() * 1048576),
+ int(random.random() * 1048576),self.name,
+ int(random.random() * 1048576),
+ int(random.random() * 1048576))
self.uuid = self.uuid[0:36]
self.lookup_server(self.tgt_dev_uuid)
self.add_lustre_module(module, module)
+ def lookup_server(self, srv_uuid):
+ """ Lookup a server's network information """
+ self._server_nets = self.db.get_ost_net(srv_uuid)
+ if len(self._server_nets) == 0:
+ panic ("Unable to find a server for:", srv_uuid)
+
+ def get_servers(self):
+ return self._server_nets
+
def prepare(self, ignore_connect_failure = 0):
if is_prepared(self.uuid):
return
self.info(self.target_uuid)
- srv = self.get_server()
try:
- if local_net(srv):
- #debug("LOCAL NET")
- lctl.connect(srv.net_type, srv.nid, srv.port, srv.uuid, srv.send_mem, srv.recv_mem)
+ srv = local_net(self.get_servers())
+ if srv:
+ lctl.connect(srv)
else:
- #debug("NOT LOCAL NET")
- r = find_route(srv)
- if r:
+ srv, r = find_route(self.get_servers())
+ if srv:
lctl.add_route_host(r[0], srv.uuid, r[1], r[2])
else:
- panic ("no route to", srv.nid)
+ panic ("no route to", self.target_uuid)
except CommandError:
if (ignore_connect_failure == 0):
pass
- lctl.newdev(attach="%s %s %s" % (self.module, self.name, self.uuid),
+ if srv:
+ lctl.newdev(attach="%s %s %s" % (self.module, self.name, self.uuid),
setup ="%s %s" %(self.target_uuid, srv.uuid))
def cleanup(self):
- srv = self.get_server()
- if local_net(srv):
- Module.cleanup(self)
+ Module.cleanup(self)
+ srv = local_net(self.get_servers())
+ if srv:
+ try:
+ lctl.disconnect(srv.net_type, srv.nid, srv.port, srv.uuid)
+ except CommandError, e:
+ log(self.module_name, "disconnect failed: ", self.name)
+ e.dump()
+ cleanup_error(e.rc)
else:
- self.info(self.targt_uuid)
- r = find_route(srv)
- if r:
+ self.info(self.target_uuid)
+ srv, r = find_route(self.get_servers())
+ if srv:
try:
lctl.del_route_host(r[0], srv.uuid, r[1], r[2])
except CommandError, e:
print "del_route failed: ", self.name
e.dump()
cleanup_error(e.rc)
- Module.cleanup(self)
class MDC(Client):
- def __init__(self, db, owner, target_name, target_uuid):
- Client.__init__(self, db, 'mdc', owner, target_name, target_uuid)
+ def __init__(self, db, owner):
+ Client.__init__(self, db, 'mdc', owner)
class OSC(Client):
- def __init__(self, db, owner, target_name, target_uuid):
- Client.__init__(self, db, 'osc', owner, target_name, target_uuid)
+ def __init__(self, db, owner):
+ Client.__init__(self, db, 'osc', owner)
class COBD(Module):
mdc_uuid = prepare_mdc(self.db, self.name, self.mds_uuid)
else:
mdc_uuid = self.vosc.get_mdc_uuid()
+ if not mdc_uuid:
+ panic("Unable to determine MDC UUID. Probably need to cleanup before re-mounting.")
self.info(self.path, self.mds_uuid, self.obd_uuid)
cmd = "mount -t lustre_lite -o osc=%s,mdc=%s none %s" % \
(self.vosc.get_uuid(), mdc_uuid, self.path)
uuids = self._get_all_refs()
return uuids
- def get_ost_net(self, uuid):
- ost = self.lookup(uuid)
- uuid = ost.get_first_ref('network')
- if not uuid:
- return None
- return ost.lookup(uuid)
-
- def nid2server(self, nid):
+ def get_ost_net(self, osd_uuid):
+ srv_list = []
+ if not osd_uuid:
+ return srv_list
+ osd = self.lookup(osd_uuid)
+ node_uuid = osd.get_first_ref('node')
+ node = self.lookup(node_uuid)
+ if not node:
+ panic("unable to find node for osd_uuid:", osd_uuid,
+ " node_ref:", node_uuid)
+ for net_uuid in node.get_networks():
+ db = node.lookup(net_uuid)
+ srv_list.append(Network(db))
+ return srv_list
+
+ def nid2server(self, nid, net_type):
netlist = self.lookup_class('network')
for net_db in netlist:
- if net_db.get_val('nid') == nid:
+ if net_db.get_val('nid') == nid and net_db.get_val('nettype') == net_type:
return net_db
return None
type = self.get_class()
ret=0;
if type in ('network',):
- ret = 10
+ ret = 5
+ elif type in ('routetbl',):
+ ret = 6
+ elif type in ('ptlrpc',):
+ ret = 7
elif type in ('device', 'ldlm'):
ret = 20
elif type in ('osd', 'mdd', 'cobd'):
# Find the target_device for target on a node
# node->profiles->device_refs->target
- def get_target_device(self, node_name, target_uuid):
+ def get_target_device(self, target_uuid, node_name):
node_db = self.lookup_name(node_name)
if not node_db:
return None
return ref[1]
return None
+ def get_active_target(self):
+ target_uuid = self.getUUID()
+ target_name = self.getName()
+ node_name = config.select(target_name)
+ if node_name:
+ tgt_dev_uuid = self.get_target_device(target_uuid, node_name)
+ else:
+ tgt_dev_uuid = self.get_first_ref('active')
+ return tgt_dev_uuid
+
+
# get all network uuids for this node
def get_networks(self):
ret = []
for prof_uuid in prof_list:
prof_db = self.lookup(prof_uuid)
net_list = prof_db.get_refs('network')
- debug("get_networks():", prof_uuid, net_list)
+ #debug("get_networks():", prof_uuid, net_list)
for net_uuid in net_list:
ret.append(net_uuid)
return ret
for t in tbl:
routes = t.getElementsByTagName('route')
for r in routes:
- lo = self.xmlattr(r, 'lo')
- hi = self.xmlattr(r, 'hi')
- res.append((type, gw, lo, hi))
+ net_type = self.xmlattr(r, 'type')
+ if type != net_type:
+ lo = self.xmlattr(r, 'lo')
+ hi = self.xmlattr(r, 'hi')
+ res.append((type, gw, lo, hi))
return res
def get_route_tbl(self):
ret = []
- tbls = self.dom_node.getElementsByTagName('routetbl')
- for tbl in tbls:
- for r in tbl.getElementsByTagName('route'):
- net_type = self.xmlattr(r, 'type')
- gw = self.xmlattr(r, 'gw')
- lo = self.xmlattr(r, 'lo')
- hi = self.xmlattr(r, 'hi')
- ret.append((net_type, gw, lo, hi))
+ for r in self.dom_node.getElementsByTagName('route'):
+ net_type = self.xmlattr(r, 'type')
+ gw = self.xmlattr(r, 'gw')
+ lo = self.xmlattr(r, 'lo')
+ hi = self.xmlattr(r, 'hi')
+ ret.append((net_type, gw, lo, hi))
return ret
# OSC is no longer in the xml, so we have to fake it.
# this is getting ugly and begging for another refactoring
def get_osc(ost_db, owner):
- osc = OSC(ost_db, owner, ost_db.getName(), ost_db.getUUID())
+ osc = OSC(ost_db, owner)
return osc
def get_mdc(db, owner, mds_uuid):
mds_db = db.lookup(mds_uuid);
if not mds_db:
panic("no mds:", mds_uuid)
- mdc = MDC(mds_db, owner, mds_db.getName(), mds_uuid)
+ mdc = MDC(mds_db, owner)
return mdc
def prepare_mdc(db, owner, mds_uuid):
def add_local_interfaces(node_db):
global local_node
- debug("add_local")
for netuuid in node_db.get_networks():
net = node_db.lookup(netuuid)
+ srv = Network(net)
debug("add_local", netuuid)
- local_node.append((net.get_val('nettype'), net.get_val('nid')))
+ local_node.append((srv.net_type, srv.nid))
+ if acceptors.has_key(srv.port):
+ panic("duplicate port:", srv.port)
+ if srv.net_type in ('tcp', 'toe'):
+ acceptors[srv.port] = AcceptorHandler(srv.port, srv.net_type,
+ srv.send_mem, srv.recv_mem,
+ srv.irq_affinity,
+ srv.nid_exchange)
def node_needs_router():
return router_flag
#debug("init_route_config: gw is", gw)
if not gw:
continue
- for netuuid in node_db.get_networks():
- db = node_db.lookup(netuuid)
- #debug("init_route_config: tbl: ", db.get_route_tbl())
- if local_type != db.get_val('nettype'):
- for route in db.get_routes(local_type, gw):
- routes.append(route)
- #debug("init_route_config routes:", routes)
+ for route in node_db.get_routes(local_type, gw):
+ routes.append(route)
+ debug("init_route_config routes:", routes)
+
def local_net(srv_list):
    """Return a server from `srv_list` on a local network type, or None.

    Local interfaces are walked in order and, for each one, the servers
    are walked in order; the first server whose net_type equals the
    interface's type (element 0 of the interface entry) is returned.
    """
    global local_node
    for iface in local_node:
        local_type = iface[0]
        for srv in srv_list:
            if srv.net_type == local_type:
                return srv
    return None
-def local_net(net):
+def local_net_type(net_type):
global local_node
for iface in local_node:
- #debug("local_net a:", net.net_type, "b:", iface[0])
- if net.net_type == iface[0]:
+ if net_type == iface[0]:
return 1
return 0
-def find_route(net):
+def find_route(srv_list):
global local_node, routes
frm_type = local_node[0][0]
- to_type = net.net_type
- to = net.nid
- debug ('looking for route to', to_type,to)
- for r in routes:
- #debug("find_route: ", r)
- if r[2] == to:
- return r
- return None
+ for srv in srv_list:
+ #debug("find_route: srv:", srv.hostaddr, "type: ", srv.net_type)
+ to_type = srv.net_type
+ to = srv.hostaddr
+ #debug ('looking for route to', to_type, to)
+ for r in routes:
+ #debug("find_route: ", r)
+ if r[2] == to:
+ return srv, r
+ return None,None
############################################################
n = None
if type == 'ldlm':
n = LDLM(db)
+ elif type == 'ptlrpc':
+ n = PTLRPC(db)
elif type == 'lov':
n = LOV(db)
elif type == 'network':
n = Network(db)
+ elif type == 'routetbl':
+ n = Router(db)
elif type == 'osd':
n = OSD(db)
elif type == 'cobd':
operation(services)
def doSetup(services):
+ if config.nosetup():
+ return
for s in services:
n = newService(s[1])
n.prepare()
def doModules(services):
+ if config.nomod():
+ return
for s in services:
n = newService(s[1])
n.load_module()
def doCleanup(services):
+ if config.nosetup():
+ return
services.reverse()
for s in services:
n = newService(s[1])
n.cleanup()
def doUnloadModules(services):
+ if config.nomod():
+ return
services.reverse()
for s in services:
n = newService(s[1])
recovery_upcall = node_db.get_val('recovery_upcall', '')
timeout = node_db.get_val_int('timeout', 0)
+ add_local_interfaces(node_db)
if not router_flag:
- add_local_interfaces(node_db)
init_route_config(lustreDB)
# Two step process: (1) load modules, (2) setup lustre
if config.force():
# the command line can override this value
timeout = 5
+ # ugly hack, only need to run lctl commands for --dump
+ if config.lctl_dump():
+ for_each_profile(node_db, prof_list, doCleanup)
+ return
+
sys_set_timeout(timeout)
sys_set_recovery_upcall(recovery_upcall)
for_each_profile(node_db, prof_list, doUnloadModules)
else:
+ # ugly hack, only need to run lctl commands for --dump
+ if config.lctl_dump():
+ for_each_profile(node_db, prof_list, doSetup)
+ return
+
for_each_profile(node_db, prof_list, doModules)
sys_set_debug_path()
"help", "node=", "nomod", "nosetup",
"dump=", "force", "minlevel=", "maxlevel=",
"timeout=", "recovery_upcall=",
- "ldapurl=", "config=", "select="]
+ "ldapurl=", "config=", "select=", "lctl_dump="]
opts = []
args = []
config.verbose(1)
if o in ("-n", "--noexec"):
config.noexec(1)
- config.verbose(1)
if o == "--portals":
config.portals_dir(a)
if o == "--lustre":
config.config_name(a)
if o == "--select":
config.init_select(a)
+ if o == "--lctl_dump":
+ config.lctl_dump(a)
return args
# Shutdown does steps in reverse
#
def main():
- global TCP_ACCEPTOR, lctl, MAXTCPBUF
+ global lctl, MAXTCPBUF
host = socket.gethostname()
setupModulePath(sys.argv[0])
- TCP_ACCEPTOR = find_prog('acceptor')
- if not TCP_ACCEPTOR:
- if config.noexec():
- TCP_ACCEPTOR = 'acceptor'
- debug('! acceptor not found')
- else:
- panic('acceptor not found')
-
lctl = LCTLInterface('lctl')
-
- sys_make_devices()
- sys_set_netmem_max('/proc/sys/net/core/rmem_max', MAXTCPBUF)
- sys_set_netmem_max('/proc/sys/net/core/wmem_max', MAXTCPBUF)
+ if config.lctl_dump():
+ lctl.use_save_file(config.lctl_dump())
+ else:
+ sys_make_devices()
+ sys_set_netmem_max('/proc/sys/net/core/rmem_max', MAXTCPBUF)
+ sys_set_netmem_max('/proc/sys/net/core/wmem_max', MAXTCPBUF)
doHost(db, node_list)
if first_cleanup_error:
sys.exit(first_cleanup_error)
-
+