Whamcloud - gitweb
land 0.5.20.3 b_devel onto HEAD (b_devel will remain)
[fs/lustre-release.git] / lustre / utils / lconf.in
index 46549cc..cbe05dd 100755 (executable)
 # Based in part on the XML obdctl modifications done by Brian Behlendorf 
 
 import sys, getopt, types
-import string, os, stat, popen2, socket, time, random, fcntl, FCNTL, select
+import string, os, stat, popen2, socket, time, random, fcntl, select
 import re, exceptions
 import xml.dom.minidom
 
+if sys.version[0] == '1':
+    from FCNTL import F_GETFL, F_SETFL
+else:
+    from fcntl import F_GETFL, F_SETFL
+
 # Global parameters
-TCP_ACCEPTOR = ''
 MAXTCPBUF = 1048576
 DEFAULT_TCPBUF = 1048576
 #
@@ -72,7 +76,7 @@ config.xml          Lustre configuration in xml format.
                     Levels are approximately like:
                             10 - network
                             20 - device, ldlm
-                            30 - obd, mdd
+                            30 - osd, mdd
                             40 - mds, ost
                             50 - mdc, osc
                             60 - lov
@@ -119,6 +123,7 @@ class Config:
         self._ldapurl = ''
         self._config_name = ''
         self._select = {}
+        self._lctl_dump = ''
 
     def verbose(self, flag = None):
         if flag: self._verbose = flag
@@ -215,6 +220,10 @@ class Config:
             return self._select[srv]
         return None
 
+    def lctl_dump(self, val = None):
+        if val: self._lctl_dump = val
+        return self._lctl_dump
+
 
 config = Config()
 
@@ -275,6 +284,104 @@ class LconfError (exceptions.Exception):
 
 
 # ============================================================
+# handle daemons, like the acceptor
+class DaemonHandler:
+    """ Manage starting and stopping a daemon. Assumes daemon manages
+    its own pid file. """
+
+    def __init__(self, cmd):
+        self.command = cmd
+        self.path =""
+
+    def start(self):
+        if self.running():
+            log(self.command, "already running.")
+        if not self.path:
+            self.path = find_prog(self.command)
+            if not self.path:
+                panic(self.command, "not found.")
+        ret, out = runcmd(self.path +' '+ self.command_line())
+        if ret:
+            raise CommandError(self.path, out, ret)
+
+    def stop(self):
+        if self.running():
+            pid = self.read_pidfile()
+            try:
+                log ("killing process", pid)
+                os.kill(pid, 15)
+                #time.sleep(1) # let daemon die
+            except OSError, e:
+                log("unable to kill", self.command, e)
+            if self.running():
+                log("unable to kill", self.command)
+
+    def running(self):
+        pid = self.read_pidfile()
+        if pid:
+            try:
+                os.kill(pid, 0)
+            except OSError:
+                self.clean_pidfile()
+            else:
+                return 1
+        return 0
+
+    def read_pidfile(self):
+        try:
+            fp = open(self.pidfile(), 'r')
+            pid = int(fp.read())
+            fp.close()
+            return pid
+        except IOError:
+            return 0
+        
+    def clean_pidfile(self):
+        """ Remove a stale pidfile """
+        log("removing stale pidfile:", self.pidfile())
+        try:
+            os.unlink(self.pidfile())
+        except OSError, e:
+            log(self.pidfile(), e)
+            
+class AcceptorHandler(DaemonHandler):
+    def __init__(self, port, net_type, send_mem, recv_mem, irq_aff, nid_xchg):
+        DaemonHandler.__init__(self, "acceptor")
+        self.port = port
+        self.flags = ''
+        self.send_mem = send_mem
+        self.recv_mem = recv_mem
+
+        if net_type == 'toe':
+            self.flags = self.flags + ' -N 4'
+        if irq_aff:
+            self.flags = self.flags + ' -i'
+        if nid_xchg:
+            self.flags = self.flags + ' -x'
+
+    def pidfile(self):
+        return "/var/run/%s-%d.pid" % (self.command, self.port)
+
+    def command_line(self):
+        return string.join(map(str,('-s', self.send_mem, '-r', self.recv_mem, self.flags, self.port)))
+    
+acceptors = {}
+
+# start the acceptors
+def run_acceptors():
+    for port in acceptors.keys():
+        daemon = acceptors[port]
+        if not daemon.running():
+            daemon.start()
+
+def stop_acceptor(port):
+    if acceptors.has_key(port):
+        daemon = acceptors[port]
+        if daemon.running():
+            daemon.stop()
+        
+
+# ============================================================
 # handle lctl interface
 class LCTLInterface:
     """
@@ -286,6 +393,7 @@ class LCTLInterface:
         Initialize close by finding the lctl binary.
         """
         self.lctl = find_prog(cmd)
+        self.save_file = ''
         if not self.lctl:
             if config.noexec():
                 debug('! lctl not found')
@@ -293,9 +401,12 @@ class LCTLInterface:
             else:
                 raise CommandError('lctl', "unable to find lctl binary.")
 
+    def use_save_file(self, file):
+        self.save_file = file
+        
     def set_nonblock(self, fd):
-        fl = fcntl.fcntl(fd, FCNTL.F_GETFL)
-        fcntl.fcntl(fd, FCNTL.F_SETFL, fl | os.O_NDELAY)
+        fl = fcntl.fcntl(fd, F_GETFL)
+        fcntl.fcntl(fd, F_SETFL, fl | os.O_NDELAY)
 
     def run(self, cmds):
         """
@@ -306,10 +417,14 @@ class LCTLInterface:
         should modify command line to accept multiple commands, or
         create complex command line options
         """
-        debug("+", self.lctl, cmds)
+        cmd_line = self.lctl
+        if self.save_file:
+            cmds = '\n  dump ' + self.save_file + cmds
+
+        debug("+", cmd_line, cmds)
         if config.noexec(): return (0, [])
 
-        child = popen2.Popen3(self.lctl, 1) # Capture stdout and stderr from command
+        child = popen2.Popen3(cmd_line, 1) # Capture stdout and stderr from command
         child.tochild.write(cmds + "\n")
         child.tochild.close()
 
@@ -365,33 +480,28 @@ class LCTLInterface:
             cmds =  """
   network %s
   mynid %s
-  add_uuid self %s
-  quit""" % (net, nid, nid)
-        else:
-            cmds =  """
-  network %s
-  add_uuid self %s
-  quit""" % (net, nid)
-            
-        self.run(cmds)
+  quit """ % (net, nid)
+            self.run(cmds)
 
     # create a new connection
-    def connect(self, net, nid, port, servuuid, send_mem, recv_mem):
-        if net  in ('tcp', 'toe'):
-            cmds =  """
+    def connect(self, srv):
+        cmds =  "\n  add_uuid %s %s %s" % (srv.uuid, srv.nid, srv.net_type)
+        if srv.net_type  in ('tcp', 'toe') and not config.lctl_dump():
+            flags = ''
+            if srv.irq_affinity:
+                flags = flags + 'i'
+            if srv.nid_exchange:
+                flags = flags + 'x'
+            cmds =  """%s          
   network %s
-  add_uuid %s %s
   send_mem %d
   recv_mem %d
-  connect %s %d
-  quit""" % (net, servuuid, nid, send_mem, recv_mem, nid, port,  )
-        else:
-            cmds =  """
-  network %s
-  add_uuid %s %s
-  connect %s %d
-  quit""" % (net, servuuid, nid, nid, port,  )
-            
+  connect %s %d %s""" % (cmds, srv.net_type,
+             srv.send_mem,
+             srv.recv_mem,
+             srv.hostaddr, srv.port, flags )
+
+        cmds = cmds + "\n  quit"
         self.run(cmds)
                 
     # add a route to a range
@@ -399,7 +509,8 @@ class LCTLInterface:
         cmds =  """
   network %s
   add_route %s %s %s
-  quit  """ % (net, gw, lo, hi)
+  quit  """ % (net,
+               gw, lo, hi)
         self.run(cmds)
 
                 
@@ -415,9 +526,11 @@ class LCTLInterface:
     def add_route_host(self, net, uuid, gw, tgt):
         cmds =  """
   network %s
-  add_uuid %s %s
+  add_uuid %s %s %s
   add_route %s %s
-  quit """ % (net, uuid, tgt, gw, tgt)
+  quit """ % (net,
+              uuid, tgt, net,
+              gw, tgt)
         self.run(cmds)
 
     # add a route to a range
@@ -445,7 +558,6 @@ class LCTLInterface:
         cmds =  """
   ignore_errors
   network %s
-  del_uuid self
   disconnect
   quit""" % (net)
         self.run(cmds)
@@ -464,8 +576,8 @@ class LCTLInterface:
         cmds = """
   ignore_errors
   device $%s
-  cleanup
-  detach %s
+  cleanup %s
+  detach
   quit""" % (name, ('', 'force')[config.force()])
         self.run(cmds)
 
@@ -502,8 +614,7 @@ class LCTLInterface:
 # Run a command and return the output and status.
 # stderr is sent to /dev/null, could use popen3 to
 # save it if necessary
-def run(*args):
-    cmd = string.join(map(str,args))
+def runcmd(cmd):
     debug ("+", cmd)
     if config.noexec(): return (0, [])
     f = os.popen(cmd + ' 2>&1')
@@ -515,6 +626,10 @@ def run(*args):
         ret = 0
     return (ret, out)
 
+def run(*args):
+    cmd = string.join(map(str,args))
+    return runcmd(cmd)
+
 # Run a command in the background.
 def run_daemon(*args):
     cmd = string.join(map(str,args))
@@ -535,7 +650,7 @@ def find_prog(cmd):
     cmdpath = os.path.dirname(sys.argv[0])
     syspath.insert(0, cmdpath);
     if config.portals_dir():
-        syspath.insert(0, os.path.join(cmdpath, config.portals_dir()+'/linux/utils/'))
+        syspath.insert(0, os.path.join(config.portals_dir()+'/linux/utils/'))
     for d in syspath:
         prog = os.path.join(d,cmd)
         if os.access(prog, os.X_OK):
@@ -575,23 +690,20 @@ def is_block(path):
 
 # build fs according to type
 # fixme: dangerous
-def mkfs(fstype, dev):
+def mkfs(dev, devsize, fstype):
+    block_cnt = ''
+    if devsize:
+        # devsize is in 1k, and fs block count is in 4k
+        block_cnt = devsize/4
+
     if(fstype in ('ext3', 'extN')):
-        mkfs = 'mkfs.ext2 -j -b 4096'
+        mkfs = 'mkfs.ext2 -j -b 4096 -F '
     elif (fstype == 'reiserfs'):
-        mkfs = 'mkfs.reiserfs -f'
+        mkfs = 'mkreiserfs -ff'
     else:
         print 'unsupported fs type: ', fstype
-    if not is_block(dev):
-        if(fstype in ('ext3', 'extN')):
-            force = '-F'
-        elif (fstype == 'reiserfs'):
-            force = ''
-        else:
-            print 'unsupported fs type: ', fstype
-    else:
-        force = ''
-    (ret, out) = run (mkfs, force, dev)
+
+    (ret, out) = run (mkfs, dev, block_cnt)
     if ret:
         panic("Unable to build fs:", dev)
     # enable hash tree indexing on fsswe
@@ -676,7 +788,7 @@ def block_dev(dev, size, fstype, format):
     if not is_block(dev):
         dev = init_loop(dev, size, fstype)
     if config.reformat() or (need_format(fstype, dev) and format == 'yes'):
-        mkfs(fstype, dev)
+        mkfs(dev, size, fstype)
 
 #    else:
 #        panic("device:", dev,
@@ -694,6 +806,16 @@ def if2addr(iface):
     ip = string.split(addr, ':')[1]
     return ip
 
+def get_local_nid(net_type, wildcard):
+    """Return the local nid. First look for an elan interface,
+      then use the local address. """
+    local = ""
+    if os.access('/proc/elan/device0/position', os.R_OK):
+        local = get_local_address('elan', '*')
+    else:
+        local = get_local_address(net_type, wildcard)
+    return local
+        
 def get_local_address(net_type, wildcard):
     """Return the local address for the network type."""
     local = ""
@@ -728,6 +850,8 @@ def is_prepared(uuid):
     """Return true if a device exists for the uuid"""
     # expect this format:
     # 1 UP ldlm ldlm ldlm_UUID 2
+    if config.lctl_dump():
+        return 0
     try:
         out = lctl.device_list()
         for s in out:
@@ -736,6 +860,21 @@ def is_prepared(uuid):
     except CommandError, e:
         e.dump()
     return 0
+
+def is_network_prepared():
+    """If the PTLRPC device exists, then assume that all networking
+       has been configured."""
+    if config.lctl_dump():
+        return 0
+    try:
+        out = lctl.device_list()
+        for s in out:
+            if 'RPCDEV_UUID' == string.split(s)[4]:
+                return 1
+    except CommandError, e:
+        e.dump()
+    return 0
+    
     
 def fs_is_mounted(path):
     """Return true if path is a mounted lustre filesystem"""
@@ -772,34 +911,16 @@ class Module:
         msg = string.join(map(str,args))
         print self.module_name + ":", self.name, self.uuid, msg
 
-    def lookup_server(self, srv_uuid):
-        """ Lookup a server's network information """
-        net = self.db.get_ost_net(srv_uuid)
-        if not net:
-            panic ("Unable to find a server for:", srv_uuid)
-        self._server = Network(net)
-
-    def get_server(self):
-        return self._server
-
     def cleanup(self):
         """ default cleanup, used for most modules """
         self.info()
-        srv = self.get_server()
-        if srv and local_net(srv):
-            try:
-                lctl.disconnect(srv.net_type, srv.nid, srv.port, srv.uuid)
-            except CommandError, e:
-                log(self.module_name, "disconnect failed: ", self.name)
-                e.dump()
-                cleanup_error(e.rc)
         try:
             lctl.cleanup(self.name, self.uuid)
         except CommandError, e:
             log(self.module_name, "cleanup failed: ", self.name)
             e.dump()
             cleanup_error(e.rc)
-
+            
     def add_portals_module(self, dev_dir, modname):
         """Append a module to list of modules to load."""
         self.kmodule_list.append((config.portals_dir(), dev_dir, modname))
@@ -855,64 +976,83 @@ class Module:
                 log('! unable to unload module:', mod)
                 logall(out)
         
-
 class Network(Module):
     def __init__(self,db):
         Module.__init__(self, 'NETWORK', db)
         self.net_type = self.db.get_val('nettype')
         self.nid = self.db.get_val('nid', '*')
         self.port = self.db.get_val_int('port', 0)
-        self.send_mem = self.db.get_val_int('send_mem', DEFAULT_TCPBUF)
-        self.recv_mem = self.db.get_val_int('recv_mem', DEFAULT_TCPBUF)
+        self.send_mem = self.db.get_val_int('sendmem', DEFAULT_TCPBUF)
+        self.recv_mem = self.db.get_val_int('recvmem', DEFAULT_TCPBUF)
+        self.irq_affinity = self.db.get_val_int('irqaffinity', 0)
+        self.nid_exchange = self.db.get_val_int('nidexchange', 0)
+
         if '*' in self.nid:
-            self.nid = get_local_address(self.net_type, self.nid)
+            self.nid = get_local_nid(self.net_type, self.nid)
             if not self.nid:
                 panic("unable to set nid for", self.net_type, self.nid)
             debug("nid:", self.nid)
 
+        self.hostaddr = self.db.get_val('hostaddr', self.nid)
+        if '*' in self.hostaddr:
+            self.hostaddr = get_local_address(self.net_type, self.hostaddr)
+            if not self.nid:
+                panic("unable to set nid for", self.net_type, self.hostaddr)
+            debug("hostaddr:", self.hostaddr)
+        # debug ( "hostaddr ", self.hostaddr, "net_type", self.net_type)
+
         self.add_portals_module("linux/oslib", 'portals')
         if node_needs_router():
             self.add_portals_module("linux/router", 'kptlrouter')
         if self.net_type == 'tcp':
             self.add_portals_module("linux/socknal", 'ksocknal')
         if self.net_type == 'toe':
-            self.add_portals_odule("/linux/toenal", 'ktoenal')
+            self.add_portals_module("/linux/toenal", 'ktoenal')
         if self.net_type == 'elan':
             self.add_portals_module("/linux/rqswnal", 'kqswnal')
         if self.net_type == 'gm':
             self.add_portals_module("/linux/gmnal", 'kgmnal')
         self.add_lustre_module('obdclass', 'obdclass')
-        self.add_lustre_module('ptlrpc', 'ptlrpc')
 
     def prepare(self):
+        if is_network_prepared():
+            return
+        self.info(self.net_type, self.nid, self.port)
+        lctl.network(self.net_type, self.nid)
+
+    def cleanup(self):
         self.info(self.net_type, self.nid, self.port)
         if self.net_type in ('tcp', 'toe'):
-            nal_id = '' # default is socknal
-            if self.net_type == 'toe':
-                nal_id = '-N 4'
-            ret, out = run(TCP_ACCEPTOR, '-s', self.send_mem, '-r', self.recv_mem, nal_id, self.port)
-            if ret:
-                raise CommandError(TCP_ACCEPTOR, out, ret)
+            stop_acceptor(self.port)
+        try:
+            lctl.disconnectAll(self.net_type)
+        except CommandError, e:
+            print "disconnectAll failed: ", self.name
+            e.dump()
+            cleanup_error(e.rc)
+
+class Router(Module):
+    def __init__(self,db):
+        Module.__init__(self, 'ROUTER', db)
+    def prepare(self):
+        if is_network_prepared():
+            return
+        self.info()
         for net_type, gw, lo, hi in self.db.get_route_tbl():
             lctl.add_route(net_type, gw, lo, hi)
-            if net_type in ('tcp', 'toe') and net_type == self.net_type and hi == '':
-                srvdb = self.db.nid2server(lo)
-                if not srv:
+            if net_type in ('tcp', 'toe') and local_net_type(net_type) and hi == '':
+                srvdb = self.db.nid2server(lo, net_type)
+
+                if not srvdb:
                     panic("no server for nid", lo)
                 else:
                     srv = Network(srvdb)
-                    lctl.connect(srv.net_type, srv.nid, srv.port, srv.uuid, srv.send_mem, srv.recv_mem)
-
-            
-        lctl.network(self.net_type, self.nid)
-        lctl.newdev(attach = "ptlrpc RPCDEV RPCDEV_UUID")
-
+                    lctl.connect(srv)
     def cleanup(self):
-        self.info(self.net_type, self.nid, self.port)
         for net_type, gw, lo, hi in self.db.get_route_tbl():
-            if self.net_type in ('tcp', 'toe') and hi == '':
-                srvdb = self.db.nid2server(lo)
-                if not srv:
+            if net_type in ('tcp', 'toe') and local_net_type(net_type) and hi == '':
+                srvdb = self.db.nid2server(lo, net_type)
+                if not srvdb:
                     panic("no server for nid", lo)
                 else:
                     srv = Network(srvdb)
@@ -923,27 +1063,11 @@ class Network(Module):
                         e.dump()
                         cleanup_error(e.rc)
             try:
-                lctl.del_route(self.net_type, self.nid, lo, hi)
+                lctl.del_route(net_type, gw, lo, hi)
             except CommandError, e:
                 print "del_route failed: ", self.name
                 e.dump()
                 cleanup_error(e.rc)
-              
-        try:
-            lctl.cleanup("RPCDEV", "RPCDEV_UUID")
-        except CommandError, e:
-            print "cleanup failed: ", self.name
-            e.dump()
-            cleanup_error(e.rc)
-        try:
-            lctl.disconnectAll(self.net_type)
-        except CommandError, e:
-            print "disconnectAll failed: ", self.name
-            e.dump()
-            cleanup_error(e.rc)
-        if self.net_type in ('tcp', 'toe'):
-            # yikes, this ugly! need to save pid in /var/something
-            run("killall acceptor")
 
 class LDLM(Module):
     def __init__(self,db):
@@ -953,12 +1077,29 @@ class LDLM(Module):
         if is_prepared(self.uuid):
             return
         self.info()
-        lctl.newdev(attach="ldlm %s %s" % (self.name, self.uuid),
-                    setup ="")
+        lctl.newdev(attach="ldlm %s %s" % (self.name, self.uuid))
+    def cleanup(self):
+        if is_prepared(self.uuid):
+            Module.cleanup(self)
+
+class PTLRPC(Module):
+    def __init__(self,db):
+        Module.__init__(self, 'PTLRPC', db)
+        self.add_lustre_module('ptlrpc', 'ptlrpc') 
+    def prepare(self):
+        if is_prepared(self.uuid):
+            return
+        self.info()
+        lctl.newdev(attach="ptlrpc %s %s" % (self.name, self.uuid))
+    def cleanup(self):
+        if is_prepared(self.uuid):
+            Module.cleanup(self)
 
 class LOV(Module):
     def __init__(self,db):
         Module.__init__(self, 'LOV', db)
+        self.add_lustre_module('mdc', 'mdc')
+        self.add_lustre_module('lov', 'lov')
         self.mds_uuid = self.db.get_first_ref('mds')
         mds= self.db.lookup(self.mds_uuid)
         self.mds_name = mds.getName()
@@ -967,66 +1108,50 @@ class LOV(Module):
         self.pattern = self.db.get_val_int('stripepattern', 0)
         self.devlist = self.db.get_refs('obd')
         self.stripe_cnt = self.db.get_val_int('stripecount', len(self.devlist))
-        self.add_lustre_module('mdc', 'mdc')
-        self.add_lustre_module('lov', 'lov')
-
-    def prepare(self):
-        if is_prepared(self.uuid):
-            return
+        self.osclist = []
+        self.mdc_uudi = ''
         for obd_uuid in self.devlist:
             obd = self.db.lookup(obd_uuid)
-            osc = get_osc(obd)
+            osc = get_osc(obd, self.name)
             if osc:
-                try:
-                    # Ignore connection failures, because the LOV will DTRT with
-                    # an unconnected OSC.
-                    osc.prepare(ignore_connect_failure=1)
-                except CommandError:
-                    print "Error preparing OSC %s (inactive)\n" % osc_uuid
+                self.osclist.append(osc)
             else:
-                panic('osc not found:', osc_uuid)
-        mdc_uuid = prepare_mdc(self.db, self.mds_uuid)
+                panic('osc not found:', obd_uuid)
+            
+    def prepare(self):
+        if is_prepared(self.uuid):
+            return
+        for osc in self.osclist:
+            try:
+                # Ignore connection failures, because the LOV will DTRT with
+                # an unconnected OSC.
+                osc.prepare(ignore_connect_failure=1)
+            except CommandError:
+                print "Error preparing OSC %s (inactive)\n" % osc.uuid
+        self.mdc_uuid = prepare_mdc(self.db, self.name, self.mds_uuid)
         self.info(self.mds_uuid, self.stripe_cnt, self.stripe_sz,
                   self.stripe_off, self.pattern, self.devlist, self.mds_name)
         lctl.newdev(attach="lov %s %s" % (self.name, self.uuid),
-                    setup ="%s" % (mdc_uuid))
+                    setup ="%s" % (self.mdc_uuid))
 
     def cleanup(self):
-        if not is_prepared(self.uuid):
-            return
-        for obd_uuid in self.devlist:
-            obd = self.db.lookup(obd_uuid)
-            osc = get_osc(obd)
-            if osc:
-                osc.cleanup()
-            else:
-                panic('osc not found:', osc_uuid)
-        Module.cleanup(self)
-        cleanup_mdc(self.db, self.mds_uuid)
-
+        if is_prepared(self.uuid):
+            Module.cleanup(self)
+        for osc in self.osclist:
+            osc.cleanup()
+        cleanup_mdc(self.db, self.name, self.mds_uuid)
 
     def load_module(self):
-        for obd_uuid in self.devlist:
-            obd = self.db.lookup(obd_uuid)
-            osc = get_osc(obd)
-            if osc:
-                osc.load_module()
-                break
-            else:
-                panic('osc not found:', osc_uuid)
+        for osc in self.osclist:
+            osc.load_module()
+            break
         Module.load_module(self)
 
-
     def cleanup_module(self):
         Module.cleanup_module(self)
-        for obd_uuid in self.devlist:
-            obd = self.db.lookup(obd_uuid)
-            osc = get_osc(obd)
-            if osc:
-                osc.cleanup_module()
-                break
-            else:
-                panic('osc not found:', osc_uuid)
+        for osc in self.osclist:
+            osc.cleanup_module()
+            break
 
 class LOVConfig(Module):
     def __init__(self,db):
@@ -1051,27 +1176,46 @@ class LOVConfig(Module):
 class MDSDEV(Module):
     def __init__(self,db):
         Module.__init__(self, 'MDSDEV', db)
-        self.devname = self.db.get_val('devpath','')
+        self.devpath = self.db.get_val('devpath','')
         self.size = self.db.get_val_int('devsize', 0)
         self.fstype = self.db.get_val('fstype', '')
         # overwrite the original MDSDEV name and uuid with the MDS name and uuid
-        self.uuid = self.db.get_first_ref('mds')
-        mds = self.db.lookup(self.uuid)
+        target_uuid = self.db.get_first_ref('target')
+        mds = self.db.lookup(target_uuid)
         self.name = mds.getName()
         self.lovconfig_uuids = mds.get_refs('lovconfig')
         # FIXME: if fstype not set, then determine based on kernel version
         self.format = self.db.get_val('autoformat', "no")
+
+        active_uuid = mds.get_active_target()
+        if not active_uuid:
+            panic("No target device found:", target_uuid)
+        if active_uuid == self.uuid:
+            self.active = 1
+        else:
+            self.active = 0
+        self.target_dev_uuid = self.uuid
+        self.uuid = target_uuid
+        # modules
         if self.fstype == 'extN':
             self.add_lustre_module('extN', 'extN') 
         self.add_lustre_module('mds', 'mds')
         if self.fstype:
             self.add_lustre_module('obdclass', 'fsfilt_%s' % (self.fstype))
+
+    def load_module(self):
+        if self.active:
+            Module.load_module(self)
             
     def prepare(self):
         if is_prepared(self.uuid):
             return
-        self.info(self.devname, self.fstype, self.format)
-        blkdev = block_dev(self.devname, self.size, self.fstype, self.format)
+        if not self.active:
+            debug(self.uuid, "not active")
+            return
+        self.info(self.devpath, self.fstype, self.format)
+        run_acceptors()
+        blkdev = block_dev(self.devpath, self.size, self.fstype, self.format)
         if not is_prepared('MDT_UUID'):
             lctl.newdev(attach="mdt %s %s" % ('MDT', 'MDT_UUID'),
                         setup ="")
@@ -1090,83 +1234,166 @@ class MDSDEV(Module):
                 print "cleanup failed: ", self.name
                 e.dump()
                 cleanup_error(e.rc)
-        if not is_prepared(self.uuid):
-            return
-        Module.cleanup(self)
-        clean_loop(self.devname)
-
-# Very unusual case, as there is no MDC element in the XML anymore
-# Builds itself from an MDS node
-class MDC(Module):
-    def __init__(self,db):
-        self.mds_uuid = db.getUUID()
-        self.mds_name = db.getName()
-        self.db = db
-        node_name =  config.select(self.mds_name)
-        if node_name:
-            self.mdd_uuid = self.db.get_mdd(node_name, self.mds_uuid)
-        else:
-            self.mdd_uuid = db.get_first_ref('active')
-        if not self.mdd_uuid:
-            panic("No MDSDEV found for MDS service:", self.mds_name)
-        self.module_name = 'MDC'
-        self.kmodule_list = []
-        self._server = None
-        self._connected = 0
-
-        host = socket.gethostname()
-        self.name = 'MDC_%s' % (self.mds_name)
-        self.uuid = '%s_%05x_%05x' % (self.name, int(random.random() * 1048576),
-                                      int(random.random() * 1048576))
-
-        self.lookup_server(self.mdd_uuid)
-        self.add_lustre_module('mdc', 'mdc')
-
-    def prepare(self):
         if is_prepared(self.uuid):
-            return
-        self.info(self.mds_uuid)
-        srv = self.get_server()
-        lctl.connect(srv.net_type, srv.nid, srv.port, srv.uuid, srv.send_mem, srv.recv_mem)
-        lctl.newdev(attach="mdc %s %s" % (self.name, self.uuid),
-                        setup ="%s %s" %(self.mds_uuid, srv.uuid))
-            
-class OBD(Module):
+            Module.cleanup(self)
+        clean_loop(self.devpath)
+
+class OSD(Module):
     def __init__(self, db):
-        Module.__init__(self, 'OBD', db)
-        self.obdtype = self.db.get_val('obdtype')
-        self.devname = self.db.get_val('devpath', '')
+        Module.__init__(self, 'OSD', db)
+        self.osdtype = self.db.get_val('osdtype')
+        self.devpath = self.db.get_val('devpath', '')
         self.size = self.db.get_val_int('devsize', 0)
         self.fstype = self.db.get_val('fstype', '')
-        self.active_target = self.db.get_first_ref('active')
+        target_uuid = self.db.get_first_ref('target')
+        ost = self.db.lookup(target_uuid)
+        self.name = ost.getName()
         # FIXME: if fstype not set, then determine based on kernel version
         self.format = self.db.get_val('autoformat', 'yes')
         if self.fstype == 'extN':
             self.add_lustre_module('extN', 'extN') 
-        self.add_lustre_module(self.obdtype, self.obdtype)
+
+        active_uuid = ost.get_active_target()
+        if not active_uuid:
+            panic("No target device found:", target_uuid)
+        if active_uuid == self.uuid:
+            self.active = 1
+        else:
+            self.active = 0
+        self.target_dev_uuid = self.uuid
+        self.uuid = target_uuid
+        # modules
+        self.add_lustre_module('ost', 'ost')
+        self.add_lustre_module(self.osdtype, self.osdtype)
         if self.fstype:
             self.add_lustre_module('obdclass' , 'fsfilt_%s' % (self.fstype))
 
+    def load_module(self):
+        if self.active:
+            Module.load_module(self)
+
     # need to check /proc/mounts and /etc/mtab before
     # formatting anything.
     # FIXME: check if device is already formatted.
     def prepare(self):
         if is_prepared(self.uuid):
             return
-        self.info(self.obdtype, self.devname, self.size, self.fstype, self.format)
-        if self.obdtype == 'obdecho':
+        if not self.active:
+            debug(self.uuid, "not active")
+            return
+        self.info(self.osdtype, self.devpath, self.size, self.fstype, self.format)
+        run_acceptors()
+        if self.osdtype == 'obdecho':
             blkdev = ''
         else:
-            blkdev = block_dev(self.devname, self.size, self.fstype, self.format)
-        lctl.newdev(attach="%s %s %s" % (self.obdtype, self.name, self.uuid),
+            blkdev = block_dev(self.devpath, self.size, self.fstype, self.format)
+        lctl.newdev(attach="%s %s %s" % (self.osdtype, self.name, self.uuid),
                     setup ="%s %s" %(blkdev, self.fstype))
+        if not is_prepared('OSS_UUID'):
+            lctl.newdev(attach="ost %s %s" % ('OSS', 'OSS_UUID'),
+                        setup ="")
+
     def cleanup(self):
-        if not is_prepared(self.uuid):
+        if is_prepared('OSS_UUID'):
+            try:
+                lctl.cleanup("OSS", "OSS_UUID")
+            except CommandError, e:
+                print "cleanup failed: ", self.name
+                e.dump()
+                cleanup_error(e.rc)
+        if is_prepared(self.uuid):
+            Module.cleanup(self)
+        if not self.osdtype == 'obdecho':
+            clean_loop(self.devpath)
+
+# Generic client module, used by OSC and MDC
+class Client(Module):
+    def __init__(self, tgtdb, module, owner):
+        self.target_name = tgtdb.getName()
+        self.target_uuid = tgtdb.getUUID()
+        self.db = tgtdb
+
+        self.tgt_dev_uuid = tgtdb.get_active_target()
+        if not self.tgt_dev_uuid:
+            panic("No target device found for target:", self.target_name)
+            
+        self.kmodule_list = []
+        self._server = None
+        self._connected = 0
+
+        self.module = module
+        self.module_name = string.upper(module)
+        self.name = '%s_%s_%s' % (self.module_name, owner, self.target_name)
+        self.uuid = '%05x%05x_%.14s_%05x%05x' % (int(random.random() * 1048576),
+                                              int(random.random() * 1048576),self.name,
+                                              int(random.random() * 1048576),
+                                              int(random.random() * 1048576))
+        self.uuid = self.uuid[0:36]
+        self.lookup_server(self.tgt_dev_uuid)
+        self.add_lustre_module(module, module)
+
+    def lookup_server(self, srv_uuid):
+        """ Lookup a server's network information """
+        self._server_nets = self.db.get_ost_net(srv_uuid)
+        if len(self._server_nets) == 0:
+            panic ("Unable to find a server for:", srv_uuid)
+
+    def get_servers(self):
+        return self._server_nets
+
+    def prepare(self, ignore_connect_failure = 0):
+        if is_prepared(self.uuid):
             return
+        self.info(self.target_uuid)
+        try:
+            srv = local_net(self.get_servers())
+            if srv:
+                lctl.connect(srv)
+            else:
+                srv, r =  find_route(self.get_servers())
+                if srv:
+                    lctl.add_route_host(r[0], srv.uuid, r[1], r[2])
+                else:
+                    panic ("no route to",  self.target_uuid)
+        except CommandError:
+            if (ignore_connect_failure == 0):
+                pass
+        if srv:
+            lctl.newdev(attach="%s %s %s" % (self.module, self.name, self.uuid),
+                        setup ="%s %s" %(self.target_uuid, srv.uuid))
+
+    def cleanup(self):
         Module.cleanup(self)
-        if not self.obdtype == 'obdecho':
-            clean_loop(self.devname)
+        srv = local_net(self.get_servers())
+        if srv:
+            try:
+                lctl.disconnect(srv.net_type, srv.nid, srv.port, srv.uuid)
+            except CommandError, e:
+                log(self.module_name, "disconnect failed: ", self.name)
+                e.dump()
+                cleanup_error(e.rc)
+        else:
+            self.info(self.target_uuid)
+            srv, r =  find_route(self.get_servers())
+            if srv:
+                try:
+                    lctl.del_route_host(r[0], srv.uuid, r[1], r[2])
+                except CommandError, e:
+                    print "del_route failed: ", self.name
+                    e.dump()
+                    cleanup_error(e.rc)
+
 
+
+class MDC(Client):
+    def __init__(self, db, owner):
+         Client.__init__(self, db, 'mdc', owner)
+
+class OSC(Client):
+    def __init__(self, db, owner):
+         Client.__init__(self, db, 'osc', owner)
+
+            
 class COBD(Module):
     def __init__(self, db):
         Module.__init__(self, 'COBD', db)
@@ -1184,28 +1411,15 @@ class COBD(Module):
         lctl.newdev(attach="cobd %s %s" % (self.name, self.uuid),
                     setup ="%s %s" %(self.real_uuid, self.cache_uuid))
 
-class OST(Module):
-    def __init__(self,db):
-        Module.__init__(self, 'OST', db)
-        self.obd_uuid = self.db.get_first_ref('obd')
-        self.add_lustre_module('ost', 'ost')
-
-    def prepare(self):
-        if is_prepared(self.uuid):
-            return
-        self.info(self.obd_uuid)
-        lctl.newdev(attach="ost %s %s" % (self.name, self.uuid),
-                    setup ="%s" % (self.obd_uuid))
-
 
 # virtual interface for  OSC and LOV
 class VOSC(Module):
-    def __init__(self,db):
+    def __init__(self,db, owner):
         Module.__init__(self, 'VOSC', db)
         if db.get_class() == 'lov':
             self.osc = LOV(db)
         else:
-            self.osc = get_osc(db)
+            self.osc = get_osc(db, owner)
     def get_uuid(self):
         return self.osc.uuid
     def prepare(self):
@@ -1216,61 +1430,13 @@ class VOSC(Module):
         self.osc.load_module()
     def cleanup_module(self):
         self.osc.cleanup_module()
-        
+    def need_mdc(self):
+        return self.db.get_class() != 'lov'
+    def get_mdc_uuid(self):
+        if self.db.get_class() == 'lov':
+            return self.osc.mdc_uuid
+        return ''
 
-class OSC(Module):
-    def __init__(self, db, obd_name, obd_uuid, ost_uuid):
-        self.db = db
-        self.module_name = 'OSC'
-        self.name = 'OSC_%s' % (obd_name)
-        self.uuid = '%s_%05x' % (self.name, int(random.random() * 1048576))
-        self.kmodule_list = []
-        self._server = None
-        self._connected = 0
-
-        self.obd_uuid = obd_uuid
-        self.ost_uuid = ost_uuid
-        debug("OSC:", obd_uuid, ost_uuid)
-        self.lookup_server(self.ost_uuid)
-        self.add_lustre_module('osc', 'osc')
-
-    def prepare(self, ignore_connect_failure = 0):
-        if is_prepared(self.uuid):
-            return
-        self.info(self.obd_uuid, self.ost_uuid)
-        srv = self.get_server()
-        try:
-            if local_net(srv):
-                lctl.connect(srv.net_type, srv.nid, srv.port, srv.uuid, srv.send_mem, srv.recv_mem)
-            else:
-                r =  find_route(srv)
-                if r:
-                    lctl.add_route_host(r[0], srv.uuid, r[1], r[2])
-                else:
-                    panic ("no route to",  srv.nid)
-        except CommandError:
-            if (ignore_connect_failure == 0):
-                pass
-            
-        lctl.newdev(attach="osc %s %s" % (self.name, self.uuid),
-                    setup ="%s %s" %(self.obd_uuid, srv.uuid))
-
-    def cleanup(self):
-        srv = self.get_server()
-        if local_net(srv):
-            Module.cleanup(self)
-        else:
-            self.info(self.obd_uuid, self.ost_uuid)
-            r =  find_route(srv)
-            if r:
-                try:
-                    lctl.del_route_host(r[0], srv.uuid, r[1], r[2])
-                except CommandError, e:
-                    print "del_route failed: ", self.name
-                    e.dump()
-                    cleanup_error(e.rc)
-            Module.cleanup(self)
-            
 
 class ECHO_CLIENT(Module):
     def __init__(self,db):
@@ -1278,7 +1444,7 @@ class ECHO_CLIENT(Module):
         self.add_lustre_module('obdecho', 'obdecho')
         self.obd_uuid = self.db.get_first_ref('obd')
         obd = self.db.lookup(self.obd_uuid)
-        self.osc = VOSC(obd)
+        self.osc = VOSC(obd, self.name)
 
     def prepare(self):
         if is_prepared(self.uuid):
@@ -1290,8 +1456,8 @@ class ECHO_CLIENT(Module):
                     setup = self.osc.get_uuid())
 
     def cleanup(self):
-        if not is_prepared(self.uuid):
-            return
+        if is_prepared(self.uuid):
+            Module.cleanup(self)
         self.osc.cleanup()
 
     def load_module(self):
@@ -1308,18 +1474,24 @@ class Mountpoint(Module):
         self.path = self.db.get_val('path')
         self.mds_uuid = self.db.get_first_ref('mds')
         self.obd_uuid = self.db.get_first_ref('obd')
-        self.add_lustre_module('mdc', 'mdc')
-        self.add_lustre_module('llite', 'llite')
         obd = self.db.lookup(self.obd_uuid)
-        self.osc = VOSC(obd)
+        self.vosc = VOSC(obd, self.name)
+        if self.vosc.need_mdc():
+            self.add_lustre_module('mdc', 'mdc')
+        self.add_lustre_module('llite', 'llite')
 
 
     def prepare(self):
-        self.osc.prepare()
-        mdc_uuid = prepare_mdc(self.db, self.mds_uuid)
+        self.vosc.prepare()
+        if self.vosc.need_mdc():
+            mdc_uuid = prepare_mdc(self.db, self.name,  self.mds_uuid)
+        else:
+            mdc_uuid = self.vosc.get_mdc_uuid()
+        if not mdc_uuid:
+            panic("Unable to determine MDC UUID. Probably need to cleanup before re-mounting.")
         self.info(self.path, self.mds_uuid, self.obd_uuid)
         cmd = "mount -t lustre_lite -o osc=%s,mdc=%s none %s" % \
-              (self.osc.get_uuid(), mdc_uuid, self.path)
+              (self.vosc.get_uuid(), mdc_uuid, self.path)
         run("mkdir", self.path)
         ret, val = run(cmd)
         if ret:
@@ -1338,27 +1510,21 @@ class Mountpoint(Module):
         if fs_is_mounted(self.path):
             panic("fs is still mounted:", self.path)
 
-        self.osc.cleanup()
-        cleanup_mdc(self.db, self.mds_uuid)
+        self.vosc.cleanup()
+        if self.vosc.need_mdc():
+            cleanup_mdc(self.db, self.name, self.mds_uuid)
 
     def load_module(self):
-        self.osc.load_module()
+        self.vosc.load_module()
         Module.load_module(self)
     def cleanup_module(self):
         Module.cleanup_module(self)
-        self.osc.cleanup_module()
+        self.vosc.cleanup_module()
 
 
 # ============================================================
 # XML processing and query
 
-# OSC is no longer in the xml, so we have to fake it.
-# this is getting ugly and begging for another refactoring
-def get_osc(obd_dom):
-    obd = OBD(obd_dom)
-    osc = OSC(obd_dom, obd.name, obd.uuid, obd.active_target)
-    return osc
-
 class LustreDB:
     def lookup(self, uuid):
         """ lookup returns a new LustreDB instance"""
@@ -1411,18 +1577,26 @@ class LustreDB:
         uuids = self._get_all_refs()
         return uuids
 
-    def get_ost_net(self, uuid):
-        ost = self.lookup(uuid)
-        uuid = ost.get_first_ref('network')
-        if not uuid:
-            return None
-        return ost.lookup(uuid)
-
-    def nid2server(self, nid):
-        netlist = self.parent.parent.attrs['network']
+    def get_ost_net(self, osd_uuid):
+        srv_list = []
+        if not osd_uuid:
+            return srv_list
+        osd = self.lookup(osd_uuid)
+        node_uuid = osd.get_first_ref('node')
+        node = self.lookup(node_uuid)
+        if not node:
+            panic("unable to find node for osd_uuid:", osd_uuid,
+                  " node_ref:", node_uuid)
+        for net_uuid in node.get_networks():
+            db = node.lookup(net_uuid)
+            srv_list.append(Network(db))
+        return srv_list
+
+    def nid2server(self, nid, net_type):
+        netlist = self.lookup_class('network')
         for net_db in netlist:
-            if net_db.get_val('nid') == nid: 
-                return net
+            if net_db.get_val('nid') == nid and net_db.get_val('nettype') == net_type:
+                return net_db
         return None
     
     # the tag name is the service type
@@ -1435,10 +1609,14 @@ class LustreDB:
         type = self.get_class()
         ret=0;
         if type in ('network',):
-            ret = 10
+            ret = 5
+        elif type in ('routetbl',):
+            ret = 6
+        elif type in ('ptlrpc',):
+            ret = 7
         elif type in ('device', 'ldlm'):
             ret = 20
-        elif type in ('obd', 'mdd', 'cobd'):
+        elif type in ('osd', 'mdd', 'cobd'):
             ret = 30
         elif type in ('mdsdev','ost'):
             ret = 40
@@ -1470,24 +1648,45 @@ class LustreDB:
         list.sort()
         return list
 
-    # Find the mdsdev attached to node_name that points to
-    # mds_uuid
-    # node->profiles->mdsdev_refs->mds
-    def get_mdd(self, node_name, mds_uuid):
+    # Find the target_device for target on a node
+    # node->profiles->device_refs->target
+    def get_target_device(self, target_uuid, node_name):
         node_db = self.lookup_name(node_name)
         if not node_db:
             return None
         prof_list = node_db.get_refs('profile')
         for prof_uuid in prof_list:
             prof_db = node_db.lookup(prof_uuid)
-            mdd_list = prof_db.get_refs('mdsdev')
-            for mdd_uuid in mdd_list:
-                mdd = self.lookup(mdd_uuid)
-                if mdd.get_first_ref('mds') == mds_uuid:
-                    return mdd_uuid
+            ref_list = prof_db.get_all_refs()
+            for ref in ref_list:
+                dev = self.lookup(ref[1])
+                if dev and dev.get_first_ref('target') == target_uuid:
+                    return ref[1]
         return None
+
+    def get_active_target(self):
+        target_uuid = self.getUUID()
+        target_name = self.getName()
+        node_name = config.select(target_name)
+        if node_name:
+            tgt_dev_uuid = self.get_target_device(target_uuid, node_name)
+        else:
+            tgt_dev_uuid = self.get_first_ref('active')
+        return tgt_dev_uuid
         
 
+    # get all network uuids for this node
+    def get_networks(self):
+        ret = []
+        prof_list = self.get_refs('profile')
+        for prof_uuid in prof_list:
+            prof_db = self.lookup(prof_uuid)
+            net_list = prof_db.get_refs('network')
+            #debug("get_networks():", prof_uuid, net_list)
+            for net_uuid in net_list:
+                ret.append(net_uuid)
+        return ret
+
 class LustreDB_XML(LustreDB):
     def __init__(self, dom, root_node):
         # init xmlfile
@@ -1599,25 +1798,25 @@ class LustreDB_XML(LustreDB):
         """ Return the routes as a list of tuples of the form:
         [(type, gw, lo, hi),]"""
         res = []
-        tbl = self.dom_node.getElementsByTagName('route_tbl')
+        tbl = self.dom_node.getElementsByTagName('routetbl')
         for t in tbl:
             routes = t.getElementsByTagName('route')
             for r in routes:
-                lo = self.xmlattr(r, 'lo')
-                hi = self.xmlattr(r, 'hi', '')
-                res.append((type, gw, lo, hi))
+                net_type = self.xmlattr(r, 'type')
+                if type != net_type:
+                    lo = self.xmlattr(r, 'lo')
+                    hi = self.xmlattr(r, 'hi')
+                    res.append((type, gw, lo, hi))
         return res
 
     def get_route_tbl(self):
         ret = []
-        tbls = self.dom_node.getElementsByTagName('route_tbl')
-        for tbl in tbls:
-            for r in tbl.getElementsByTagName('route'):
-                net_type = self.xmlattr(r, 'type')
-                gw = self.xmlattr(r, 'gw')
-                lo = self.xmlattr(r, 'lo')
-                hi = self.xmlattr(r,'hi', '')
-                ret.append((net_type, gw, lo, hi))
+        for r in self.dom_node.getElementsByTagName('route'):
+            net_type = self.xmlattr(r, 'type')
+            gw = self.xmlattr(r, 'gw')
+            lo = self.xmlattr(r, 'lo')
+            hi = self.xmlattr(r, 'hi')
+            ret.append((net_type, gw, lo, hi))
         return ret
 
 
@@ -1652,7 +1851,7 @@ class LustreDB_LDAP(LustreDB):
             self.l.protocol_version=ldap.VERSION3
             # user and pw only needed if modifying db
             self.l.bind_s("", "", ldap.AUTH_SIMPLE);
-        except ldap.LDAPerror, e:
+        except ldap.LDAPError, e:
             panic(e)
             # FIXME, do something useful here
 
@@ -1749,28 +1948,27 @@ class LustreDB_LDAP(LustreDB):
 # MDC UUID hack - 
 # FIXME: clean this mess up!
 #
-saved_mdc = {}
-def prepare_mdc(db, mds_uuid):
-    global saved_mdc
+# OSC is no longer in the xml, so we have to fake it.
+# this is getting ugly and begging for another refactoring
+def get_osc(ost_db, owner):
+    osc = OSC(ost_db, owner)
+    return osc
+
+def get_mdc(db, owner, mds_uuid):
     mds_db = db.lookup(mds_uuid);
     if not mds_db:
         panic("no mds:", mds_uuid)
-    if saved_mdc.has_key(mds_uuid):
-        return saved_mdc[mds_uuid]
-    mdc = MDC(mds_db)
+    mdc = MDC(mds_db, owner)
+    return mdc
+
+def prepare_mdc(db, owner, mds_uuid):
+    mdc = get_mdc(db, owner, mds_uuid)
     mdc.prepare()
-    saved_mdc[mds_uuid] = mdc.uuid
     return mdc.uuid
 
-def cleanup_mdc(db, mds_uuid):
-    global saved_mdc
-    mds_db = db.lookup(mds_uuid);
-    if not mds_db:
-        panic("no mds:", mds_uuid)
-    if not saved_mdc.has_key(mds_uuid):
-        mdc = MDC(mds_db)
-        mdc.cleanup()
-        saved_mdc[mds_uuid] = mdc.uuid
+def cleanup_mdc(db, owner, mds_uuid):
+    mdc = get_mdc(db, owner, mds_uuid)
+    mdc.cleanup()
         
 
 ############################################################
@@ -1780,13 +1978,20 @@ routes = []
 local_node = []
 router_flag = 0
 
-def init_node(node_db):
-    global local_node, router_flag
-    netlist = node_db.lookup_class('network')
-    for db in netlist:
-        type = db.get_val('nettype')
-        gw = db.get_val('nid')
-        local_node.append((type, gw))
+def add_local_interfaces(node_db):
+    global local_node
+    for netuuid in node_db.get_networks():
+        net = node_db.lookup(netuuid)
+        srv = Network(net)
+        debug("add_local", netuuid)
+        local_node.append((srv.net_type, srv.nid))
+        if acceptors.has_key(srv.port):
+            panic("duplicate port:", srv.port)
+        if srv.net_type in ('tcp', 'toe'):
+            acceptors[srv.port] = AcceptorHandler(srv.port, srv.net_type,
+                                                  srv.send_mem, srv.recv_mem,
+                                                  srv.irq_affinity,
+                                                  srv.nid_exchange)
 
 def node_needs_router():
     return router_flag
@@ -1800,89 +2005,84 @@ def init_route_config(lustre):
     for node_db in list:
         if node_db.get_val_int('router', 0):
             router_flag = 1
+            #debug("init_route_config: found router", node_db.getName())
             for (local_type, local_nid) in local_node:
+                #debug("init_route_config:", local_type, local_nid)
                 gw = None
-                netlist = node_db.lookup_class('network')
-                for db in netlist:
-                    if local_type == db.get_val('type'):
-                        gw = db.get_val('server')
+                for netuuid in node_db.get_networks():
+                    db = node_db.lookup(netuuid)
+                    if local_type == db.get_val('nettype'):
+                        gw = db.get_val('nid')
                         break
+                #debug("init_route_config: gw is", gw)
                 if not gw:
                     continue
-                for db in netlist:
-                    if local_type != db.get_val('type'):
-                        for route in db.get_routes(local_type, gw):
-                            routes.append(route)
-    
+                for route in node_db.get_routes(local_type, gw):
+                    routes.append(route)
+    debug("init_route_config routes:", routes)
+
 
-def local_net(net):
+def local_net(srv_list):
     global local_node
     for iface in local_node:
-        #debug("local_net a:", net.net_type, "b:", iface[0])
-        if net.net_type == iface[0]:
+        for srv in srv_list:
+            #debug("local_net a:", srv.net_type, "b:", iface[0])
+            if srv.net_type == iface[0]:
+                return srv
+    return None
+
+def local_net_type(net_type):
+    global local_node
+    for iface in local_node:
+        if net_type == iface[0]:
             return 1
     return 0
 
-def find_route(net):
+def find_route(srv_list):
     global local_node, routes
     frm_type = local_node[0][0]
-    to_type = net.net_type
-    to = net.nid
-    debug ('looking for route to', to_type,to)
-    for r in routes:
-        if  r[2] == to:
-            return r
-    return None
+    for srv in srv_list:
+        #debug("find_route: srv:", srv.hostaddr, "type: ", srv.net_type)
+        to_type = srv.net_type
+        to = srv.hostaddr
+        #debug ('looking for route to', to_type, to)
+        for r in routes:
+            #debug("find_route: ", r)
+            if  r[2] == to:
+                return srv, r
+    return None,None
            
-    
 
 ############################################################
 # lconf level logic
 # Start a service.
-def startService(db, module_flag):
+def newService(db):
     type = db.get_class()
     debug('Service:', type, db.getName(), db.getUUID())
-    # there must be a more dynamic way of doing this...
     n = None
     if type == 'ldlm':
         n = LDLM(db)
+    elif type == 'ptlrpc':
+        n = PTLRPC(db)
     elif type == 'lov':
         n = LOV(db)
     elif type == 'network':
         n = Network(db)
-    elif type == 'obd':
-        n = OBD(db)
+    elif type == 'routetbl':
+        n = Router(db)
+    elif type == 'osd':
+        n = OSD(db)
     elif type == 'cobd':
         n = COBD(db)
-    elif type == 'ost':
-        n = OST(db)
     elif type == 'mdsdev':
         n = MDSDEV(db)
-    elif type == 'osc':
-        n = VOSC(db)
-    elif type == 'mdc':
-        n = MDC(db)
     elif type == 'mountpoint':
         n = Mountpoint(db)
     elif type == 'echoclient':
         n = ECHO_CLIENT(db)
     else:
         panic ("unknown service type:", type)
-
-    if module_flag:
-        if config.nomod():
-            return
-        if config.cleanup():
-            n.cleanup_module()
-        else:
-            n.load_module()
-    else:
-        if config.nosetup():
-            return
-        if config.cleanup():
-            n.cleanup()
-        else:
-            n.prepare()
+    return n
 
 #
 # Prepare the system to run lustre using a particular profile
@@ -1892,15 +2092,43 @@ def startService(db, module_flag):
 #  * make sure partitions are in place and prepared
 #  * initialize devices with lctl
 # Levels is important, and needs to be enforced.
-def startProfile(prof_db, module_flag):
-    if not prof_db:
-        panic("profile:", profile, "not found.")
-    services = prof_db.getServices()
-    if config.cleanup():
-        services.reverse()
+def for_each_profile(db, prof_list, operation):
+    for prof_uuid in prof_list:
+        prof_db = db.lookup(prof_uuid)
+        if not prof_db:
+            panic("profile:", prof_uuid, "not found.")
+        services = prof_db.getServices()
+        operation(services)
+        
+def doSetup(services):
+    if config.nosetup():
+        return
+    for s in services:
+        n = newService(s[1])
+        n.prepare()
+    
+def doModules(services):
+    if config.nomod():
+        return
     for s in services:
-        startService(s[1], module_flag)
+        n = newService(s[1])
+        n.load_module()
 
+def doCleanup(services):
+    if config.nosetup():
+        return
+    services.reverse()
+    for s in services:
+        n = newService(s[1])
+        n.cleanup()
+
+def doUnloadModules(services):
+    if config.nomod():
+        return
+    services.reverse()
+    for s in services:
+        n = newService(s[1])
+        n.cleanup_module()
 
 #
 # Load profile for 
@@ -1920,33 +2148,49 @@ def doHost(lustreDB, hosts):
     recovery_upcall = node_db.get_val('recovery_upcall', '')
     timeout = node_db.get_val_int('timeout', 0)
 
+    add_local_interfaces(node_db)
     if not router_flag:
-        init_node(node_db)
         init_route_config(lustreDB)
 
     # Two step process: (1) load modules, (2) setup lustre
     # if not cleaning, load modules first.
-    module_flag = not config.cleanup()
     prof_list = node_db.get_refs('profile')
-    for prof_uuid in prof_list:
-        prof_db = node_db.lookup(prof_uuid)
-        startProfile(prof_db, module_flag)
 
-    if not config.cleanup():
+    if config.cleanup():
+        if config.force():
+            # the command line can override this value
+            timeout = 5
+        # ugly hack, only need to run lctl commands for --dump
+        if config.lctl_dump():
+            for_each_profile(node_db, prof_list, doCleanup)
+            return
+
+        sys_set_timeout(timeout)
+        sys_set_recovery_upcall(recovery_upcall)
+
+        for_each_profile(node_db, prof_list, doCleanup)
+        for_each_profile(node_db, prof_list, doUnloadModules)
+
+    else:
+        # ugly hack, only need to run lctl commands for --dump
+        if config.lctl_dump():
+            for_each_profile(node_db, prof_list, doSetup)
+            return
+
+        for_each_profile(node_db, prof_list, doModules)
+
         sys_set_debug_path()
         script = config.gdb_script()
         run(lctl.lctl, ' modules >', script)
         if config.gdb():
-            # dump /tmp/ogdb and sleep/pause here
             log ("The GDB module script is in", script)
+            # pause, so user has time to break and
+            # load the script
             time.sleep(5)
         sys_set_timeout(timeout)
         sys_set_recovery_upcall(recovery_upcall)
-            
-    module_flag = not module_flag
-    for prof_uuid in prof_list:
-        prof_db = node_db.lookup(prof_uuid)
-        startProfile(prof_db, module_flag)
+
+        for_each_profile(node_db, prof_list, doSetup)
 
 ############################################################
 # Command line processing
@@ -1958,7 +2202,7 @@ def parse_cmdline(argv):
                  "help", "node=", "nomod", "nosetup",
                  "dump=", "force", "minlevel=", "maxlevel=",
                  "timeout=", "recovery_upcall=",
-                 "ldapurl=", "config=", "select="]
+                 "ldapurl=", "config=", "select=", "lctl_dump="]
     opts = []
     args = []
 
@@ -1977,7 +2221,6 @@ def parse_cmdline(argv):
             config.verbose(1)
         if o in ("-n", "--noexec"):
             config.noexec(1)
-            config.verbose(1)
         if o == "--portals":
             config.portals_dir(a)
         if o == "--lustre":
@@ -2010,6 +2253,8 @@ def parse_cmdline(argv):
                 config.config_name(a)
         if o == "--select":
                 config.init_select(a)
+        if o == "--lctl_dump":
+            config.lctl_dump(a)
 
     return args
 
@@ -2119,7 +2364,7 @@ def sanitise_path():
 # Shutdown does steps in reverse
 #
 def main():
-    global TCP_ACCEPTOR, lctl, MAXTCPBUF
+    global  lctl, MAXTCPBUF
 
     host = socket.gethostname()
 
@@ -2169,19 +2414,13 @@ def main():
 
     setupModulePath(sys.argv[0])
 
-    TCP_ACCEPTOR = find_prog('acceptor')
-    if not TCP_ACCEPTOR:
-        if config.noexec():
-            TCP_ACCEPTOR = 'acceptor'
-            debug('! acceptor not found')
-        else:
-            panic('acceptor not found')
-
     lctl = LCTLInterface('lctl')
-
-    sys_make_devices()
-    sys_set_netmem_max('/proc/sys/net/core/rmem_max', MAXTCPBUF)
-    sys_set_netmem_max('/proc/sys/net/core/wmem_max', MAXTCPBUF)
+    if config.lctl_dump():
+        lctl.use_save_file(config.lctl_dump())
+    else:
+        sys_make_devices()
+        sys_set_netmem_max('/proc/sys/net/core/rmem_max', MAXTCPBUF)
+        sys_set_netmem_max('/proc/sys/net/core/wmem_max', MAXTCPBUF)
 
     doHost(db, node_list)
 
@@ -2196,4 +2435,4 @@ if __name__ == "__main__":
 
     if first_cleanup_error:
         sys.exit(first_cleanup_error)
-
+