land 0.5.20.3 b_devel onto HEAD (b_devel will remain)
diff --git a/lustre/utils/lconf.in b/lustre/utils/lconf.in
index 170c5d0..cbe05dd 100755
 #
 # Based in part on the XML obdctl modifications done by Brian Behlendorf 
 
-import sys, getopt
-import string, os, stat, popen2, socket, time, random
+import sys, getopt, types
+import string, os, stat, popen2, socket, time, random, fcntl, select
 import re, exceptions
 import xml.dom.minidom
 
+if sys.version[0] == '1':
+    from FCNTL import F_GETFL, F_SETFL
+else:
+    from fcntl import F_GETFL, F_SETFL
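+# (Python 1.5.x ships these constants in the separate FCNTL module; Python 2
+# moved them into fcntl itself, hence the version check above.)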
+
 # Global parameters
-TCP_ACCEPTOR = ''
 MAXTCPBUF = 1048576
 DEFAULT_TCPBUF = 1048576
 #
@@ -50,8 +54,10 @@ def usage():
     print """usage: lconf config.xml
 
 config.xml          Lustre configuration in xml format.
---get <url>         URL to fetch a config file
+--ldapurl           LDAP server URL, e.g. ldap://localhost
+--config            Cluster config name used for LDAP query
 --node <nodename>   Load config for <nodename>
+--select service=nodeA,service2=nodeB   Select the node to run each service on
 -d | --cleanup      Cleans up config. (Shutdown)
 -f | --force        Forced unmounting and/or obd detach during cleanup
 -v | --verbose      Print system commands as they are run
@@ -70,10 +76,10 @@ config.xml          Lustre configuration in xml format.
                     Levels are approximately like:
                             10 - network
                             20 - device, ldlm
-                            30 - obd, mdd
+                            30 - osd, mdd
                             40 - mds, ost
                             50 - mdc, osc
-                            60 - lov, lovconfig
+                            60 - lov
                             70 - mountpoint, echo_client
 --lustre=src_dir    Base directory of lustre sources. This parameter will cause lconf
                     to load modules from a source tree.
@@ -112,8 +118,12 @@ class Config:
         self._portals_dir = ''
        self._minlevel = 0
        self._maxlevel = 100
-        self._timeout = -1
+        self._timeout = 0
         self._recovery_upcall = ''
+        self._ldapurl = ''
+        self._config_name = ''
+        self._select = {}
+        self._lctl_dump = ''
 
     def verbose(self, flag = None):
         if flag: self._verbose = flag
@@ -151,10 +161,6 @@ class Config:
         if val: self._node = val
         return self._node
 
-    def url(self, val = None):
-        if val: self._url = val
-        return self._url
-
     def gdb_script(self):
         if os.path.isdir('/r'):
             return '/r' + self._gdb_script
@@ -170,7 +176,6 @@ class Config:
     def dump_file(self, val = None):
         if val: self._dump_file = val
         return self._dump_file
-
     def minlevel(self, val = None):
         if val: self._minlevel = int(val)
         return self._minlevel
@@ -195,6 +200,31 @@ class Config:
         if val: self._recovery_upcall = val
         return self._recovery_upcall
 
+    def ldapurl(self, val = None):
+        if val: self._ldapurl = val
+        return self._ldapurl
+
+    def config_name(self, val = None):
+        if val: self._config_name = val
+        return self._config_name
+
+    def init_select(self, arg):
+        # arg = "service=nodeA,service2=nodeB"
+        list = string.split(arg, ',')
+        for entry in list:
+            srv, node = string.split(entry, '=')
+            self._select[srv] = node
+        
+    def select(self, srv):
+        if self._select.has_key(srv):
+            return self._select[srv]
+        return None
+
+    def lctl_dump(self, val = None):
+        if val: self._lctl_dump = val
+        return self._lctl_dump
+
+
 config = Config()
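A quick sketch of the new --select plumbing, with hypothetical service and
node names:

    >>> config.init_select('mds1=nodeA,ost1=nodeB')
    >>> config.select('mds1')
    'nodeA'
    >>> config.select('ost1')
    'nodeB'
    >>> print config.select('ldlm')   # unselected services return None
    None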
 
 # ============================================================ 
@@ -254,6 +284,104 @@ class LconfError (exceptions.Exception):
 
 
 # ============================================================
+# handle daemons, like the acceptor
+class DaemonHandler:
+    """ Manage starting and stopping a daemon. Assumes daemon manages
+    its own pid file. """
+
+    def __init__(self, cmd):
+        self.command = cmd
+        self.path = ""
+
+    def start(self):
+        if self.running():
+            log(self.command, "already running.")
+            return
+        if not self.path:
+            self.path = find_prog(self.command)
+            if not self.path:
+                panic(self.command, "not found.")
+        ret, out = runcmd(self.path +' '+ self.command_line())
+        if ret:
+            raise CommandError(self.path, out, ret)
+
+    def stop(self):
+        if self.running():
+            pid = self.read_pidfile()
+            try:
+                log ("killing process", pid)
+                os.kill(pid, 15)
+                #time.sleep(1) # let daemon die
+            except OSError, e:
+                log("unable to kill", self.command, e)
+            if self.running():
+                log("unable to kill", self.command)
+
+    def running(self):
+        pid = self.read_pidfile()
+        if pid:
+            try:
+                os.kill(pid, 0)
+            except OSError:
+                self.clean_pidfile()
+            else:
+                return 1
+        return 0
+
+    def read_pidfile(self):
+        try:
+            fp = open(self.pidfile(), 'r')
+            pid = int(fp.read())
+            fp.close()
+            return pid
+        except (IOError, ValueError):
+            # no pidfile, or a pidfile with garbage in it
+            return 0
+        
+    def clean_pidfile(self):
+        """ Remove a stale pidfile """
+        log("removing stale pidfile:", self.pidfile())
+        try:
+            os.unlink(self.pidfile())
+        except OSError, e:
+            log(self.pidfile(), e)
+            
+class AcceptorHandler(DaemonHandler):
+    def __init__(self, port, net_type, send_mem, recv_mem, irq_aff, nid_xchg):
+        DaemonHandler.__init__(self, "acceptor")
+        self.port = port
+        self.flags = ''
+        self.send_mem = send_mem
+        self.recv_mem = recv_mem
+
+        if net_type == 'toe':
+            self.flags = self.flags + ' -N 4'
+        if irq_aff:
+            self.flags = self.flags + ' -i'
+        if nid_xchg:
+            self.flags = self.flags + ' -x'
+
+    def pidfile(self):
+        return "/var/run/%s-%d.pid" % (self.command, self.port)
+
+    def command_line(self):
+        return string.join(map(str,('-s', self.send_mem, '-r', self.recv_mem, self.flags, self.port)))
+    
+acceptors = {}
+
+# start the acceptors
+def run_acceptors():
+    for port in acceptors.keys():
+        daemon = acceptors[port]
+        if not daemon.running():
+            daemon.start()
+
+def stop_acceptor(port):
+    if acceptors.has_key(port):
+        daemon = acceptors[port]
+        if daemon.running():
+            daemon.stop()
+        
+
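Roughly how the acceptor pieces fit together; the code that populates
acceptors appears later in the file, outside this diff, and port 988 is an
assumed value:

    acceptors[988] = AcceptorHandler(988, 'tcp', DEFAULT_TCPBUF, DEFAULT_TCPBUF, 0, 0)
    run_acceptors()       # start any registered acceptor not already running
    stop_acceptor(988)    # later: SIGTERM via /var/run/acceptor-988.pid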
+# ============================================================
 # handle lctl interface
 class LCTLInterface:
     """
@@ -265,6 +393,7 @@ class LCTLInterface:
         Initialize close by finding the lctl binary.
         """
         self.lctl = find_prog(cmd)
+        self.save_file = ''
         if not self.lctl:
             if config.noexec():
                 debug('! lctl not found')
@@ -272,6 +401,13 @@ class LCTLInterface:
             else:
                 raise CommandError('lctl', "unable to find lctl binary.")
 
+    def use_save_file(self, file):
+        self.save_file = file
+        
+    def set_nonblock(self, fd):
+        fl = fcntl.fcntl(fd, F_GETFL)
+        fcntl.fcntl(fd, F_SETFL, fl | os.O_NDELAY)
+
     def run(self, cmds):
         """
         run lctl
@@ -281,21 +417,48 @@ class LCTLInterface:
         should modify command line to accept multiple commands, or
         create complex command line options
         """
-        debug("+", self.lctl, cmds)
+        cmd_line = self.lctl
+        if self.save_file:
+            cmds = '\n  dump ' + self.save_file + cmds
+
+        debug("+", cmd_line, cmds)
         if config.noexec(): return (0, [])
-        p = popen2.Popen3(self.lctl, 1)
-        p.tochild.write(cmds + "\n")
-        p.tochild.close()
-        out = p.fromchild.readlines()
-        err = p.childerr.readlines()
-        ret = p.wait()
+
+        child = popen2.Popen3(cmd_line, 1) # Capture stdout and stderr from command
+        child.tochild.write(cmds + "\n")
+        child.tochild.close()
+
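+        # Drain stdout and stderr together: reading one pipe to EOF while the
+        # child blocks writing the other would deadlock (pipe buffers are
+        # finite), hence the non-blocking fds and the select() loop below.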
+        # From "Python Cookbook" from O'Reilly
+        outfile = child.fromchild
+        outfd = outfile.fileno()
+        self.set_nonblock(outfd)
+        errfile = child.childerr
+        errfd = errfile.fileno()
+        self.set_nonblock(errfd)
+
+        outdata = errdata = ''
+        outeof = erreof = 0
+        while 1:
+            ready = select.select([outfd,errfd],[],[]) # Wait for input
+            if outfd in ready[0]:
+                outchunk = outfile.read()
+                if outchunk == '': outeof = 1
+                outdata = outdata + outchunk
+            if errfd in ready[0]:
+                errchunk = errfile.read()
+                if errchunk == '': erreof = 1
+                errdata = errdata + errchunk
+            if outeof and erreof: break
+        # end of "borrowed" code
+
+        ret = child.wait()
         if os.WIFEXITED(ret):
             rc = os.WEXITSTATUS(ret)
         else:
             rc = 0
-        if rc or len(err):
-            raise CommandError(self.lctl, err, rc)
-        return rc, out
+        if rc or len(errdata):
+            raise CommandError(self.lctl, errdata, rc)
+        return rc, outdata
 
     def runcmd(self, *args):
         """
@@ -317,33 +480,28 @@ class LCTLInterface:
             cmds =  """
   network %s
   mynid %s
-  add_uuid self %s
-  quit""" % (net, nid, nid)
-        else:
-            cmds =  """
-  network %s
-  add_uuid self %s
-  quit""" % (net, nid)
-            
-        self.run(cmds)
+  quit """ % (net, nid)
+            self.run(cmds)
 
     # create a new connection
-    def connect(self, net, nid, port, servuuid, send_mem, recv_mem):
-        if net  in ('tcp', 'toe'):
-            cmds =  """
+    def connect(self, srv):
+        cmds =  "\n  add_uuid %s %s %s" % (srv.uuid, srv.nid, srv.net_type)
+        if srv.net_type  in ('tcp', 'toe') and not config.lctl_dump():
+            flags = ''
+            if srv.irq_affinity:
+                flags = flags + 'i'
+            if srv.nid_exchange:
+                flags = flags + 'x'
+            cmds =  """%s          
   network %s
-  add_uuid %s %s
   send_mem %d
   recv_mem %d
-  connect %s %d
-  quit""" % (net, servuuid, nid, send_mem, recv_mem, nid, port,  )
-        else:
-            cmds =  """
-  network %s
-  add_uuid %s %s
-  connect %s %d
-  quit""" % (net, servuuid, nid, nid, port,  )
-            
+  connect %s %d %s""" % (cmds, srv.net_type,
+             srv.send_mem,
+             srv.recv_mem,
+             srv.hostaddr, srv.port, flags )
+
+        cmds = cmds + "\n  quit"
         self.run(cmds)
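For a tcp server with irq_affinity set, connect() hands lctl a script shaped
like this (uuid, address and port values hypothetical):

      add_uuid OST_ost1_UUID 192.168.0.10 tcp
      network tcp
      send_mem 1048576
      recv_mem 1048576
      connect 192.168.0.10 988 i
      quit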
                 
     # add a route to a range
@@ -351,7 +509,8 @@ class LCTLInterface:
         cmds =  """
   network %s
   add_route %s %s %s
-  quit  """ % (net, gw, lo, hi)
+  quit  """ % (net,
+               gw, lo, hi)
         self.run(cmds)
 
                 
@@ -367,9 +526,11 @@ class LCTLInterface:
     def add_route_host(self, net, uuid, gw, tgt):
         cmds =  """
   network %s
-  add_uuid %s %s
+  add_uuid %s %s %s
   add_route %s %s
-  quit """ % (net, uuid, tgt, gw, tgt)
+  quit """ % (net,
+              uuid, tgt, net,
+              gw, tgt)
         self.run(cmds)
 
     # add a route to a range
@@ -397,7 +558,6 @@ class LCTLInterface:
         cmds =  """
   ignore_errors
   network %s
-  del_uuid self
   disconnect
   quit""" % (net)
         self.run(cmds)
@@ -416,8 +576,8 @@ class LCTLInterface:
         cmds = """
   ignore_errors
   device $%s
-  cleanup
-  detach %s
+  cleanup %s
+  detach
   quit""" % (name, ('', 'force')[config.force()])
         self.run(cmds)
 
@@ -454,8 +614,7 @@ class LCTLInterface:
 # Run a command and return the output and status.
 # stderr is sent to /dev/null, could use popen3 to
 # save it if necessary
-def run(*args):
-    cmd = string.join(map(str,args))
+def runcmd(cmd):
     debug ("+", cmd)
     if config.noexec(): return (0, [])
     f = os.popen(cmd + ' 2>&1')
@@ -467,6 +626,10 @@ def run(*args):
         ret = 0
     return (ret, out)
 
+def run(*args):
+    cmd = string.join(map(str,args))
+    return runcmd(cmd)
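run() keeps the old varargs interface as a thin shim over runcmd(); the args
are joined into one command string, so e.g. run('dd', 'if=/dev/zero',
'count=0') becomes runcmd('dd if=/dev/zero count=0').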
+
 # Run a command in the background.
 def run_daemon(*args):
     cmd = string.join(map(str,args))
@@ -487,7 +650,7 @@ def find_prog(cmd):
     cmdpath = os.path.dirname(sys.argv[0])
     syspath.insert(0, cmdpath);
     if config.portals_dir():
-        syspath.insert(0, os.path.join(cmdpath, config.portals_dir()+'/linux/utils/'))
+        syspath.insert(0, os.path.join(config.portals_dir()+'/linux/utils/'))
     for d in syspath:
         prog = os.path.join(d,cmd)
         if os.access(prog, os.X_OK):
@@ -527,23 +690,20 @@ def is_block(path):
 
 # build fs according to type
 # fixme: dangerous
-def mkfs(fstype, dev):
+def mkfs(dev, devsize, fstype):
+    block_cnt = ''
+    if devsize:
+        # devsize is in 1k, and fs block count is in 4k
+        block_cnt = devsize/4
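+        # e.g. a 200000 KB (~195 MB) device gives block_cnt = 50000
+        # 4 KB blocks: "mkfs.ext2 -j -b 4096 -F <dev> 50000"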
+
     if(fstype in ('ext3', 'extN')):
-        mkfs = 'mkfs.ext2 -j -b 4096'
+        mkfs = 'mkfs.ext2 -j -b 4096 -F '
     elif (fstype == 'reiserfs'):
-        mkfs = 'mkfs.reiserfs -f'
+        mkfs = 'mkreiserfs -ff'
     else:
         print 'unsupported fs type: ', fstype
-    if not is_block(dev):
-        if(fstype in ('ext3', 'extN')):
-            force = '-F'
-        elif (fstype == 'reiserfs'):
-            force = ''
-        else:
-            print 'unsupported fs type: ', fstype
-    else:
-        force = ''
-    (ret, out) = run (mkfs, force, dev)
+
+    (ret, out) = run (mkfs, dev, block_cnt)
     if ret:
         panic("Unable to build fs:", dev)
     # enable hash tree indexing on fsswe
@@ -587,8 +747,12 @@ def init_loop(file, size, fstype):
         return dev
     if config.reformat()  or not os.access(file, os.R_OK | os.W_OK):
         if size < 8000:
-            error(file, "size must be larger than 8MB")
-        run("dd if=/dev/zero bs=1k count=0 seek=%d of=%s" %(size,  file))
+            panic(file, "size must be larger than 8MB, currently set to:", size)
+        (ret, out) = run("dd if=/dev/zero bs=1k count=0 seek=%d of=%s" %(size,
+                                                                         file))
+        if ret:
+            panic("Unable to create backing store:", file)
+
     loop = loop_base()
     # find next free loop
     for n in xrange(0, MAX_LOOP_DEVICES):
@@ -624,7 +788,7 @@ def block_dev(dev, size, fstype, format):
     if not is_block(dev):
         dev = init_loop(dev, size, fstype)
     if config.reformat() or (need_format(fstype, dev) and format == 'yes'):
-        mkfs(fstype, dev)
+        mkfs(dev, size, fstype)
 
 #    else:
 #        panic("device:", dev,
@@ -642,6 +806,16 @@ def if2addr(iface):
     ip = string.split(addr, ':')[1]
     return ip
 
+def get_local_nid(net_type, wildcard):
+    """Return the local nid. First look for an elan interface,
+      then use the local address. """
+    local = ""
+    if os.access('/proc/elan/device0/position', os.R_OK):
+        local = get_local_address('elan', '*')
+    else:
+        local = get_local_address(net_type, wildcard)
+    return local
+        
 def get_local_address(net_type, wildcard):
     """Return the local address for the network type."""
     local = ""
@@ -676,6 +850,8 @@ def is_prepared(uuid):
     """Return true if a device exists for the uuid"""
     # expect this format:
     # 1 UP ldlm ldlm ldlm_UUID 2
+    if config.lctl_dump():
+        return 0
     try:
         out = lctl.device_list()
         for s in out:
@@ -684,6 +860,21 @@ def is_prepared(uuid):
     except CommandError, e:
         e.dump()
     return 0
+
+def is_network_prepared():
+    """If the  PTLRPC device exists, then assumet that all networking
+       has been configured"""
+    if config.lctl_dump():
+        return 0
+    try:
+        out = lctl.device_list()
+        for s in out:
+            if 'RPCDEV_UUID' == string.split(s)[4]:
+                return 1
+    except CommandError, e:
+        e.dump()
+    return 0
+    
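Both is_prepared() and is_network_prepared() key on column 4 of the lctl
device_list output quoted above:

    >>> string.split('1 UP ldlm ldlm ldlm_UUID 2')[4]
    'ldlm_UUID'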
     
 def fs_is_mounted(path):
     """Return true if path is a mounted lustre filesystem"""
@@ -707,11 +898,11 @@ class Module:
     """ Base class for the rest of the modules. The default cleanup method is
     defined here, as well as some utility funcs.
     """
-    def __init__(self, module_name, dom_node):
-        self.dom_node = dom_node
+    def __init__(self, module_name, db):
+        self.db = db
         self.module_name = module_name
-        self.name = get_attr(dom_node, 'name')
-        self.uuid = get_attr(dom_node, 'uuid')
+        self.name = self.db.getName()
+        self.uuid = self.db.getUUID()
         self.kmodule_list = []
         self._server = None
         self._connected = 0
@@ -720,35 +911,16 @@ class Module:
         msg = string.join(map(str,args))
         print self.module_name + ":", self.name, self.uuid, msg
 
-
-    def lookup_server(self, srv_uuid):
-        """ Lookup a server's network information """
-        net = get_ost_net(self.dom_node.parentNode, srv_uuid)
-        if not net:
-            panic ("Unable to find a server for:", srv_uuid)
-        self._server = Network(net)
-
-    def get_server(self):
-        return self._server
-
     def cleanup(self):
         """ default cleanup, used for most modules """
         self.info()
-        srv = self.get_server()
-        if srv and local_net(srv):
-            try:
-                lctl.disconnect(srv.net_type, srv.nid, srv.port, srv.uuid)
-            except CommandError, e:
-                log(self.module_name, "disconnect failed: ", self.name)
-                e.dump()
-                cleanup_error(e.rc)
         try:
             lctl.cleanup(self.name, self.uuid)
         except CommandError, e:
             log(self.module_name, "cleanup failed: ", self.name)
             e.dump()
             cleanup_error(e.rc)
-
+            
     def add_portals_module(self, dev_dir, modname):
         """Append a module to list of modules to load."""
         self.kmodule_list.append((config.portals_dir(), dev_dir, modname))
@@ -804,195 +976,189 @@ class Module:
                 log('! unable to unload module:', mod)
                 logall(out)
         
-
 class Network(Module):
-    def __init__(self,dom_node):
-        Module.__init__(self, 'NETWORK', dom_node)
-        self.net_type = get_attr(dom_node,'type')
-        self.nid = get_text(dom_node, 'server', '*')
-        self.port = get_text_int(dom_node, 'port', 0)
-        self.send_mem = get_text_int(dom_node, 'send_mem', DEFAULT_TCPBUF)
-        self.recv_mem = get_text_int(dom_node, 'recv_mem', DEFAULT_TCPBUF)
+    def __init__(self,db):
+        Module.__init__(self, 'NETWORK', db)
+        self.net_type = self.db.get_val('nettype')
+        self.nid = self.db.get_val('nid', '*')
+        self.port = self.db.get_val_int('port', 0)
+        self.send_mem = self.db.get_val_int('sendmem', DEFAULT_TCPBUF)
+        self.recv_mem = self.db.get_val_int('recvmem', DEFAULT_TCPBUF)
+        self.irq_affinity = self.db.get_val_int('irqaffinity', 0)
+        self.nid_exchange = self.db.get_val_int('nidexchange', 0)
+
         if '*' in self.nid:
-            self.nid = get_local_address(self.net_type, self.nid)
+            self.nid = get_local_nid(self.net_type, self.nid)
             if not self.nid:
                 panic("unable to set nid for", self.net_type, self.nid)
             debug("nid:", self.nid)
 
+        self.hostaddr = self.db.get_val('hostaddr', self.nid)
+        if '*' in self.hostaddr:
+            self.hostaddr = get_local_address(self.net_type, self.hostaddr)
+            if not self.hostaddr:
+                panic("unable to set hostaddr for", self.net_type, self.hostaddr)
+            debug("hostaddr:", self.hostaddr)
+        # debug ( "hostaddr ", self.hostaddr, "net_type", self.net_type)
+
         self.add_portals_module("linux/oslib", 'portals')
         if node_needs_router():
             self.add_portals_module("linux/router", 'kptlrouter')
         if self.net_type == 'tcp':
             self.add_portals_module("linux/socknal", 'ksocknal')
         if self.net_type == 'toe':
-            self.add_portals_odule("/linux/toenal", 'ktoenal')
+            self.add_portals_module("/linux/toenal", 'ktoenal')
         if self.net_type == 'elan':
             self.add_portals_module("/linux/rqswnal", 'kqswnal')
         if self.net_type == 'gm':
             self.add_portals_module("/linux/gmnal", 'kgmnal')
         self.add_lustre_module('obdclass', 'obdclass')
-        self.add_lustre_module('ptlrpc', 'ptlrpc')
 
     def prepare(self):
+        if is_network_prepared():
+            return
         self.info(self.net_type, self.nid, self.port)
-        if self.net_type in ('tcp', 'toe'):
-            nal_id = '' # default is socknal
-            if self.net_type == 'toe':
-                nal_id = '-N 4'
-            ret, out = run(TCP_ACCEPTOR, '-s', self.send_mem, '-r', self.recv_mem, nal_id, self.port)
-            if ret:
-                raise CommandError(TCP_ACCEPTOR, out, ret)
-        ret = self.dom_node.getElementsByTagName('route_tbl')
-        for a in ret:
-            for r in a.getElementsByTagName('route'):
-                net_type = get_attr(r, 'type')
-                gw = get_attr(r, 'gw')
-                lo = get_attr(r, 'lo')
-                hi = get_attr(r,'hi', '')
-                lctl.add_route(net_type, gw, lo, hi)
-                if net_type in ('tcp', 'toe') and net_type == self.net_type and hi == '':
-                    srv = nid2server(self.dom_node.parentNode.parentNode, lo)
-                    if not srv:
-                        panic("no server for nid", lo)
-                    else:
-                        lctl.connect(srv.net_type, srv.nid, srv.port, srv.uuid, srv.send_mem, srv.recv_mem)
-
-            
         lctl.network(self.net_type, self.nid)
-        lctl.newdev(attach = "ptlrpc RPCDEV RPCDEV_UUID")
 
     def cleanup(self):
         self.info(self.net_type, self.nid, self.port)
-        ret = self.dom_node.getElementsByTagName('route_tbl')
-        for a in ret:
-            for r in a.getElementsByTagName('route'):
-                lo = get_attr(r, 'lo')
-                hi = get_attr(r,'hi', '')
-                if self.net_type in ('tcp', 'toe') and hi == '':
-                    srv = nid2server(self.dom_node.parentNode.parentNode, lo)
-                    if not srv:
-                        panic("no server for nid", lo)
-                    else:
-                        try:
-                            lctl.disconnect(srv.net_type, srv.nid, srv.port, srv.uuid)
-                        except CommandError, e:
-                            print "disconnect failed: ", self.name
-                            e.dump()
-                            cleanup_error(e.rc)
-                try:
-                    lctl.del_route(self.net_type, self.nid, lo, hi)
-                except CommandError, e:
-                    print "del_route failed: ", self.name
-                    e.dump()
-                    cleanup_error(e.rc)
-              
-        try:
-            lctl.cleanup("RPCDEV", "RPCDEV_UUID")
-        except CommandError, e:
-            print "cleanup failed: ", self.name
-            e.dump()
-            cleanup_error(e.rc)
+        if self.net_type in ('tcp', 'toe'):
+            stop_acceptor(self.port)
         try:
             lctl.disconnectAll(self.net_type)
         except CommandError, e:
             print "disconnectAll failed: ", self.name
             e.dump()
             cleanup_error(e.rc)
-        if self.net_type in ('tcp', 'toe'):
-            # yikes, this ugly! need to save pid in /var/something
-            run("killall acceptor")
+
+class Router(Module):
+    def __init__(self,db):
+        Module.__init__(self, 'ROUTER', db)
+    def prepare(self):
+        if is_network_prepared():
+            return
+        self.info()
+        for net_type, gw, lo, hi in self.db.get_route_tbl():
+            lctl.add_route(net_type, gw, lo, hi)
+            if net_type in ('tcp', 'toe') and local_net_type(net_type) and hi == '':
+                srvdb = self.db.nid2server(lo, net_type)
+
+                if not srvdb:
+                    panic("no server for nid", lo)
+                else:
+                    srv = Network(srvdb)
+                    lctl.connect(srv)
+    def cleanup(self):
+        for net_type, gw, lo, hi in self.db.get_route_tbl():
+            if net_type in ('tcp', 'toe') and local_net_type(net_type) and hi == '':
+                srvdb = self.db.nid2server(lo, net_type)
+                if not srvdb:
+                    panic("no server for nid", lo)
+                else:
+                    srv = Network(srvdb)
+                    try:
+                        lctl.disconnect(srv.net_type, srv.nid, srv.port, srv.uuid)
+                    except CommandError, e:
+                        print "disconnect failed: ", self.name
+                        e.dump()
+                        cleanup_error(e.rc)
+            try:
+                lctl.del_route(net_type, gw, lo, hi)
+            except CommandError, e:
+                print "del_route failed: ", self.name
+                e.dump()
+                cleanup_error(e.rc)
 
 class LDLM(Module):
-    def __init__(self,dom_node):
-        Module.__init__(self, 'LDLM', dom_node)
+    def __init__(self,db):
+        Module.__init__(self, 'LDLM', db)
         self.add_lustre_module('ldlm', 'ldlm') 
     def prepare(self):
         if is_prepared(self.uuid):
             return
         self.info()
-        lctl.newdev(attach="ldlm %s %s" % (self.name, self.uuid),
-                    setup ="")
-
-class LOV(Module):
-    def __init__(self,dom_node):
-        Module.__init__(self, 'LOV', dom_node)
-        self.mds_uuid = get_first_ref(dom_node, 'mds')
-        mds= lookup(dom_node.parentNode, self.mds_uuid)
-        self.mds_name = getName(mds)
-        devs = dom_node.getElementsByTagName('devices')
-        if len(devs) > 0:
-            dev_node = devs[0]
-            self.stripe_sz = get_attr_int(dev_node, 'stripesize', 65536)
-            self.stripe_off = get_attr_int(dev_node, 'stripeoffset', 0)
-            self.pattern = get_attr_int(dev_node, 'pattern', 0)
-            self.devlist = get_all_refs(dev_node, 'obd')
-            self.stripe_cnt = get_attr_int(dev_node, 'stripecount', len(self.devlist))
-        self.add_lustre_module('mdc', 'mdc')
-        self.add_lustre_module('lov', 'lov')
+        lctl.newdev(attach="ldlm %s %s" % (self.name, self.uuid))
+    def cleanup(self):
+        if is_prepared(self.uuid):
+            Module.cleanup(self)
 
+class PTLRPC(Module):
+    def __init__(self,db):
+        Module.__init__(self, 'PTLRPC', db)
+        self.add_lustre_module('ptlrpc', 'ptlrpc') 
     def prepare(self):
         if is_prepared(self.uuid):
             return
+        self.info()
+        lctl.newdev(attach="ptlrpc %s %s" % (self.name, self.uuid))
+    def cleanup(self):
+        if is_prepared(self.uuid):
+            Module.cleanup(self)
+
+class LOV(Module):
+    def __init__(self,db):
+        Module.__init__(self, 'LOV', db)
+        self.add_lustre_module('mdc', 'mdc')
+        self.add_lustre_module('lov', 'lov')
+        self.mds_uuid = self.db.get_first_ref('mds')
+        mds= self.db.lookup(self.mds_uuid)
+        self.mds_name = mds.getName()
+        self.stripe_sz = self.db.get_val_int('stripesize', 65536)
+        self.stripe_off = self.db.get_val_int('stripeoffset', 0)
+        self.pattern = self.db.get_val_int('stripepattern', 0)
+        self.devlist = self.db.get_refs('obd')
+        self.stripe_cnt = self.db.get_val_int('stripecount', len(self.devlist))
+        self.osclist = []
+        self.mdc_uuid = ''
         for obd_uuid in self.devlist:
-            obd = lookup(self.dom_node.parentNode, obd_uuid)
-            osc = get_osc(obd)
+            obd = self.db.lookup(obd_uuid)
+            osc = get_osc(obd, self.name)
             if osc:
-                try:
-                    # Ignore connection failures, because the LOV will DTRT with
-                    # an unconnected OSC.
-                    osc.prepare(ignore_connect_failure=1)
-                except CommandError:
-                    print "Error preparing OSC %s (inactive)\n" % osc_uuid
+                self.osclist.append(osc)
             else:
-                panic('osc not found:', osc_uuid)
-        mdc_uuid = prepare_mdc(self.dom_node.parentNode, self.mds_uuid)
+                panic('osc not found:', obd_uuid)
+            
+    def prepare(self):
+        if is_prepared(self.uuid):
+            return
+        for osc in self.osclist:
+            try:
+                # Ignore connection failures, because the LOV will DTRT with
+                # an unconnected OSC.
+                osc.prepare(ignore_connect_failure=1)
+            except CommandError:
+                print "Error preparing OSC %s (inactive)\n" % osc.uuid
+        self.mdc_uuid = prepare_mdc(self.db, self.name, self.mds_uuid)
         self.info(self.mds_uuid, self.stripe_cnt, self.stripe_sz,
                   self.stripe_off, self.pattern, self.devlist, self.mds_name)
         lctl.newdev(attach="lov %s %s" % (self.name, self.uuid),
-                    setup ="%s" % (mdc_uuid))
+                    setup ="%s" % (self.mdc_uuid))
 
     def cleanup(self):
-        if not is_prepared(self.uuid):
-            return
-        for obd_uuid in self.devlist:
-            obd = lookup(self.dom_node.parentNode, obd_uuid)
-            osc = get_osc(obd)
-            if osc:
-                osc.cleanup()
-            else:
-                panic('osc not found:', osc_uuid)
-        Module.cleanup(self)
-        cleanup_mdc(self.dom_node.parentNode, self.mds_uuid)
-
+        if is_prepared(self.uuid):
+            Module.cleanup(self)
+        for osc in self.osclist:
+            osc.cleanup()
+        cleanup_mdc(self.db, self.name, self.mds_uuid)
 
     def load_module(self):
-        for obd_uuid in self.devlist:
-            obd = lookup(self.dom_node.parentNode, obd_uuid)
-            osc = get_osc(obd)
-            if osc:
-                osc.load_module()
-                break
-            else:
-                panic('osc not found:', osc_uuid)
+        for osc in self.osclist:
+            osc.load_module()
+            break
         Module.load_module(self)
 
-
     def cleanup_module(self):
         Module.cleanup_module(self)
-        for obd_uuid in self.devlist:
-            obd = lookup(self.dom_node.parentNode, obd_uuid)
-            osc = get_osc(obd)
-            if osc:
-                osc.cleanup_module()
-                break
-            else:
-                panic('osc not found:', osc_uuid)
+        for osc in self.osclist:
+            osc.cleanup_module()
+            break
 
 class LOVConfig(Module):
-    def __init__(self,dom_node):
-        Module.__init__(self, 'LOVConfig', dom_node)
-        self.lov_uuid = get_first_ref(dom_node, 'lov')
-        l = lookup(dom_node.parentNode, self.lov_uuid)
+    def __init__(self,db):
+        Module.__init__(self, 'LOVConfig', db)
+
+        self.lov_uuid = self.db.get_first_ref('lov')
+        l = self.db.lookup(self.lov_uuid)
         self.lov = LOV(l)
         
     def prepare(self):
@@ -1007,29 +1173,59 @@ class LOVConfig(Module):
         #nothing to do here
         pass
 
-
-class MDS(Module):
-    def __init__(self,dom_node):
-        Module.__init__(self, 'MDS', dom_node)
-        self.devname, self.size = get_device(dom_node)
-        self.fstype = get_text(dom_node, 'fstype')
+class MDSDEV(Module):
+    def __init__(self,db):
+        Module.__init__(self, 'MDSDEV', db)
+        self.devpath = self.db.get_val('devpath','')
+        self.size = self.db.get_val_int('devsize', 0)
+        self.fstype = self.db.get_val('fstype', '')
+        # overwrite the original MDSDEV name and uuid with the MDS name and uuid
+        target_uuid = self.db.get_first_ref('target')
+        mds = self.db.lookup(target_uuid)
+        self.name = mds.getName()
+        self.lovconfig_uuids = mds.get_refs('lovconfig')
         # FIXME: if fstype not set, then determine based on kernel version
-        self.format = get_text(dom_node, 'autoformat', "no")
+        self.format = self.db.get_val('autoformat', "no")
+
+        active_uuid = mds.get_active_target()
+        if not active_uuid:
+            panic("No target device found:", target_uuid)
+        if active_uuid == self.uuid:
+            self.active = 1
+        else:
+            self.active = 0
+        self.target_dev_uuid = self.uuid
+        self.uuid = target_uuid
+        # modules
         if self.fstype == 'extN':
             self.add_lustre_module('extN', 'extN') 
         self.add_lustre_module('mds', 'mds')
-        self.add_lustre_module('obdclass', 'fsfilt_%s'%(self.fstype))
+        if self.fstype:
+            self.add_lustre_module('obdclass', 'fsfilt_%s' % (self.fstype))
+
+    def load_module(self):
+        if self.active:
+            Module.load_module(self)
             
     def prepare(self):
         if is_prepared(self.uuid):
             return
-        self.info(self.devname, self.fstype, self.format)
-        blkdev = block_dev(self.devname, self.size, self.fstype, self.format)
+        if not self.active:
+            debug(self.uuid, "not active")
+            return
+        self.info(self.devpath, self.fstype, self.format)
+        run_acceptors()
+        blkdev = block_dev(self.devpath, self.size, self.fstype, self.format)
         if not is_prepared('MDT_UUID'):
             lctl.newdev(attach="mdt %s %s" % ('MDT', 'MDT_UUID'),
                         setup ="")
         lctl.newdev(attach="mds %s %s" % (self.name, self.uuid),
                     setup ="%s %s" %(blkdev, self.fstype))
+        for uuid in self.lovconfig_uuids:
+            db = self.db.lookup(uuid)
+            lovconfig = LOVConfig(db)
+            lovconfig.prepare()
+            
     def cleanup(self):
         if is_prepared('MDT_UUID'):
             try:
@@ -1038,79 +1234,171 @@ class MDS(Module):
                 print "cleanup failed: ", self.name
                 e.dump()
                 cleanup_error(e.rc)
-        if not is_prepared(self.uuid):
-            return
-        Module.cleanup(self)
-        clean_loop(self.devname)
-
-# Very unusual case, as there is no MDC element in the XML anymore
-# Builds itself from an MDS node
-class MDC(Module):
-    def __init__(self,dom_node):
-        self.mds = MDS(dom_node)
-        self.dom_node = dom_node
-        self.module_name = 'MDC'
-        self.kmodule_list = []
-        self._server = None
-        self._connected = 0
-
-        host = socket.gethostname()
-        self.name = 'MDC_%s' % (self.mds.name)
-        self.uuid = '%s_%05x_%05x' % (self.name, int(random.random() * 1048576),
-                                      int(random.random() * 1048576))
-
-        self.lookup_server(self.mds.uuid)
-        self.add_lustre_module('mdc', 'mdc')
-
-    def prepare(self):
         if is_prepared(self.uuid):
-            return
-        self.info(self.mds.uuid)
-        srv = self.get_server()
-        lctl.connect(srv.net_type, srv.nid, srv.port, srv.uuid, srv.send_mem, srv.recv_mem)
-        lctl.newdev(attach="mdc %s %s" % (self.name, self.uuid),
-                        setup ="%s %s" %(self.mds.uuid, srv.uuid))
-            
-class OBD(Module):
-    def __init__(self, dom_node):
-        Module.__init__(self, 'OBD', dom_node)
-        self.obdtype = get_attr(dom_node, 'type')
-        self.devname, self.size = get_device(dom_node)
-        self.fstype = get_text(dom_node, 'fstype')
-        self.active_target = get_text(dom_node, 'active_target')
+            Module.cleanup(self)
+        clean_loop(self.devpath)
+
+class OSD(Module):
+    def __init__(self, db):
+        Module.__init__(self, 'OSD', db)
+        self.osdtype = self.db.get_val('osdtype')
+        self.devpath = self.db.get_val('devpath', '')
+        self.size = self.db.get_val_int('devsize', 0)
+        self.fstype = self.db.get_val('fstype', '')
+        target_uuid = self.db.get_first_ref('target')
+        ost = self.db.lookup(target_uuid)
+        self.name = ost.getName()
         # FIXME: if fstype not set, then determine based on kernel version
-        self.format = get_text(dom_node, 'autoformat', 'yes')
+        self.format = self.db.get_val('autoformat', 'yes')
         if self.fstype == 'extN':
             self.add_lustre_module('extN', 'extN') 
-        self.add_lustre_module(self.obdtype, self.obdtype)
+
+        active_uuid = ost.get_active_target()
+        if not active_uuid:
+            panic("No target device found:", target_uuid)
+        if active_uuid == self.uuid:
+            self.active = 1
+        else:
+            self.active = 0
+        self.target_dev_uuid = self.uuid
+        self.uuid = target_uuid
+        # modules
+        self.add_lustre_module('ost', 'ost')
+        self.add_lustre_module(self.osdtype, self.osdtype)
         if self.fstype:
             self.add_lustre_module('obdclass' , 'fsfilt_%s' % (self.fstype))
 
+    def load_module(self):
+        if self.active:
+            Module.load_module(self)
+
     # need to check /proc/mounts and /etc/mtab before
     # formatting anything.
     # FIXME: check if device is already formatted.
     def prepare(self):
         if is_prepared(self.uuid):
             return
-        self.info(self.obdtype, self.devname, self.size, self.fstype, self.format)
-        if self.obdtype == 'obdecho':
+        if not self.active:
+            debug(self.uuid, "not active")
+            return
+        self.info(self.osdtype, self.devpath, self.size, self.fstype, self.format)
+        run_acceptors()
+        if self.osdtype == 'obdecho':
             blkdev = ''
         else:
-            blkdev = block_dev(self.devname, self.size, self.fstype, self.format)
-        lctl.newdev(attach="%s %s %s" % (self.obdtype, self.name, self.uuid),
+            blkdev = block_dev(self.devpath, self.size, self.fstype, self.format)
+        lctl.newdev(attach="%s %s %s" % (self.osdtype, self.name, self.uuid),
                     setup ="%s %s" %(blkdev, self.fstype))
+        if not is_prepared('OSS_UUID'):
+            lctl.newdev(attach="ost %s %s" % ('OSS', 'OSS_UUID'),
+                        setup ="")
+
     def cleanup(self):
-        if not is_prepared(self.uuid):
+        if is_prepared('OSS_UUID'):
+            try:
+                lctl.cleanup("OSS", "OSS_UUID")
+            except CommandError, e:
+                print "cleanup failed: ", self.name
+                e.dump()
+                cleanup_error(e.rc)
+        if is_prepared(self.uuid):
+            Module.cleanup(self)
+        if not self.osdtype == 'obdecho':
+            clean_loop(self.devpath)
+
+# Generic client module, used by OSC and MDC
+class Client(Module):
+    def __init__(self, tgtdb, module, owner):
+        self.target_name = tgtdb.getName()
+        self.target_uuid = tgtdb.getUUID()
+        self.db = tgtdb
+
+        self.tgt_dev_uuid = tgtdb.get_active_target()
+        if not self.tgt_dev_uuid:
+            panic("No target device found for target:", self.target_name)
+            
+        self.kmodule_list = []
+        self._server = None
+        self._connected = 0
+
+        self.module = module
+        self.module_name = string.upper(module)
+        self.name = '%s_%s_%s' % (self.module_name, owner, self.target_name)
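+        # pseudo-random uuid: four 5-hex-digit fields around a name fragment
+        # capped at 14 chars (5+5+1+14+1+5+5 = 36 chars max), then clipped to
+        # the 36-character uuid limit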
+        self.uuid = '%05x%05x_%.14s_%05x%05x' % (int(random.random() * 1048576),
+                                              int(random.random() * 1048576),self.name,
+                                              int(random.random() * 1048576),
+                                              int(random.random() * 1048576))
+        self.uuid = self.uuid[0:36]
+        self.lookup_server(self.tgt_dev_uuid)
+        self.add_lustre_module(module, module)
+
+    def lookup_server(self, srv_uuid):
+        """ Lookup a server's network information """
+        self._server_nets = self.db.get_ost_net(srv_uuid)
+        if len(self._server_nets) == 0:
+            panic ("Unable to find a server for:", srv_uuid)
+
+    def get_servers(self):
+        return self._server_nets
+
+    def prepare(self, ignore_connect_failure = 0):
+        if is_prepared(self.uuid):
             return
+        self.info(self.target_uuid)
+        try:
+            srv = local_net(self.get_servers())
+            if srv:
+                lctl.connect(srv)
+            else:
+                srv, r =  find_route(self.get_servers())
+                if srv:
+                    lctl.add_route_host(r[0], srv.uuid, r[1], r[2])
+                else:
+                    panic ("no route to",  self.target_uuid)
+        except CommandError:
+            if (ignore_connect_failure == 0):
+                raise
+        if srv:
+            lctl.newdev(attach="%s %s %s" % (self.module, self.name, self.uuid),
+                        setup ="%s %s" %(self.target_uuid, srv.uuid))
+
+    def cleanup(self):
         Module.cleanup(self)
-        if not self.obdtype == 'obdecho':
-            clean_loop(self.devname)
+        srv = local_net(self.get_servers())
+        if srv:
+            try:
+                lctl.disconnect(srv.net_type, srv.nid, srv.port, srv.uuid)
+            except CommandError, e:
+                log(self.module_name, "disconnect failed: ", self.name)
+                e.dump()
+                cleanup_error(e.rc)
+        else:
+            self.info(self.target_uuid)
+            srv, r =  find_route(self.get_servers())
+            if srv:
+                try:
+                    lctl.del_route_host(r[0], srv.uuid, r[1], r[2])
+                except CommandError, e:
+                    print "del_route failed: ", self.name
+                    e.dump()
+                    cleanup_error(e.rc)
+
+
+
+class MDC(Client):
+    def __init__(self, db, owner):
+        Client.__init__(self, db, 'mdc', owner)
 
+class OSC(Client):
+    def __init__(self, db, owner):
+        Client.__init__(self, db, 'osc', owner)
+
+            
 class COBD(Module):
-    def __init__(self, dom_node):
-        Module.__init__(self, 'COBD', dom_node)
-        self.real_uuid = get_first_ref(dom_node, 'real_obd')
-        self.cache_uuid = get_first_ref(dom_node, 'cache_obd')
+    def __init__(self, db):
+        Module.__init__(self, 'COBD', db)
+        self.real_uuid = self.db.get_first_ref('realobd')
+        self.cache_uuid = self.db.get_first_ref('cacheobd')
         self.add_lustre_module('cobd' , 'cobd')
 
     # need to check /proc/mounts and /etc/mtab before
@@ -1123,28 +1411,15 @@ class COBD(Module):
         lctl.newdev(attach="cobd %s %s" % (self.name, self.uuid),
                     setup ="%s %s" %(self.real_uuid, self.cache_uuid))
 
-class OST(Module):
-    def __init__(self,dom_node):
-        Module.__init__(self, 'OST', dom_node)
-        self.obd_uuid = get_first_ref(dom_node, 'obd')
-        self.add_lustre_module('ost', 'ost')
-
-    def prepare(self):
-        if is_prepared(self.uuid):
-            return
-        self.info(self.obd_uuid)
-        lctl.newdev(attach="ost %s %s" % (self.name, self.uuid),
-                    setup ="%s" % (self.obd_uuid))
-
 
 # virtual interface for  OSC and LOV
 class VOSC(Module):
-    def __init__(self,dom_node):
-        Module.__init__(self, 'VOSC', dom_node)
-        if dom_node.nodeName == 'lov':
-            self.osc = LOV(dom_node)
+    def __init__(self,db, owner):
+        Module.__init__(self, 'VOSC', db)
+        if db.get_class() == 'lov':
+            self.osc = LOV(db)
         else:
-            self.osc = get_osc(dom_node)
+            self.osc = get_osc(db, owner)
     def get_uuid(self):
         return self.osc.uuid
     def prepare(self):
@@ -1155,81 +1430,34 @@ class VOSC(Module):
         self.osc.load_module()
     def cleanup_module(self):
         self.osc.cleanup_module()
-        
-
-class OSC(Module):
-    def __init__(self, dom_node, obd_name, obd_uuid, ost_uuid):
-        self.dom_node = dom_node
-        self.module_name = 'OSC'
-        self.name = 'OSC_%s' % (obd_name)
-        self.uuid = '%s_%05x' % (self.name, int(random.random() * 1048576))
-        self.kmodule_list = []
-        self._server = None
-        self._connected = 0
-
-        self.obd_uuid = obd_uuid
-        self.ost_uuid = ost_uuid
-        self.lookup_server(self.ost_uuid)
-        self.add_lustre_module('osc', 'osc')
+    def need_mdc(self):
+        return self.db.get_class() != 'lov'
+    def get_mdc_uuid(self):
+        if self.db.get_class() == 'lov':
+            return self.osc.mdc_uuid
+        return ''
 
-    def prepare(self, ignore_connect_failure = 0):
-        if is_prepared(self.uuid):
-            return
-        self.info(self.obd_uuid, self.ost_uuid)
-        srv = self.get_server()
-        try:
-            if local_net(srv):
-                lctl.connect(srv.net_type, srv.nid, srv.port, srv.uuid, srv.send_mem, srv.recv_mem)
-            else:
-                r =  find_route(srv)
-                if r:
-                    lctl.add_route_host(r[0], srv.uuid, r[1], r[2])
-                else:
-                    panic ("no route to",  srv.nid)
-        except CommandError:
-            if (ignore_connect_failure == 0):
-                pass
-            
-        lctl.newdev(attach="osc %s %s" % (self.name, self.uuid),
-                    setup ="%s %s" %(self.obd_uuid, srv.uuid))
-
-    def cleanup(self):
-        srv = self.get_server()
-        if local_net(srv):
-            Module.cleanup(self)
-        else:
-            self.info(self.obd_uuid, self.ost_uuid)
-            r =  find_route(srv)
-            if r:
-                try:
-                    lctl.del_route_host(r[0], srv.uuid, r[1], r[2])
-                except CommandError, e:
-                    print "del_route failed: ", self.name
-                    e.dump()
-                    cleanup_error(e.rc)
-            Module.cleanup(self)
-            
 
 class ECHO_CLIENT(Module):
-    def __init__(self,dom_node):
-        Module.__init__(self, 'ECHO_CLIENT', dom_node)
+    def __init__(self,db):
+        Module.__init__(self, 'ECHO_CLIENT', db)
         self.add_lustre_module('obdecho', 'obdecho')
-        self.obd_uuid = get_first_ref(dom_node, 'obd')
-        obd = lookup(self.dom_node.parentNode, self.obd_uuid)
-        self.osc = VOSC(obd)
+        self.obd_uuid = self.db.get_first_ref('obd')
+        obd = self.db.lookup(self.obd_uuid)
+        self.osc = VOSC(obd, self.name)
 
     def prepare(self):
         if is_prepared(self.uuid):
             return
         self.osc.prepare() # XXX This is so cheating. -p
         self.info(self.obd_uuid)
-            
+
         lctl.newdev(attach="echo_client %s %s" % (self.name, self.uuid),
-                    setup = self.obd_uuid)
+                    setup = self.osc.get_uuid())
 
     def cleanup(self):
-        if not is_prepared(self.uuid):
-            return
+        if is_prepared(self.uuid):
+            Module.cleanup(self)
         self.osc.cleanup()
 
     def load_module(self):
@@ -1241,23 +1469,29 @@ class ECHO_CLIENT(Module):
 
 
 class Mountpoint(Module):
-    def __init__(self,dom_node):
-        Module.__init__(self, 'MTPT', dom_node)
-        self.path = get_text(dom_node, 'path')
-        self.mds_uuid = get_first_ref(dom_node, 'mds')
-        self.obd_uuid = get_first_ref(dom_node, 'obd')
-        self.add_lustre_module('mdc', 'mdc')
+    def __init__(self,db):
+        Module.__init__(self, 'MTPT', db)
+        self.path = self.db.get_val('path')
+        self.mds_uuid = self.db.get_first_ref('mds')
+        self.obd_uuid = self.db.get_first_ref('obd')
+        obd = self.db.lookup(self.obd_uuid)
+        self.vosc = VOSC(obd, self.name)
+        if self.vosc.need_mdc():
+            self.add_lustre_module('mdc', 'mdc')
         self.add_lustre_module('llite', 'llite')
-        obd = lookup(self.dom_node.parentNode, self.obd_uuid)
-        self.osc = VOSC(obd)
 
 
     def prepare(self):
-        self.osc.prepare()
-        mdc_uuid = prepare_mdc(self.dom_node.parentNode, self.mds_uuid)
+        self.vosc.prepare()
+        if self.vosc.need_mdc():
+            mdc_uuid = prepare_mdc(self.db, self.name,  self.mds_uuid)
+        else:
+            mdc_uuid = self.vosc.get_mdc_uuid()
+        if not mdc_uuid:
+            panic("Unable to determine MDC UUID. Probably need to cleanup before re-mounting.")
         self.info(self.path, self.mds_uuid, self.obd_uuid)
         cmd = "mount -t lustre_lite -o osc=%s,mdc=%s none %s" % \
-              (self.osc.get_uuid(), mdc_uuid, self.path)
+              (self.vosc.get_uuid(), mdc_uuid, self.path)
         run("mkdir", self.path)
         ret, val = run(cmd)
         if ret:
@@ -1276,218 +1510,465 @@ class Mountpoint(Module):
         if fs_is_mounted(self.path):
             panic("fs is still mounted:", self.path)
 
-        self.osc.cleanup()
-        cleanup_mdc(self.dom_node.parentNode, self.mds_uuid)
+        self.vosc.cleanup()
+        if self.vosc.need_mdc():
+            cleanup_mdc(self.db, self.name, self.mds_uuid)
 
     def load_module(self):
-        self.osc.load_module()
+        self.vosc.load_module()
         Module.load_module(self)
     def cleanup_module(self):
         Module.cleanup_module(self)
-        self.osc.cleanup_module()
+        self.vosc.cleanup_module()
 
 
 # ============================================================
 # XML processing and query
 
-# OSC is no longer in the xml, so we have to fake it.
-# this is getting ugly and begging for another refactoring
-def get_osc(obd_dom):
-    obd = OBD(obd_dom)
-    osc = OSC(obd_dom, obd.name, obd.uuid, obd.active_target)
-    return osc
-
-
-def get_device(obd):
-    list = obd.getElementsByTagName('device')
-    if len(list) > 0:
-        dev = list[0]
-        dev.normalize();
-        size = get_attr_int(dev, 'size', 0)
-        return dev.firstChild.data, size
-    return '', 0
-
-# Get the text content from the first matching child
-# If there is no content (or it is all whitespace), return
-# the default
-def get_text(dom_node, tag, default=""):
-    list = dom_node.getElementsByTagName(tag)
-    if len(list) > 0:
-        dom_node = list[0]
-        dom_node.normalize()
-        if dom_node.firstChild:
-            txt = string.strip(dom_node.firstChild.data)
-            if txt:
-                return txt
-    return default
-
-def get_text_int(dom_node, tag, default=0):
-    list = dom_node.getElementsByTagName(tag)
-    n = default
-    if len(list) > 0:
-        dom_node = list[0]
-        dom_node.normalize()
-        if dom_node.firstChild:
-            txt = string.strip(dom_node.firstChild.data)
-            if txt:
-                try:
-                    n = int(txt)
-                except ValueError:
-                    panic("text value is not integer:", txt)
-    return n
+class LustreDB:
+    def lookup(self, uuid):
+        """ lookup returns a new LustreDB instance"""
+        return self._lookup_by_uuid(uuid)
+
+    def lookup_name(self, name, class_name = ""):
+        """ lookup returns a new LustreDB instance"""
+        return self._lookup_by_name(name, class_name)
+
+    def lookup_class(self, class_name):
+        """ lookup returns a new LustreDB instance"""
+        return self._lookup_by_class(class_name)
+
+    def get_val(self, tag, default=None):
+        v =  self._get_val(tag)
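+        # an empty value and a missing one are treated alike here: the truth
+        # test below sends both to the default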
+        if v:
+            return v
+        if default != None:
+            return default
+        debug("LustreDB", self.getName(), " no value for:", tag)
+        return None
 
-def get_attr(dom_node, attr, default=""):
-    v = dom_node.getAttribute(attr)
-    if v:
-        return v
-    return default
+    def get_class(self):
+        return self._get_class()
 
-def get_attr_int(dom_node, attr, default=0):
-    n = default
-    v = dom_node.getAttribute(attr)
-    if v:
+    def get_val_int(self, tag, default=0):
+        str = self._get_val(tag)
         try:
-            n = int(v)
+            if str:
+                return int(str)
+            return default
         except ValueError:
-            panic("attr value is not integer", v)
-    return n
-
-def get_first_ref(dom_node, tag):
-    """ Get the first uuidref of the type TAG. Used one only
-    one is expected.  Returns the uuid."""
-    uuid = None
-    refname = '%s_ref' % tag
-    list = dom_node.getElementsByTagName(refname)
-    if len(list) > 0:
-        uuid = getRef(list[0])
-    return uuid
+            panic("text value is not integer:", str)
+            
+    def get_first_ref(self, tag):
+        """ Get the first uuidref of the type TAG. Only
+        one is expected.  Returns the uuid."""
+        uuids = self._get_refs(tag)
+        if len(uuids) > 0:
+            return  uuids[0]
+        return None
     
-def get_all_refs(dom_node, tag):
-    """ Get all the refs of type TAG.  Returns list of uuids. """
-    uuids = []
-    refname = '%s_ref' % tag
-    list = dom_node.getElementsByTagName(refname)
-    if len(list) > 0:
-        for i in list:
-            uuids.append(getRef(i))
-    return uuids
-
-def get_ost_net(dom_node, uuid):
-    ost = lookup(dom_node, uuid)
-    uuid = get_first_ref(ost, 'network')
-    if not uuid:
+    def get_refs(self, tag):
+        """ Get all the refs of type TAG.  Returns list of uuids. """
+        uuids = self._get_refs(tag)
+        return uuids
+
+    def get_all_refs(self):
+        """ Get all the refs.  Returns list of uuids. """
+        uuids = self._get_all_refs()
+        return uuids
+
+    def get_ost_net(self, osd_uuid):
+        srv_list = []
+        if not osd_uuid:
+            return srv_list
+        osd = self.lookup(osd_uuid)
+        node_uuid = osd.get_first_ref('node')
+        node = self.lookup(node_uuid)
+        if not node:
+            panic("unable to find node for osd_uuid:", osd_uuid,
+                  " node_ref:", node_uuid)
+        for net_uuid in node.get_networks():
+            db = node.lookup(net_uuid)
+            srv_list.append(Network(db))
+        return srv_list
+
+    def nid2server(self, nid, net_type):
+        netlist = self.lookup_class('network')
+        for net_db in netlist:
+            if net_db.get_val('nid') == nid and net_db.get_val('nettype') == net_type: 
+                return net_db
         return None
-    return lookup(dom_node, uuid)
-
-def nid2server(dom_node, nid):
-    netlist = dom_node.getElementsByTagName('network')
-    for net_node in netlist:
-        if get_text(net_node, 'server') == nid:
-            return Network(net_node)
-    return None
     
-def lookup(dom_node, uuid):
-    for n in dom_node.childNodes:
-        if n.nodeType == n.ELEMENT_NODE:
-            if getUUID(n) == uuid:
-                return n
+    # the tag name is the service type
+    # fixme: this should do some checks to make sure the dom_node is a service
+    #
+    # determine what "level" a particular node is at.
+    
+    # the order of initialization is based on level.
+    def getServiceLevel(self):
+        type = self.get_class()
+        ret = 0
+        if type in ('network',):
+            ret = 5
+        elif type in ('routetbl',):
+            ret = 6
+        elif type in ('ptlrpc',):
+            ret = 7
+        elif type in ('device', 'ldlm'):
+            ret = 20
+        elif type in ('osd', 'mdd', 'cobd'):
+            ret = 30
+        elif type in ('mdsdev','ost'):
+            ret = 40
+        elif type in ('mdc','osc'):
+            ret = 50
+        elif type in ('lov',):
+            ret = 60
+        elif type in ('mountpoint', 'echoclient'):
+            ret = 70
+
+        if ret < config.minlevel() or ret > config.maxlevel():
+            ret = 0 
+        return ret
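These levels drive ordering: getServices() below returns (level, db) tuples
sorted ascending, so a driver loop shaped roughly like the following (the real
setup/cleanup driver lives outside this section; startService is a
hypothetical name) brings services up network-first and tears them down in
reverse:

    services = prof_db.getServices()   # e.g. [(5, net), (20, ldlm), ..., (70, mtpt)]
    for level, svc_db in services:
        startService(svc_db)
    services.reverse()                 # cleanup walks the same list backwards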
+    
+    #
+    # return list of services in a profile. list is a list of tuples
+    # [(level, db_object),]
+    def getServices(self):
+        list = []
+        for ref_class, ref_uuid in self.get_all_refs():
+            servdb = self.lookup(ref_uuid)
+            if servdb:
+                level = servdb.getServiceLevel()
+                if level > 0:
+                    list.append((level, servdb))
+            else:
+                panic('service not found: ' + ref_uuid)
+
+        list.sort()
+        return list
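+    # The (level, db) tuples sort by level, which fixes the startup
+    # order, e.g. (illustrative):
+    #   [(5, <network>), (20, <ldlm>), (40, <ost>)]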
+
+    # Find the target_device for target on a node
+    # node->profiles->device_refs->target
+    def get_target_device(self, target_uuid, node_name):
+        node_db = self.lookup_name(node_name)
+        if not node_db:
+            return None
+        prof_list = node_db.get_refs('profile')
+        for prof_uuid in prof_list:
+            prof_db = node_db.lookup(prof_uuid)
+            ref_list = prof_db.get_all_refs()
+            for ref in ref_list:
+                dev = self.lookup(ref[1])
+                if dev and dev.get_first_ref('target') == target_uuid:
+                    return ref[1]
+        return None
+
+    def get_active_target(self):
+        target_uuid = self.getUUID()
+        target_name = self.getName()
+        node_name = config.select(target_name)
+        if node_name:
+            tgt_dev_uuid = self.get_target_device(target_uuid, node_name)
+        else:
+            tgt_dev_uuid = self.get_first_ref('active')
+        return tgt_dev_uuid
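+    # Example: with "--select ost1=nodeB", config.select('ost1') returns
+    # 'nodeB', so the target device configured on nodeB is used instead
+    # of the config's default 'active' reference.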
+        
+
+    # get all network uuids for this node
+    def get_networks(self):
+        ret = []
+        prof_list = self.get_refs('profile')
+        for prof_uuid in prof_list:
+            prof_db = self.lookup(prof_uuid)
+            net_list = prof_db.get_refs('network')
+            #debug("get_networks():", prof_uuid, net_list)
+            for net_uuid in net_list:
+                ret.append(net_uuid)
+        return ret
+
+class LustreDB_XML(LustreDB):
+    def __init__(self, dom, root_node):
+        # init xmlfile
+        self.dom_node = dom
+        self.root_node = root_node
+
+    def xmltext(self, dom_node, tag):
+        list = dom_node.getElementsByTagName(tag)
+        if len(list) > 0:
+            dom_node = list[0]
+            dom_node.normalize()
+            if dom_node.firstChild:
+                txt = string.strip(dom_node.firstChild.data)
+                if txt:
+                    return txt
+
+    def xmlattr(self, dom_node, attr):
+        return dom_node.getAttribute(attr)
+
+    def _get_val(self, tag):
+        """a value could be an attribute of the current node
+        or the text value in a child node"""
+        ret  = self.xmlattr(self.dom_node, tag)
+        if not ret:
+            ret = self.xmltext(self.dom_node, tag)
+        return ret
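+    # Example (illustrative element, not from the schema): given
+    #   <network uuid="NET_uml1" nettype="tcp"><nid>uml1</nid></network>
+    # _get_val('nettype') comes from the attribute, while _get_val('nid')
+    # falls back to the text of the <nid> child element.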
+
+    def _get_class(self):
+        return self.dom_node.nodeName
+
+    #
+    # [(ref_class, ref_uuid),]
+    def _get_all_refs(self):
+        list = []
+        for n in self.dom_node.childNodes: 
+            if n.nodeType == n.ELEMENT_NODE:
+                ref_uuid = self.xml_get_ref(n)
+                ref_class = n.nodeName
+                list.append((ref_class, ref_uuid))
+                    
+        list.sort()
+        return list
+
+    def _get_refs(self, tag):
+        """ Get all the refs of type TAG.  Returns list of uuids. """
+        uuids = []
+        refname = '%s_ref' % tag
+        reflist = self.dom_node.getElementsByTagName(refname)
+        for r in reflist:
+            uuids.append(self.xml_get_ref(r))
+        return uuids
+
+    def xmllookup_by_uuid(self, dom_node, uuid):
+        for n in dom_node.childNodes:
+            if n.nodeType == n.ELEMENT_NODE:
+                if self.xml_get_uuid(n) == uuid:
+                    return n
+                else:
+                    n = self.xmllookup_by_uuid(n, uuid)
+                    if n: return n
+        return None
+
+    def _lookup_by_uuid(self, uuid):
+        dom = self.xmllookup_by_uuid(self.root_node, uuid)
+        if dom:
+            return LustreDB_XML(dom, self.root_node)
+
+    def xmllookup_by_name(self, dom_node, name):
+        for n in dom_node.childNodes:
+            if n.nodeType == n.ELEMENT_NODE:
+                if self.xml_get_name(n) == name:
+                    return n
+                else:
+                    n = self.xmllookup_by_name(n, name)
+                    if n: return n
+        return None
+
+    def _lookup_by_name(self, name, class_name):
+        dom = self.xmllookup_by_name(self.root_node, name)
+        if dom:
+            return LustreDB_XML(dom, self.root_node)
+
+    def xmllookup_by_class(self, dom_node, class_name):
+        return dom_node.getElementsByTagName(class_name)
+
+    def _lookup_by_class(self, class_name):
+        ret = []
+        domlist = self.xmllookup_by_class(self.root_node, class_name)
+        for node in domlist:
+            ret.append(LustreDB_XML(node, self.root_node))
+        return ret
+
+    def xml_get_name(self, n):
+        return n.getAttribute('name')
+        
+    def getName(self):
+        return self.xml_get_name(self.dom_node)
+
+    def xml_get_ref(self, n):
+        return n.getAttribute('uuidref')
+
+    def xml_get_uuid(self, dom_node):
+        return dom_node.getAttribute('uuid')
+
+    def getUUID(self):
+        return self.xml_get_uuid(self.dom_node)
+
+    def get_routes(self, type, gw):
+        """ Return the routes as a list of tuples of the form:
+        [(type, gw, lo, hi),]"""
+        res = []
+        tbl = self.dom_node.getElementsByTagName('routetbl')
+        for t in tbl:
+            routes = t.getElementsByTagName('route')
+            for r in routes:
+                net_type = self.xmlattr(r, 'type')
+                if type != net_type:
+                    lo = self.xmlattr(r, 'lo')
+                    hi = self.xmlattr(r, 'hi')
+                    res.append((type, gw, lo, hi))
+        return res
+
+    def get_route_tbl(self):
+        ret = []
+        for r in self.dom_node.getElementsByTagName('route'):
+            net_type = self.xmlattr(r, 'type')
+            gw = self.xmlattr(r, 'gw')
+            lo = self.xmlattr(r, 'lo')
+            hi = self.xmlattr(r, 'hi')
+            ret.append((net_type, gw, lo, hi))
+        return ret
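+    # Example return value (made-up nids):
+    #   [('tcp', '10.0.0.5', '10.0.1.2', '10.0.1.10')]
+    # i.e. a range of nids (lo..hi) associated with gateway gw on
+    # net_type.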
+
+
+# ================================================================    
+# LDAP Support
+class LustreDB_LDAP(LustreDB):
+    def __init__(self, name, attrs,
+                 base = "fs=lustre",
+                 parent = None,
+                 url  = "ldap://localhost",
+                 user = "cn=Manager, fs=lustre",
+                 pw   = "secret"
+                 ):
+        self._name = name
+        self._attrs = attrs
+        self._base = base
+        self._parent = parent
+        self._url  = url
+        self._user = user
+        self._pw   = pw
+        if parent:
+            self.l = parent.l
+            self._base = parent._base
+        else:
+            self.open()
+
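+    # Typical root instantiation, as done in main() below (url and base
+    # values are illustrative):
+    #   db = LustreDB_LDAP('', {}, base="config=test,fs=lustre",
+    #                      url="ldap://localhost")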
+    def open(self):
+        import ldap
+        try:
+            self.l = ldap.initialize(self._url)
+            # Set LDAP protocol version used
+            self.l.protocol_version=ldap.VERSION3
+            # user and pw only needed if modifying db
+            self.l.bind_s("", "", ldap.AUTH_SIMPLE)
+        except ldap.LDAPError, e:
+            panic(e)
+            # FIXME, do something useful here
+
+    def close(self):
+        self.l.unbind_s()
+
+    def ldap_search(self, filter):
+        """Return list of uuids matching the filter."""
+        import ldap
+        dn = self._base
+        ret = []
+        uuids = []
+        try:
+            for name, attrs in self.l.search_s(dn, ldap.SCOPE_ONELEVEL,
+                                        filter, ["uuid"]):
+                for v in attrs['uuid']:
+                    uuids.append(v)
+        except ldap.NO_SUCH_OBJECT, e:
+            pass
+        except ldap.LDAPError, e:
+            print e                     # FIXME: die here?
+        if len(uuids) > 0:
+            for uuid in uuids:
+                ret.append(self._lookup_by_uuid(uuid))
+        return ret
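+    # Example: ldap_search("objectclass=OST") collects the uuid
+    # attribute of every matching entry one level below self._base,
+    # then wraps each one via _lookup_by_uuid().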
+
+    def _lookup_by_name(self, name, class_name):
+        list = self.ldap_search("lustreName=%s" %(name))
+        if len(list) == 1:
+            return list[0]
+        return []
+
+    def _lookup_by_class(self, class_name):
+        return self.ldap_search("objectclass=%s" %(string.upper(class_name)))
+
+    def _lookup_by_uuid(self, uuid):
+        import ldap
+        dn = "uuid=%s,%s" % (uuid, self._base)
+        ret = None
+        try:
+            for name, attrs in self.l.search_s(dn, ldap.SCOPE_BASE,
+                                               "objectclass=*"):
+                ret = LustreDB_LDAP(name, attrs,  parent = self)
+                        
+        except ldap.NO_SUCH_OBJECT, e:
+            debug("NO_SUCH_OBJECT:", uuid)
+            pass                        # not found, return None below
+        except ldap.LDAPError, e:
+            print e                     # FIXME: die here?
+        return ret
+
+
+    def _get_val(self, k):
+        ret = None
+        if self._attrs.has_key(k):
+            v = self._attrs[k]
+            if type(v) == types.ListType:
+                ret = str(v[0])
             else:
-                n = lookup(n, uuid)
-                if n: return n
-    return None
-            
-# Get name attribute of dom_node
-def getName(dom_node):
-    return dom_node.getAttribute('name')
+                ret = str(v)
+        return ret
 
-def getRef(dom_node):
-    return dom_node.getAttribute('uuidref')
+    def _get_class(self):
+        return string.lower(self._attrs['objectClass'][0])
 
-# Get name attribute of dom_node
-def getUUID(dom_node):
-    return dom_node.getAttribute('uuid')
+    #
+    # [(ref_class, ref_uuid),]
+    def _get_all_refs(self):
+        list = []
+        for k in self._attrs.keys():
+            if re.search('.*Ref', k):
+                for uuid in self._attrs[k]:
+                    list.append((k, uuid))
+        return list
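+    # Example: an entry attribute like (value illustrative)
+    #   networkRef: ['NET_uml1_UUID']
+    # contributes ('networkRef', 'NET_uml1_UUID') to the returned list.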
 
-# the tag name is the service type
-# fixme: this should do some checks to make sure the dom_node is a service
-def getServiceType(dom_node):
-    return dom_node.nodeName
+    def _get_refs(self, tag):
+        """ Get all the refs of type TAG.  Returns list of uuids. """
+        refname = '%sRef' % tag
+        if self._attrs.has_key(refname):
+            return self._attrs[refname]
+        return []
 
-#
-# determine what "level" a particular node is at.
-# the order of iniitailization is based on level. 
-def getServiceLevel(dom_node):
-    type = getServiceType(dom_node)
-    ret=0;
-    if type in ('network',):
-        ret = 10
-    elif type in ('device', 'ldlm'):
-        ret = 20
-    elif type in ('obd', 'mdd', 'cobd'):
-        ret = 30
-    elif type in ('mds','ost'):
-        ret = 40
-    elif type in ('mdc','osc'):
-        ret = 50
-    elif type in ('lov', 'lovconfig'):
-        ret = 60
-    elif type in ('mountpoint', 'echo_client'):
-        ret = 70
-
-    if ret < config.minlevel() or ret > config.maxlevel():
-        ret = 0 
-    return ret
+    def getName(self):
+        return self._get_val('lustreName')
 
-#
-# return list of services in a profile. list is a list of tuples
-# [(level, dom_node),]
-def getServices(lustreNode, profileNode):
-    list = []
-    for n in profileNode.childNodes: 
-        if n.nodeType == n.ELEMENT_NODE:
-            servNode = lookup(lustreNode, getRef(n))
-            if not servNode:
-                print n
-                panic('service not found: ' + getRef(n))
-            level = getServiceLevel(servNode)
-           if level > 0:
-                list.append((level, servNode))
-    list.sort()
-    return list
-
-def getByName(lustreNode, name, tag):
-    ndList = lustreNode.getElementsByTagName(tag)
-    for nd in ndList:
-        if getName(nd) == name:
-            return nd
-    return None
-    
+    def getUUID(self):
+        return self._get_val('uuid')
+
+    def get_route_tbl(self):
+        return []
 
 ############################################################
 # MDC UUID hack - 
 # FIXME: clean this mess up!
 #
-saved_mdc = {}
-def prepare_mdc(dom_node, mds_uuid):
-    global saved_mdc
-    mds_node = lookup(dom_node, mds_uuid);
-    if not mds_node:
+# OSC is no longer in the xml, so we have to fake it.
+# This is getting ugly and begging for another refactoring.
+def get_osc(ost_db, owner):
+    osc = OSC(ost_db, owner)
+    return osc
+
+def get_mdc(db, owner, mds_uuid):
+    mds_db = db.lookup(mds_uuid);
+    if not mds_db:
         panic("no mds:", mds_uuid)
-    if saved_mdc.has_key(mds_uuid):
-        return saved_mdc[mds_uuid]
-    mdc = MDC(mds_node)
+    mdc = MDC(mds_db, owner)
+    return mdc
+
+def prepare_mdc(db, owner, mds_uuid):
+    mdc = get_mdc(db, owner, mds_uuid)
     mdc.prepare()
-    saved_mdc[mds_uuid] = mdc.uuid
     return mdc.uuid
 
-def cleanup_mdc(dom_node, mds_uuid):
-    global saved_mdc
-    mds_node = lookup(dom_node, mds_uuid);
-    if not mds_node:
-        panic("no mds:", mds_uuid)
-    if not saved_mdc.has_key(mds_uuid):
-        mdc = MDC(mds_node)
-        mdc.cleanup()
-        saved_mdc[mds_uuid] = mdc.uuid
+def cleanup_mdc(db, owner, mds_uuid):
+    mdc = get_mdc(db, owner, mds_uuid)
+    mdc.cleanup()
         
 
 ############################################################
@@ -1497,125 +1978,111 @@ routes = []
 local_node = []
 router_flag = 0
 
-def init_node(dom_node):
-    global local_node, router_flag
-    netlist = dom_node.getElementsByTagName('network')
-    for dom_net in netlist:
-        type = get_attr(dom_net, 'type')
-        gw = get_text(dom_net, 'server')
-        local_node.append((type, gw))
+def add_local_interfaces(node_db):
+    global local_node
+    for netuuid in node_db.get_networks():
+        net = node_db.lookup(netuuid)
+        srv = Network(net)
+        debug("add_local", netuuid)
+        local_node.append((srv.net_type, srv.nid))
+        if acceptors.has_key(srv.port):
+            panic("duplicate port:", srv.port)
+        if srv.net_type in ('tcp', 'toe'):
+            acceptors[srv.port] = AcceptorHandler(srv.port, srv.net_type,
+                                                  srv.send_mem, srv.recv_mem,
+                                                  srv.irq_affinity,
+                                                  srv.nid_exchange)
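+# Example: a node with one tcp interface on port 988 ends up with a
+# single AcceptorHandler in acceptors[988]; a second interface reusing
+# that port would hit the duplicate-port panic above. (The port number
+# is illustrative.)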
 
 def node_needs_router():
     return router_flag
 
-def get_routes(type, gw, dom_net):
-    """ Return the routes as a list of tuples of the form:
-        [(type, gw, lo, hi),]"""
-    res = []
-    tbl = dom_net.getElementsByTagName('route_tbl')
-    for t in tbl:
-        routes = t.getElementsByTagName('route')
-        for r in routes:
-            lo = get_attr(r, 'lo')
-            hi = get_attr(r, 'hi', '')
-            res.append((type, gw, lo, hi))
-    return res
-    
-
 def init_route_config(lustre):
     """ Scan the lustre config looking for routers.  Build list of
     routes. """
     global routes, router_flag
     routes = []
-    list = lustre.getElementsByTagName('node')
-    for node in list:
-        if get_attr(node, 'router'):
+    list = lustre.lookup_class('node')
+    for node_db in list:
+        if node_db.get_val_int('router', 0):
             router_flag = 1
+            #debug("init_route_config: found router", node_db.getName())
             for (local_type, local_nid) in local_node:
+                #debug("init_route_config:", local_type, local_nid)
                 gw = None
-                netlist = node.getElementsByTagName('network')
-                for dom_net in netlist:
-                    if local_type == get_attr(dom_net, 'type'):
-                        gw = get_text(dom_net, 'server')
+                for netuuid in node_db.get_networks():
+                    db = node_db.lookup(netuuid)
+                    if local_type == db.get_val('nettype'):
+                        gw = db.get_val('nid')
                         break
+                #debug("init_route_config: gw is", gw)
                 if not gw:
                     continue
-                for dom_net in netlist:
-                    if local_type != get_attr(dom_net, 'type'):
-                        for route in get_routes(local_type, gw, dom_net):
-                            routes.append(route)
-    
+                for route in node_db.get_routes(local_type, gw):
+                    routes.append(route)
+    debug("init_route_config routes:", routes)
+
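+# Illustrative result (made-up nids): a tcp-only client next to an
+# elan<->tcp router whose tcp nid is 10.0.0.5 ends up with
+#   routes = [('tcp', '10.0.0.5', '2', '10')]
+# i.e. the remote nid range 2..10 is associated with that tcp gateway.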
 
-def local_net(net):
+def local_net(srv_list):
     global local_node
     for iface in local_node:
-        if net.net_type == iface[0]:
+        for srv in srv_list:
+            #debug("local_net a:", srv.net_type, "b:", iface[0])
+            if srv.net_type == iface[0]:
+                return srv
+    return None
+
+def local_net_type(net_type):
+    global local_node
+    for iface in local_node:
+        if net_type == iface[0]:
             return 1
     return 0
 
-def find_route(net):
+def find_route(srv_list):
     global local_node, routes
     frm_type = local_node[0][0]
-    to_type = net.net_type
-    to = net.nid
-    debug ('looking for route to', to_type,to)
-    for r in routes:
-        if  r[2] == to:
-            return r
-    return None
+    for srv in srv_list:
+        #debug("find_route: srv:", srv.hostaddr, "type: ", srv.net_type)
+        to_type = srv.net_type
+        to = srv.hostaddr
+        #debug ('looking for route to', to_type, to)
+        for r in routes:
+            #debug("find_route: ", r)
+            if  r[2] == to:
+                return srv, r
+    return None,None
            
-    
-        
 
 ############################################################
 # lconf level logic
 # Start a service.
-def startService(dom_node, module_flag):
-    type = getServiceType(dom_node)
-    debug('Service:', type, getName(dom_node), getUUID(dom_node))
-    # there must be a more dynamic way of doing this...
+def newService(db):
+    type = db.get_class()
+    debug('Service:', type, db.getName(), db.getUUID())
     n = None
     if type == 'ldlm':
-        n = LDLM(dom_node)
+        n = LDLM(db)
+    elif type == 'ptlrpc':
+        n = PTLRPC(db)
     elif type == 'lov':
-        n = LOV(dom_node)
-    elif type == 'lovconfig':
-        n = LOVConfig(dom_node)
+        n = LOV(db)
     elif type == 'network':
-        n = Network(dom_node)
-    elif type == 'obd':
-        n = OBD(dom_node)
+        n = Network(db)
+    elif type == 'routetbl':
+        n = Router(db)
+    elif type == 'osd':
+        n = OSD(db)
     elif type == 'cobd':
-        n = COBD(dom_node)
-    elif type == 'ost':
-        n = OST(dom_node)
-    elif type == 'mds':
-        n = MDS(dom_node)
-    elif type == 'osc':
-        n = VOSC(dom_node)
-    elif type == 'mdc':
-        n = MDC(dom_node)
+        n = COBD(db)
+    elif type == 'mdsdev':
+        n = MDSDEV(db)
     elif type == 'mountpoint':
-        n = Mountpoint(dom_node)
-    elif type == 'echo_client':
-        n = ECHO_CLIENT(dom_node)
+        n = Mountpoint(db)
+    elif type == 'echoclient':
+        n = ECHO_CLIENT(db)
     else:
         panic ("unknown service type:", type)
-
-    if module_flag:
-        if config.nomod():
-            return
-        if config.cleanup():
-            n.cleanup_module()
-        else:
-            n.load_module()
-    else:
-        if config.nosetup():
-            return
-        if config.cleanup():
-            n.cleanup()
-        else:
-            n.prepare()
+    return n
 
 #
 # Prepare the system to run lustre using a particular profile
@@ -1625,63 +2092,105 @@ def startService(dom_node, module_flag):
 #  * make sure partitions are in place and prepared
 #  * initialize devices with lctl
 # Levels are important and need to be enforced.
-def startProfile(lustreNode, profileNode, module_flag):
-    if not profileNode:
-        panic("profile:", profile, "not found.")
-    services = getServices(lustreNode, profileNode)
-    if config.cleanup():
-        services.reverse()
+def for_each_profile(db, prof_list, operation):
+    for prof_uuid in prof_list:
+        prof_db = db.lookup(prof_uuid)
+        if not prof_db:
+            panic("profile:", profile, "not found.")
+        services = prof_db.getServices()
+        operation(services)
+        
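+# doSetup/doModules/doCleanup/doUnloadModules below are the operations
+# passed to for_each_profile, e.g. (from doHost):
+#   for_each_profile(node_db, prof_list, doModules)
+#   for_each_profile(node_db, prof_list, doSetup)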
+def doSetup(services):
+    if config.nosetup():
+        return
     for s in services:
-        startService(s[1], module_flag)
+        n = newService(s[1])
+        n.prepare()
+    
+def doModules(services):
+    if config.nomod():
+        return
+    for s in services:
+        n = newService(s[1])
+        n.load_module()
 
+def doCleanup(services):
+    if config.nosetup():
+        return
+    services.reverse()
+    for s in services:
+        n = newService(s[1])
+        n.cleanup()
+
+def doUnloadModules(services):
+    if config.nomod():
+        return
+    services.reverse()
+    for s in services:
+        n = newService(s[1])
+        n.cleanup_module()
 
 #
 # Load the profiles for this host.
-def doHost(lustreNode, hosts):
+def doHost(lustreDB, hosts):
     global routes
     global router_flag 
-    dom_node = None
+    node_db = None
     for h in hosts:
-        dom_node = getByName(lustreNode, h, 'node')
-        if dom_node:
+        node_db = lustreDB.lookup_name(h, 'node')
+        if node_db:
             break
-    if not dom_node:
+    if not node_db:
         print 'No host entry found.'
         return
 
-    if get_attr(dom_node, 'router'):
-        router_flag = 1
-    else:
-        router_flag = 0
-    recovery_upcall = get_attr(dom_node, 'recovery_upcall')
-    timeout = get_attr_int(dom_node, 'timeout')
+    router_flag = node_db.get_val_int('router', 0)
+    recovery_upcall = node_db.get_val('recovery_upcall', '')
+    timeout = node_db.get_val_int('timeout', 0)
 
+    add_local_interfaces(node_db)
     if not router_flag:
-        init_node(dom_node)
-        init_route_config(lustreNode)
+        init_route_config(lustreDB)
 
     # Two step process: (1) load modules, (2) setup lustre
     # if not cleaning, load modules first.
-    module_flag = not config.cleanup()
-    reflist = dom_node.getElementsByTagName('profile')
-    for profile in reflist:
-            startProfile(lustreNode,  profile, module_flag)
+    prof_list = node_db.get_refs('profile')
+
+    if config.cleanup():
+        if config.force():
+            # the command line can override this value
+            timeout = 5
+        # ugly hack, only need to run lctl commands for --dump
+        if config.lctl_dump():
+            for_each_profile(node_db, prof_list, doCleanup)
+            return
+
+        sys_set_timeout(timeout)
+        sys_set_recovery_upcall(recovery_upcall)
+
+        for_each_profile(node_db, prof_list, doCleanup)
+        for_each_profile(node_db, prof_list, doUnloadModules)
+
+    else:
+        # ugly hack, only need to run lctl commands for --dump
+        if config.lctl_dump():
+            for_each_profile(node_db, prof_list, doSetup)
+            return
+
+        for_each_profile(node_db, prof_list, doModules)
 
-    if not config.cleanup():
         sys_set_debug_path()
         script = config.gdb_script()
         run(lctl.lctl, ' modules >', script)
         if config.gdb():
-            # dump /tmp/ogdb and sleep/pause here
             log ("The GDB module script is in", script)
+            # pause, so user has time to break and
+            # load the script
             time.sleep(5)
         sys_set_timeout(timeout)
         sys_set_recovery_upcall(recovery_upcall)
-            
-            
-    module_flag = not module_flag
-    for profile in reflist:
-            startProfile(lustreNode,  profile, module_flag)
+
+        for_each_profile(node_db, prof_list, doSetup)
 
 ############################################################
 # Command line processing
@@ -1692,7 +2201,8 @@ def parse_cmdline(argv):
                  "portals=", "makeldiff", "cleanup", "noexec",
                  "help", "node=", "nomod", "nosetup",
                  "dump=", "force", "minlevel=", "maxlevel=",
-                 "timeout=", "recovery_upcall="]
+                 "timeout=", "recovery_upcall=",
+                 "ldapurl=", "config=", "select=", "lctl_dump="]
     opts = []
     args = []
 
@@ -1711,7 +2221,6 @@ def parse_cmdline(argv):
             config.verbose(1)
         if o in ("-n", "--noexec"):
             config.noexec(1)
-            config.verbose(1)
         if o == "--portals":
             config.portals_dir(a)
         if o == "--lustre":
@@ -1730,14 +2239,23 @@ def parse_cmdline(argv):
             config.dump_file(a)
         if o in ("-f", "--force"):
             config.force(1)
-       if o in ("--minlevel",):
+       if o == "--minlevel":
                config.minlevel(a)
-        if o in ("--maxlevel",):
+        if o == "--maxlevel":
                 config.maxlevel(a)
-        if o in ("--timeout",):
+        if o == "--timeout":
                 config.timeout(a)
-        if o in ("--recovery_upcall",):
+        if o == "--recovery_upcall":
                 config.recovery_upcall(a)
+        if o == "--ldapurl":
+                config.ldapurl(a)
+        if o == "--config":
+                config.config_name(a)
+        if o == "--select":
+                config.init_select(a)
+        if o == "--lctl_dump":
+                config.lctl_dump(a)
+
     return args
 
 def fetch(url):
@@ -1793,9 +2311,9 @@ def sys_set_recovery_upcall(upcall):
 
 def sys_set_timeout(timeout):
     # the command overrides the value in the node config
-    if config.timeout() >= 0:
+    if config.timeout() > 0:
         timeout = config.timeout()
-    if timeout >= 0:
+    if timeout > 0:
         debug("setting timeout:", timeout)
         sysctl('lustre/timeout', timeout)
 
@@ -1846,7 +2364,7 @@ def sanitise_path():
 # Shutdown does steps in reverse
 #
 def main():
-    global TCP_ACCEPTOR, lctl, MAXTCPBUF
+    global  lctl, MAXTCPBUF
 
     host = socket.gethostname()
 
@@ -1867,10 +2385,17 @@ def main():
         if not os.access(args[0], os.R_OK):
             print 'File not found or readable:', args[0]
             sys.exit(1)
-        dom = xml.dom.minidom.parse(args[0])
-    elif config.url():
-        xmldata = fetch(config.url())
-        dom = xml.dom.minidom.parseString(xmldata)
+        try:
+            dom = xml.dom.minidom.parse(args[0])
+        except Exception:
+            panic("%s does not appear to be a config file." % (args[0]))
+            sys.exit(1) # make sure to die here, even in debug mode.
+        db = LustreDB_XML(dom.documentElement, dom.documentElement)
+    elif config.ldapurl():
+        if not config.config_name():
+            panic("--ldapurl requires --config name")
+        dn = "config=%s,fs=lustre" % (config.config_name())
+        db = LustreDB_LDAP('', {}, base=dn, url = config.ldapurl())
     else:
         usage()
 
@@ -1889,20 +2414,15 @@ def main():
 
     setupModulePath(sys.argv[0])
 
-    TCP_ACCEPTOR = find_prog('acceptor')
-    if not TCP_ACCEPTOR:
-        if config.noexec():
-            TCP_ACCEPTOR = 'acceptor'
-            debug('! acceptor not found')
-        else:
-            panic('acceptor not found')
-
     lctl = LCTLInterface('lctl')
+    if config.lctl_dump():
+        lctl.use_save_file(config.lctl_dump())
+    else:
+        sys_make_devices()
+        sys_set_netmem_max('/proc/sys/net/core/rmem_max', MAXTCPBUF)
+        sys_set_netmem_max('/proc/sys/net/core/wmem_max', MAXTCPBUF)
 
-    sys_make_devices()
-    sys_set_netmem_max('/proc/sys/net/core/rmem_max', MAXTCPBUF)
-    sys_set_netmem_max('/proc/sys/net/core/wmem_max', MAXTCPBUF)
-    doHost(dom.documentElement, node_list)
+    doHost(db, node_list)
 
 if __name__ == "__main__":
     try:
@@ -1915,4 +2435,4 @@ if __name__ == "__main__":
 
     if first_cleanup_error:
         sys.exit(first_cleanup_error)
-
+