Whamcloud - gitweb
land 0.5.20.3 b_devel onto HEAD (b_devel will remain)
[fs/lustre-release.git] / lustre / utils / lconf.in
index 170c5d0..cbe05dd 100755 (executable)
 #
 # Based in part on the XML obdctl modifications done by Brian Behlendorf 
 
 #
 # Based in part on the XML obdctl modifications done by Brian Behlendorf 
 
-import sys, getopt
-import string, os, stat, popen2, socket, time, random
+import sys, getopt, types
+import string, os, stat, popen2, socket, time, random, fcntl, select
 import re, exceptions
 import xml.dom.minidom
 
 import re, exceptions
 import xml.dom.minidom
 
+if sys.version[0] == '1':
+    from FCNTL import F_GETFL, F_SETFL
+else:
+    from fcntl import F_GETFL, F_SETFL
+
 # Global parameters
 # Global parameters
-TCP_ACCEPTOR = ''
 MAXTCPBUF = 1048576
 DEFAULT_TCPBUF = 1048576
 #
 MAXTCPBUF = 1048576
 DEFAULT_TCPBUF = 1048576
 #
@@ -50,8 +54,10 @@ def usage():
     print """usage: lconf config.xml
 
 config.xml          Lustre configuration in xml format.
     print """usage: lconf config.xml
 
 config.xml          Lustre configuration in xml format.
---get <url>         URL to fetch a config file
+--ldapurl           LDAP server URL, eg. ldap://localhost
+--config            Cluster config name used for LDAP query
 --node <nodename>   Load config for <nodename>
 --node <nodename>   Load config for <nodename>
+--select service=nodeA,service2=nodeB   U
 -d | --cleanup      Cleans up config. (Shutdown)
 -f | --force        Forced unmounting and/or obd detach during cleanup
 -v | --verbose      Print system commands as they are run
 -d | --cleanup      Cleans up config. (Shutdown)
 -f | --force        Forced unmounting and/or obd detach during cleanup
 -v | --verbose      Print system commands as they are run
@@ -70,10 +76,10 @@ config.xml          Lustre configuration in xml format.
                     Levels are aproximatly like:
                             10 - network
                             20 - device, ldlm
                     Levels are aproximatly like:
                             10 - network
                             20 - device, ldlm
-                            30 - obd, mdd
+                            30 - osd, mdd
                             40 - mds, ost
                             50 - mdc, osc
                             40 - mds, ost
                             50 - mdc, osc
-                            60 - lov, lovconfig
+                            60 - lov
                             70 - mountpoint, echo_client
 --lustre=src_dir    Base directory of lustre sources. This parameter will cause lconf
                     to load modules from a source tree.
                             70 - mountpoint, echo_client
 --lustre=src_dir    Base directory of lustre sources. This parameter will cause lconf
                     to load modules from a source tree.
@@ -112,8 +118,12 @@ class Config:
         self._portals_dir = ''
        self._minlevel = 0
        self._maxlevel = 100
         self._portals_dir = ''
        self._minlevel = 0
        self._maxlevel = 100
-        self._timeout = -1
+        self._timeout = 0
         self._recovery_upcall = ''
         self._recovery_upcall = ''
+        self._ldapurl = ''
+        self._config_name = ''
+        self._select = {}
+        self._lctl_dump = ''
 
     def verbose(self, flag = None):
         if flag: self._verbose = flag
 
     def verbose(self, flag = None):
         if flag: self._verbose = flag
@@ -151,10 +161,6 @@ class Config:
         if val: self._node = val
         return self._node
 
         if val: self._node = val
         return self._node
 
-    def url(self, val = None):
-        if val: self._url = val
-        return self._url
-
     def gdb_script(self):
         if os.path.isdir('/r'):
             return '/r' + self._gdb_script
     def gdb_script(self):
         if os.path.isdir('/r'):
             return '/r' + self._gdb_script
@@ -170,7 +176,6 @@ class Config:
     def dump_file(self, val = None):
         if val: self._dump_file = val
         return self._dump_file
     def dump_file(self, val = None):
         if val: self._dump_file = val
         return self._dump_file
-
     def minlevel(self, val = None):
         if val: self._minlevel = int(val)
         return self._minlevel
     def minlevel(self, val = None):
         if val: self._minlevel = int(val)
         return self._minlevel
@@ -195,6 +200,31 @@ class Config:
         if val: self._recovery_upcall = val
         return self._recovery_upcall
 
         if val: self._recovery_upcall = val
         return self._recovery_upcall
 
+    def ldapurl(self, val = None):
+        """Return the LDAP server URL, storing val first when it is true."""
+        if not val:
+            return self._ldapurl
+        self._ldapurl = val
+        return val
+
+    def config_name(self, val = None):
+        """Return the cluster config name, storing val first when it is true."""
+        if not val:
+            return self._config_name
+        self._config_name = val
+        return val
+
+    def init_select(self, arg):
+        """Record the service-to-node choices given with --select.
+
+        arg is a comma separated string of service=node pairs, for
+        example "service=nodeA,service2=nodeB".  Each pair is stored in
+        self._select, keyed by the service name.  Empty entries (an
+        empty arg, or a doubled or trailing comma) are skipped.  An
+        entry without '=' still raises ValueError from the unpacking.
+        """
+        for entry in string.split(arg, ','):
+            if not string.strip(entry):
+                continue
+            # split on the first '=' only, so the node value may itself
+            # contain '='
+            srv, node = string.split(entry, '=', 1)
+            self._select[srv] = node
+        
+    def select(self, srv):
+        """Return the node selected for service srv, or None if there is none."""
+        return self._select.get(srv)
+
+    def lctl_dump(self, val = None):
+        """Return the lctl dump setting, storing val first when it is true."""
+        if not val:
+            return self._lctl_dump
+        self._lctl_dump = val
+        return val
+
+
 config = Config()
 
 # ============================================================ 
 config = Config()
 
 # ============================================================ 
@@ -254,6 +284,104 @@ class LconfError (exceptions.Exception):
 
 
 # ============================================================
 
 
 # ============================================================
+# handle daemons, like the acceptor
+class DaemonHandler:
+    """ Manage starting and stopping a daemon. Assumes daemon manages
+    its own pid file.
+
+    This class calls self.pidfile() and self.command_line() but does not
+    define them, so a subclass has to supply both. """
+
+    def __init__(self, cmd):
+        # cmd is the program name; its path is looked up on the first start()
+        self.command = cmd
+        self.path =""
+
+    def start(self):
+        """ Launch the daemon unless its pid file shows it is running.
+        Raises CommandError when the command returns a non-zero status. """
+        if self.running():
+            log(self.command, "already running.")
+            # do not launch a second copy on top of the live one
+            return
+        if not self.path:
+            self.path = find_prog(self.command)
+            if not self.path:
+                panic(self.command, "not found.")
+        ret, out = runcmd(self.path +' '+ self.command_line())
+        if ret:
+            raise CommandError(self.path, out, ret)
+
+    def stop(self):
+        """ Send signal 15 (SIGTERM) to the running daemon. Failures are
+        only logged. """
+        if self.running():
+            pid = self.read_pidfile()
+            try:
+                log ("killing process", pid)
+                os.kill(pid, 15)
+                #time.sleep(1) # let daemon die
+            except OSError, e:
+                log("unable to kill", self.command, e)
+            # NOTE: checked right after the signal, with no wait
+            if self.running():
+                log("unable to kill", self.command)
+
+    def running(self):
+        """ Return 1 if the pid from the pid file can be signalled, else 0.
+        A pid file whose process cannot be signalled is removed. """
+        pid = self.read_pidfile()
+        if pid:
+            try:
+                # signal 0 only tests whether the pid can be signalled
+                os.kill(pid, 0)
+            except OSError:
+                self.clean_pidfile()
+            else:
+                return 1
+        return 0
+
+    def read_pidfile(self):
+        """ Return the pid stored in the pid file, or 0 when the file
+        cannot be read or does not hold a number. """
+        try:
+            fp = open(self.pidfile(), 'r')
+        except IOError:
+            return 0
+        # nested try blocks: the file is always closed, and an empty or
+        # garbled pid file counts as "no pid" instead of raising
+        try:
+            try:
+                return int(fp.read())
+            except (IOError, ValueError):
+                return 0
+        finally:
+            fp.close()
+
+    def clean_pidfile(self):
+        """ Remove a stale pidfile """
+        log("removing stale pidfile:", self.pidfile())
+        try:
+            os.unlink(self.pidfile())
+        except OSError, e:
+            log(self.pidfile(), e)
+            
+class AcceptorHandler(DaemonHandler):
+    """ DaemonHandler for the "acceptor" program serving one port. """
+
+    def __init__(self, port, net_type, send_mem, recv_mem, irq_aff, nid_xchg):
+        DaemonHandler.__init__(self, "acceptor")
+        self.port = port
+        self.send_mem = send_mem
+        self.recv_mem = recv_mem
+
+        # gather the optional switches, each one with its leading blank
+        switches = []
+        if net_type == 'toe':
+            switches.append(' -N 4')
+        if irq_aff:
+            switches.append(' -i')
+        if nid_xchg:
+            switches.append(' -x')
+        self.flags = string.join(switches, '')
+
+    def pidfile(self):
+        """ Path of the pid file, built from the command name and port. """
+        parts = (self.command, self.port)
+        return "/var/run/%s-%d.pid" % parts
+
+    def command_line(self):
+        """ Arguments for the acceptor: buffer sizes, flags, then the port. """
+        words = ['-s', str(self.send_mem), '-r', str(self.recv_mem),
+                 str(self.flags), str(self.port)]
+        return string.join(words)
+    
+acceptors = {}
+
+# start the acceptors
+def run_acceptors():
+    """Start every registered acceptor daemon that is not running yet."""
+    for daemon in acceptors.values():
+        if daemon.running():
+            continue
+        daemon.start()
+
+def stop_acceptor(port):
+    """Stop the acceptor registered for port, if there is one and it runs."""
+    if not acceptors.has_key(port):
+        return
+    handler = acceptors[port]
+    if handler.running():
+        handler.stop()
+        
+
+# ============================================================
 # handle lctl interface
 class LCTLInterface:
     """
 # handle lctl interface
 class LCTLInterface:
     """
@@ -265,6 +393,7 @@ class LCTLInterface:
         Initialize close by finding the lctl binary.
         """
         self.lctl = find_prog(cmd)
         Initialize close by finding the lctl binary.
         """
         self.lctl = find_prog(cmd)
+        self.save_file = ''
         if not self.lctl:
             if config.noexec():
                 debug('! lctl not found')
         if not self.lctl:
             if config.noexec():
                 debug('! lctl not found')
@@ -272,6 +401,13 @@ class LCTLInterface:
             else:
                 raise CommandError('lctl', "unable to find lctl binary.")
 
             else:
                 raise CommandError('lctl', "unable to find lctl binary.")
 
+    def use_save_file(self, file):
+        """Remember file as the save file for later lctl runs."""
+        self.save_file = file
+        
+    def set_nonblock(self, fd):
+        """Add os.O_NDELAY to the status flags of file descriptor fd."""
+        flags = fcntl.fcntl(fd, F_GETFL) | os.O_NDELAY
+        fcntl.fcntl(fd, F_SETFL, flags)
+
     def run(self, cmds):
         """
         run lctl
     def run(self, cmds):
         """
         run lctl
@@ -281,21 +417,48 @@ class LCTLInterface:
         should modify command line to accept multiple commands, or
         create complex command line options
         """
         should modify command line to accept multiple commands, or
         create complex command line options
         """
-        debug("+", self.lctl, cmds)
+        cmd_line = self.lctl
+        if self.save_file:
+            cmds = '\n  dump ' + self.save_file + cmds
+
+        debug("+", cmd_line, cmds)
         if config.noexec(): return (0, [])
         if config.noexec(): return (0, [])
-        p = popen2.Popen3(self.lctl, 1)
-        p.tochild.write(cmds + "\n")
-        p.tochild.close()
-        out = p.fromchild.readlines()
-        err = p.childerr.readlines()
-        ret = p.wait()
+
+        child = popen2.Popen3(cmd_line, 1) # Capture stdout and stderr from command
+        child.tochild.write(cmds + "\n")
+        child.tochild.close()
+
+        # From "Python Cookbook" from O'Reilly
+        outfile = child.fromchild
+        outfd = outfile.fileno()
+        self.set_nonblock(outfd)
+        errfile = child.childerr
+        errfd = errfile.fileno()
+        self.set_nonblock(errfd)
+
+        outdata = errdata = ''
+        outeof = erreof = 0
+        while 1:
+            ready = select.select([outfd,errfd],[],[]) # Wait for input
+            if outfd in ready[0]:
+                outchunk = outfile.read()
+                if outchunk == '': outeof = 1
+                outdata = outdata + outchunk
+            if errfd in ready[0]:
+                errchunk = errfile.read()
+                if errchunk == '': erreof = 1
+                errdata = errdata + errchunk
+            if outeof and erreof: break
+        # end of "borrowed" code
+
+        ret = child.wait()
         if os.WIFEXITED(ret):
             rc = os.WEXITSTATUS(ret)
         else:
             rc = 0
         if os.WIFEXITED(ret):
             rc = os.WEXITSTATUS(ret)
         else:
             rc = 0
-        if rc or len(err):
-            raise CommandError(self.lctl, err, rc)
-        return rc, out
+        if rc or len(errdata):
+            raise CommandError(self.lctl, errdata, rc)
+        return rc, outdata
 
     def runcmd(self, *args):
         """
 
     def runcmd(self, *args):
         """
@@ -317,33 +480,28 @@ class LCTLInterface:
             cmds =  """
   network %s
   mynid %s
             cmds =  """
   network %s
   mynid %s
-  add_uuid self %s
-  quit""" % (net, nid, nid)
-        else:
-            cmds =  """
-  network %s
-  add_uuid self %s
-  quit""" % (net, nid)
-            
-        self.run(cmds)
+  quit """ % (net, nid)
+            self.run(cmds)
 
     # create a new connection
 
     # create a new connection
-    def connect(self, net, nid, port, servuuid, send_mem, recv_mem):
-        if net  in ('tcp', 'toe'):
-            cmds =  """
+    def connect(self, srv):
+        cmds =  "\n  add_uuid %s %s %s" % (srv.uuid, srv.nid, srv.net_type)
+        if srv.net_type  in ('tcp', 'toe') and not config.lctl_dump():
+            flags = ''
+            if srv.irq_affinity:
+                flags = flags + 'i'
+            if srv.nid_exchange:
+                flags = flags + 'x'
+            cmds =  """%s          
   network %s
   network %s
-  add_uuid %s %s
   send_mem %d
   recv_mem %d
   send_mem %d
   recv_mem %d
-  connect %s %d
-  quit""" % (net, servuuid, nid, send_mem, recv_mem, nid, port,  )
-        else:
-            cmds =  """
-  network %s
-  add_uuid %s %s
-  connect %s %d
-  quit""" % (net, servuuid, nid, nid, port,  )
-            
+  connect %s %d %s""" % (cmds, srv.net_type,
+             srv.send_mem,
+             srv.recv_mem,
+             srv.hostaddr, srv.port, flags )
+
+        cmds = cmds + "\n  quit"
         self.run(cmds)
                 
     # add a route to a range
         self.run(cmds)
                 
     # add a route to a range
@@ -351,7 +509,8 @@ class LCTLInterface:
         cmds =  """
   network %s
   add_route %s %s %s
         cmds =  """
   network %s
   add_route %s %s %s
-  quit  """ % (net, gw, lo, hi)
+  quit  """ % (net,
+               gw, lo, hi)
         self.run(cmds)
 
                 
         self.run(cmds)
 
                 
@@ -367,9 +526,11 @@ class LCTLInterface:
     def add_route_host(self, net, uuid, gw, tgt):
         cmds =  """
   network %s
     def add_route_host(self, net, uuid, gw, tgt):
         cmds =  """
   network %s
-  add_uuid %s %s
+  add_uuid %s %s %s
   add_route %s %s
   add_route %s %s
-  quit """ % (net, uuid, tgt, gw, tgt)
+  quit """ % (net,
+              uuid, tgt, net,
+              gw, tgt)
         self.run(cmds)
 
     # add a route to a range
         self.run(cmds)
 
     # add a route to a range
@@ -397,7 +558,6 @@ class LCTLInterface:
         cmds =  """
   ignore_errors
   network %s
         cmds =  """
   ignore_errors
   network %s
-  del_uuid self
   disconnect
   quit""" % (net)
         self.run(cmds)
   disconnect
   quit""" % (net)
         self.run(cmds)
@@ -416,8 +576,8 @@ class LCTLInterface:
         cmds = """
   ignore_errors
   device $%s
         cmds = """
   ignore_errors
   device $%s
-  cleanup
-  detach %s
+  cleanup %s
+  detach
   quit""" % (name, ('', 'force')[config.force()])
         self.run(cmds)
 
   quit""" % (name, ('', 'force')[config.force()])
         self.run(cmds)
 
@@ -454,8 +614,7 @@ class LCTLInterface:
 # Run a command and return the output and status.
 # stderr is merged into the output (2>&1), could use popen3 to
 # capture it separately if necessary
 # Run a command and return the output and status.
 # stderr is merged into the output (2>&1), could use popen3 to
 # capture it separately if necessary
-def run(*args):
-    cmd = string.join(map(str,args))
+def runcmd(cmd):
     debug ("+", cmd)
     if config.noexec(): return (0, [])
     f = os.popen(cmd + ' 2>&1')
     debug ("+", cmd)
     if config.noexec(): return (0, [])
     f = os.popen(cmd + ' 2>&1')
@@ -467,6 +626,10 @@ def run(*args):
         ret = 0
     return (ret, out)
 
         ret = 0
     return (ret, out)
 
+def run(*args):
+    """Join the str() of every argument with blanks and hand the
+    resulting command line to runcmd(), returning its result."""
+    words = map(str, args)
+    return runcmd(string.join(words))
+
 # Run a command in the background.
 def run_daemon(*args):
     cmd = string.join(map(str,args))
 # Run a command in the background.
 def run_daemon(*args):
     cmd = string.join(map(str,args))
@@ -487,7 +650,7 @@ def find_prog(cmd):
     cmdpath = os.path.dirname(sys.argv[0])
     syspath.insert(0, cmdpath);
     if config.portals_dir():
     cmdpath = os.path.dirname(sys.argv[0])
     syspath.insert(0, cmdpath);
     if config.portals_dir():
-        syspath.insert(0, os.path.join(cmdpath, config.portals_dir()+'/linux/utils/'))
+        syspath.insert(0, os.path.join(config.portals_dir()+'/linux/utils/'))
     for d in syspath:
         prog = os.path.join(d,cmd)
         if os.access(prog, os.X_OK):
     for d in syspath:
         prog = os.path.join(d,cmd)
         if os.access(prog, os.X_OK):
@@ -527,23 +690,20 @@ def is_block(path):
 
 # build fs according to type
 # fixme: dangerous
 
 # build fs according to type
 # fixme: dangerous
-def mkfs(fstype, dev):
+def mkfs(dev, devsize, fstype):
+    block_cnt = ''
+    if devsize:
+        # devsize is in 1k, and fs block count is in 4k
+        block_cnt = devsize/4
+
     if(fstype in ('ext3', 'extN')):
     if(fstype in ('ext3', 'extN')):
-        mkfs = 'mkfs.ext2 -j -b 4096'
+        mkfs = 'mkfs.ext2 -j -b 4096 -F '
     elif (fstype == 'reiserfs'):
     elif (fstype == 'reiserfs'):
-        mkfs = 'mkfs.reiserfs -f'
+        mkfs = 'mkreiserfs -ff'
     else:
         print 'unsupported fs type: ', fstype
     else:
         print 'unsupported fs type: ', fstype
-    if not is_block(dev):
-        if(fstype in ('ext3', 'extN')):
-            force = '-F'
-        elif (fstype == 'reiserfs'):
-            force = ''
-        else:
-            print 'unsupported fs type: ', fstype
-    else:
-        force = ''
-    (ret, out) = run (mkfs, force, dev)
+
+    (ret, out) = run (mkfs, dev, block_cnt)
     if ret:
         panic("Unable to build fs:", dev)
     # enable hash tree indexing on fsswe
     if ret:
         panic("Unable to build fs:", dev)
     # enable hash tree indexing on fsswe
@@ -587,8 +747,12 @@ def init_loop(file, size, fstype):
         return dev
     if config.reformat()  or not os.access(file, os.R_OK | os.W_OK):
         if size < 8000:
         return dev
     if config.reformat()  or not os.access(file, os.R_OK | os.W_OK):
         if size < 8000:
-            error(file, "size must be larger than 8MB")
-        run("dd if=/dev/zero bs=1k count=0 seek=%d of=%s" %(size,  file))
+            panic(file, "size must be larger than 8MB, currently set to:", size)
+        (ret, out) = run("dd if=/dev/zero bs=1k count=0 seek=%d of=%s" %(size,
+                                                                         file))
+        if ret:
+            panic("Unable to create backing store:", file)
+
     loop = loop_base()
     # find next free loop
     for n in xrange(0, MAX_LOOP_DEVICES):
     loop = loop_base()
     # find next free loop
     for n in xrange(0, MAX_LOOP_DEVICES):
@@ -624,7 +788,7 @@ def block_dev(dev, size, fstype, format):
     if not is_block(dev):
         dev = init_loop(dev, size, fstype)
     if config.reformat() or (need_format(fstype, dev) and format == 'yes'):
     if not is_block(dev):
         dev = init_loop(dev, size, fstype)
     if config.reformat() or (need_format(fstype, dev) and format == 'yes'):
-        mkfs(fstype, dev)
+        mkfs(dev, size, fstype)
 
 #    else:
 #        panic("device:", dev,
 
 #    else:
 #        panic("device:", dev,
@@ -642,6 +806,16 @@ def if2addr(iface):
     ip = string.split(addr, ':')[1]
     return ip
 
     ip = string.split(addr, ':')[1]
     return ip
 
+def get_local_nid(net_type, wildcard):
+    """Return the local nid. When the elan position file is readable
+    the elan address is used, otherwise the local address for net_type."""
+    if os.access('/proc/elan/device0/position', os.R_OK):
+        return get_local_address('elan', '*')
+    return get_local_address(net_type, wildcard)
+        
 def get_local_address(net_type, wildcard):
     """Return the local address for the network type."""
     local = ""
 def get_local_address(net_type, wildcard):
     """Return the local address for the network type."""
     local = ""
@@ -676,6 +850,8 @@ def is_prepared(uuid):
     """Return true if a device exists for the uuid"""
     # expect this format:
     # 1 UP ldlm ldlm ldlm_UUID 2
     """Return true if a device exists for the uuid"""
     # expect this format:
     # 1 UP ldlm ldlm ldlm_UUID 2
+    if config.lctl_dump():
+        return 0
     try:
         out = lctl.device_list()
         for s in out:
     try:
         out = lctl.device_list()
         for s in out:
@@ -684,6 +860,21 @@ def is_prepared(uuid):
     except CommandError, e:
         e.dump()
     return 0
     except CommandError, e:
         e.dump()
     return 0
+
+def is_network_prepared():
+    """If the PTLRPC device exists, then assume that all networking
+       has been configured.
+
+       Returns 1 when an entry of lctl.device_list() has 'RPCDEV_UUID'
+       as its fifth whitespace separated field, otherwise 0.  Always
+       returns 0 when config.lctl_dump() is set.  A CommandError from
+       lctl is dumped and treated as "not prepared"."""
+    if config.lctl_dump():
+        return 0
+    try:
+        out = lctl.device_list()
+        for s in out:
+            fields = string.split(s)
+            # blank or short entries have no uuid column; skip them
+            # instead of failing with IndexError
+            if len(fields) > 4 and fields[4] == 'RPCDEV_UUID':
+                return 1
+    except CommandError, e:
+        e.dump()
+    return 0
+    
     
 def fs_is_mounted(path):
     """Return true if path is a mounted lustre filesystem"""
     
 def fs_is_mounted(path):
     """Return true if path is a mounted lustre filesystem"""
@@ -707,11 +898,11 @@ class Module:
     """ Base class for the rest of the modules. The default cleanup method is
     defined here, as well as some utility funcs.
     """
     """ Base class for the rest of the modules. The default cleanup method is
     defined here, as well as some utility funcs.
     """
-    def __init__(self, module_name, dom_node):
-        self.dom_node = dom_node
+    def __init__(self, module_name, db):
+        self.db = db
         self.module_name = module_name
         self.module_name = module_name
-        self.name = get_attr(dom_node, 'name')
-        self.uuid = get_attr(dom_node, 'uuid')
+        self.name = self.db.getName()
+        self.uuid = self.db.getUUID()
         self.kmodule_list = []
         self._server = None
         self._connected = 0
         self.kmodule_list = []
         self._server = None
         self._connected = 0
@@ -720,35 +911,16 @@ class Module:
         msg = string.join(map(str,args))
         print self.module_name + ":", self.name, self.uuid, msg
 
         msg = string.join(map(str,args))
         print self.module_name + ":", self.name, self.uuid, msg
 
-
-    def lookup_server(self, srv_uuid):
-        """ Lookup a server's network information """
-        net = get_ost_net(self.dom_node.parentNode, srv_uuid)
-        if not net:
-            panic ("Unable to find a server for:", srv_uuid)
-        self._server = Network(net)
-
-    def get_server(self):
-        return self._server
-
     def cleanup(self):
         """ default cleanup, used for most modules """
         self.info()
     def cleanup(self):
         """ default cleanup, used for most modules """
         self.info()
-        srv = self.get_server()
-        if srv and local_net(srv):
-            try:
-                lctl.disconnect(srv.net_type, srv.nid, srv.port, srv.uuid)
-            except CommandError, e:
-                log(self.module_name, "disconnect failed: ", self.name)
-                e.dump()
-                cleanup_error(e.rc)
         try:
             lctl.cleanup(self.name, self.uuid)
         except CommandError, e:
             log(self.module_name, "cleanup failed: ", self.name)
             e.dump()
             cleanup_error(e.rc)
         try:
             lctl.cleanup(self.name, self.uuid)
         except CommandError, e:
             log(self.module_name, "cleanup failed: ", self.name)
             e.dump()
             cleanup_error(e.rc)
-
+            
     def add_portals_module(self, dev_dir, modname):
         """Append a module to list of modules to load."""
         self.kmodule_list.append((config.portals_dir(), dev_dir, modname))
     def add_portals_module(self, dev_dir, modname):
         """Append a module to list of modules to load."""
         self.kmodule_list.append((config.portals_dir(), dev_dir, modname))
@@ -804,195 +976,189 @@ class Module:
                 log('! unable to unload module:', mod)
                 logall(out)
         
                 log('! unable to unload module:', mod)
                 logall(out)
         
-
 class Network(Module):
 class Network(Module):
-    def __init__(self,dom_node):
-        Module.__init__(self, 'NETWORK', dom_node)
-        self.net_type = get_attr(dom_node,'type')
-        self.nid = get_text(dom_node, 'server', '*')
-        self.port = get_text_int(dom_node, 'port', 0)
-        self.send_mem = get_text_int(dom_node, 'send_mem', DEFAULT_TCPBUF)
-        self.recv_mem = get_text_int(dom_node, 'recv_mem', DEFAULT_TCPBUF)
+    def __init__(self,db):
+        Module.__init__(self, 'NETWORK', db)
+        self.net_type = self.db.get_val('nettype')
+        self.nid = self.db.get_val('nid', '*')
+        self.port = self.db.get_val_int('port', 0)
+        self.send_mem = self.db.get_val_int('sendmem', DEFAULT_TCPBUF)
+        self.recv_mem = self.db.get_val_int('recvmem', DEFAULT_TCPBUF)
+        self.irq_affinity = self.db.get_val_int('irqaffinity', 0)
+        self.nid_exchange = self.db.get_val_int('nidexchange', 0)
+
         if '*' in self.nid:
         if '*' in self.nid:
-            self.nid = get_local_address(self.net_type, self.nid)
+            self.nid = get_local_nid(self.net_type, self.nid)
             if not self.nid:
                 panic("unable to set nid for", self.net_type, self.nid)
             debug("nid:", self.nid)
 
             if not self.nid:
                 panic("unable to set nid for", self.net_type, self.nid)
             debug("nid:", self.nid)
 
+        self.hostaddr = self.db.get_val('hostaddr', self.nid)
+        if '*' in self.hostaddr:
+            self.hostaddr = get_local_address(self.net_type, self.hostaddr)
+            if not self.hostaddr:
+                panic("unable to set hostaddr for", self.net_type, self.hostaddr)
+            debug("hostaddr:", self.hostaddr)
+        # debug ( "hostaddr ", self.hostaddr, "net_type", self.net_type)
+
         self.add_portals_module("linux/oslib", 'portals')
         if node_needs_router():
             self.add_portals_module("linux/router", 'kptlrouter')
         if self.net_type == 'tcp':
             self.add_portals_module("linux/socknal", 'ksocknal')
         if self.net_type == 'toe':
         self.add_portals_module("linux/oslib", 'portals')
         if node_needs_router():
             self.add_portals_module("linux/router", 'kptlrouter')
         if self.net_type == 'tcp':
             self.add_portals_module("linux/socknal", 'ksocknal')
         if self.net_type == 'toe':
-            self.add_portals_odule("/linux/toenal", 'ktoenal')
+            self.add_portals_module("/linux/toenal", 'ktoenal')
         if self.net_type == 'elan':
             self.add_portals_module("/linux/rqswnal", 'kqswnal')
         if self.net_type == 'gm':
             self.add_portals_module("/linux/gmnal", 'kgmnal')
         self.add_lustre_module('obdclass', 'obdclass')
         if self.net_type == 'elan':
             self.add_portals_module("/linux/rqswnal", 'kqswnal')
         if self.net_type == 'gm':
             self.add_portals_module("/linux/gmnal", 'kgmnal')
         self.add_lustre_module('obdclass', 'obdclass')
-        self.add_lustre_module('ptlrpc', 'ptlrpc')
 
     def prepare(self):
 
     def prepare(self):
+        if is_network_prepared():
+            return
         self.info(self.net_type, self.nid, self.port)
         self.info(self.net_type, self.nid, self.port)
-        if self.net_type in ('tcp', 'toe'):
-            nal_id = '' # default is socknal
-            if self.net_type == 'toe':
-                nal_id = '-N 4'
-            ret, out = run(TCP_ACCEPTOR, '-s', self.send_mem, '-r', self.recv_mem, nal_id, self.port)
-            if ret:
-                raise CommandError(TCP_ACCEPTOR, out, ret)
-        ret = self.dom_node.getElementsByTagName('route_tbl')
-        for a in ret:
-            for r in a.getElementsByTagName('route'):
-                net_type = get_attr(r, 'type')
-                gw = get_attr(r, 'gw')
-                lo = get_attr(r, 'lo')
-                hi = get_attr(r,'hi', '')
-                lctl.add_route(net_type, gw, lo, hi)
-                if net_type in ('tcp', 'toe') and net_type == self.net_type and hi == '':
-                    srv = nid2server(self.dom_node.parentNode.parentNode, lo)
-                    if not srv:
-                        panic("no server for nid", lo)
-                    else:
-                        lctl.connect(srv.net_type, srv.nid, srv.port, srv.uuid, srv.send_mem, srv.recv_mem)
-
-            
         lctl.network(self.net_type, self.nid)
         lctl.network(self.net_type, self.nid)
-        lctl.newdev(attach = "ptlrpc RPCDEV RPCDEV_UUID")
 
     def cleanup(self):
         self.info(self.net_type, self.nid, self.port)
 
     def cleanup(self):
         self.info(self.net_type, self.nid, self.port)
-        ret = self.dom_node.getElementsByTagName('route_tbl')
-        for a in ret:
-            for r in a.getElementsByTagName('route'):
-                lo = get_attr(r, 'lo')
-                hi = get_attr(r,'hi', '')
-                if self.net_type in ('tcp', 'toe') and hi == '':
-                    srv = nid2server(self.dom_node.parentNode.parentNode, lo)
-                    if not srv:
-                        panic("no server for nid", lo)
-                    else:
-                        try:
-                            lctl.disconnect(srv.net_type, srv.nid, srv.port, srv.uuid)
-                        except CommandError, e:
-                            print "disconnect failed: ", self.name
-                            e.dump()
-                            cleanup_error(e.rc)
-                try:
-                    lctl.del_route(self.net_type, self.nid, lo, hi)
-                except CommandError, e:
-                    print "del_route failed: ", self.name
-                    e.dump()
-                    cleanup_error(e.rc)
-              
-        try:
-            lctl.cleanup("RPCDEV", "RPCDEV_UUID")
-        except CommandError, e:
-            print "cleanup failed: ", self.name
-            e.dump()
-            cleanup_error(e.rc)
+        if self.net_type in ('tcp', 'toe'):
+            stop_acceptor(self.port)
         try:
             lctl.disconnectAll(self.net_type)
         except CommandError, e:
             print "disconnectAll failed: ", self.name
             e.dump()
             cleanup_error(e.rc)
         try:
             lctl.disconnectAll(self.net_type)
         except CommandError, e:
             print "disconnectAll failed: ", self.name
             e.dump()
             cleanup_error(e.rc)
-        if self.net_type in ('tcp', 'toe'):
-            # yikes, this ugly! need to save pid in /var/something
-            run("killall acceptor")
+
+class Router(Module):
+    def __init__(self,db):
+        Module.__init__(self, 'ROUTER', db)
+    def prepare(self):
+        if is_network_prepared():
+            return
+        self.info()
+        for net_type, gw, lo, hi in self.db.get_route_tbl():
+            lctl.add_route(net_type, gw, lo, hi)
+            if net_type in ('tcp', 'toe') and local_net_type(net_type) and hi == '':
+                srvdb = self.db.nid2server(lo, net_type)
+
+                if not srvdb:
+                    panic("no server for nid", lo)
+                else:
+                    srv = Network(srvdb)
+                    lctl.connect(srv)
+    def cleanup(self):
+        for net_type, gw, lo, hi in self.db.get_route_tbl():
+            if net_type in ('tcp', 'toe') and local_net_type(net_type) and hi == '':
+                srvdb = self.db.nid2server(lo, net_type)
+                if not srvdb:
+                    panic("no server for nid", lo)
+                else:
+                    srv = Network(srvdb)
+                    try:
+                        lctl.disconnect(srv.net_type, srv.nid, srv.port, srv.uuid)
+                    except CommandError, e:
+                        print "disconnect failed: ", self.name
+                        e.dump()
+                        cleanup_error(e.rc)
+            try:
+                lctl.del_route(net_type, gw, lo, hi)
+            except CommandError, e:
+                print "del_route failed: ", self.name
+                e.dump()
+                cleanup_error(e.rc)
 
 class LDLM(Module):
 
 class LDLM(Module):
-    def __init__(self,dom_node):
-        Module.__init__(self, 'LDLM', dom_node)
+    def __init__(self,db):
+        Module.__init__(self, 'LDLM', db)
         self.add_lustre_module('ldlm', 'ldlm') 
     def prepare(self):
         if is_prepared(self.uuid):
             return
         self.info()
         self.add_lustre_module('ldlm', 'ldlm') 
     def prepare(self):
         if is_prepared(self.uuid):
             return
         self.info()
-        lctl.newdev(attach="ldlm %s %s" % (self.name, self.uuid),
-                    setup ="")
-
-class LOV(Module):
-    def __init__(self,dom_node):
-        Module.__init__(self, 'LOV', dom_node)
-        self.mds_uuid = get_first_ref(dom_node, 'mds')
-        mds= lookup(dom_node.parentNode, self.mds_uuid)
-        self.mds_name = getName(mds)
-        devs = dom_node.getElementsByTagName('devices')
-        if len(devs) > 0:
-            dev_node = devs[0]
-            self.stripe_sz = get_attr_int(dev_node, 'stripesize', 65536)
-            self.stripe_off = get_attr_int(dev_node, 'stripeoffset', 0)
-            self.pattern = get_attr_int(dev_node, 'pattern', 0)
-            self.devlist = get_all_refs(dev_node, 'obd')
-            self.stripe_cnt = get_attr_int(dev_node, 'stripecount', len(self.devlist))
-        self.add_lustre_module('mdc', 'mdc')
-        self.add_lustre_module('lov', 'lov')
+        lctl.newdev(attach="ldlm %s %s" % (self.name, self.uuid))
+    def cleanup(self):
+        if is_prepared(self.uuid):
+            Module.cleanup(self)
 
 
+class PTLRPC(Module):
+    def __init__(self,db):
+        Module.__init__(self, 'PTLRPC', db)
+        self.add_lustre_module('ptlrpc', 'ptlrpc') 
     def prepare(self):
         if is_prepared(self.uuid):
             return
     def prepare(self):
         if is_prepared(self.uuid):
             return
+        self.info()
+        lctl.newdev(attach="ptlrpc %s %s" % (self.name, self.uuid))
+    def cleanup(self):
+        if is_prepared(self.uuid):
+            Module.cleanup(self)
+
+class LOV(Module):
+    def __init__(self,db):
+        Module.__init__(self, 'LOV', db)
+        self.add_lustre_module('mdc', 'mdc')
+        self.add_lustre_module('lov', 'lov')
+        self.mds_uuid = self.db.get_first_ref('mds')
+        mds= self.db.lookup(self.mds_uuid)
+        self.mds_name = mds.getName()
+        self.stripe_sz = self.db.get_val_int('stripesize', 65536)
+        self.stripe_off = self.db.get_val_int('stripeoffset', 0)
+        self.pattern = self.db.get_val_int('stripepattern', 0)
+        self.devlist = self.db.get_refs('obd')
+        self.stripe_cnt = self.db.get_val_int('stripecount', len(self.devlist))
+        self.osclist = []
+        self.mdc_uuid = ''
         for obd_uuid in self.devlist:
         for obd_uuid in self.devlist:
-            obd = lookup(self.dom_node.parentNode, obd_uuid)
-            osc = get_osc(obd)
+            obd = self.db.lookup(obd_uuid)
+            osc = get_osc(obd, self.name)
             if osc:
             if osc:
-                try:
-                    # Ignore connection failures, because the LOV will DTRT with
-                    # an unconnected OSC.
-                    osc.prepare(ignore_connect_failure=1)
-                except CommandError:
-                    print "Error preparing OSC %s (inactive)\n" % osc_uuid
+                self.osclist.append(osc)
             else:
             else:
-                panic('osc not found:', osc_uuid)
-        mdc_uuid = prepare_mdc(self.dom_node.parentNode, self.mds_uuid)
+                panic('osc not found:', obd_uuid)
+            
+    def prepare(self):
+        if is_prepared(self.uuid):
+            return
+        for osc in self.osclist:
+            try:
+                # Ignore connection failures, because the LOV will DTRT with
+                # an unconnected OSC.
+                osc.prepare(ignore_connect_failure=1)
+            except CommandError:
+                print "Error preparing OSC %s (inactive)\n" % osc.uuid
+        self.mdc_uuid = prepare_mdc(self.db, self.name, self.mds_uuid)
         self.info(self.mds_uuid, self.stripe_cnt, self.stripe_sz,
                   self.stripe_off, self.pattern, self.devlist, self.mds_name)
         lctl.newdev(attach="lov %s %s" % (self.name, self.uuid),
         self.info(self.mds_uuid, self.stripe_cnt, self.stripe_sz,
                   self.stripe_off, self.pattern, self.devlist, self.mds_name)
         lctl.newdev(attach="lov %s %s" % (self.name, self.uuid),
-                    setup ="%s" % (mdc_uuid))
+                    setup ="%s" % (self.mdc_uuid))
 
     def cleanup(self):
 
     def cleanup(self):
-        if not is_prepared(self.uuid):
-            return
-        for obd_uuid in self.devlist:
-            obd = lookup(self.dom_node.parentNode, obd_uuid)
-            osc = get_osc(obd)
-            if osc:
-                osc.cleanup()
-            else:
-                panic('osc not found:', osc_uuid)
-        Module.cleanup(self)
-        cleanup_mdc(self.dom_node.parentNode, self.mds_uuid)
-
+        if is_prepared(self.uuid):
+            Module.cleanup(self)
+        for osc in self.osclist:
+            osc.cleanup()
+        cleanup_mdc(self.db, self.name, self.mds_uuid)
 
     def load_module(self):
 
     def load_module(self):
-        for obd_uuid in self.devlist:
-            obd = lookup(self.dom_node.parentNode, obd_uuid)
-            osc = get_osc(obd)
-            if osc:
-                osc.load_module()
-                break
-            else:
-                panic('osc not found:', osc_uuid)
+        for osc in self.osclist:
+            osc.load_module()
+            break
         Module.load_module(self)
 
         Module.load_module(self)
 
-
     def cleanup_module(self):
         Module.cleanup_module(self)
     def cleanup_module(self):
         Module.cleanup_module(self)
-        for obd_uuid in self.devlist:
-            obd = lookup(self.dom_node.parentNode, obd_uuid)
-            osc = get_osc(obd)
-            if osc:
-                osc.cleanup_module()
-                break
-            else:
-                panic('osc not found:', osc_uuid)
+        for osc in self.osclist:
+            osc.cleanup_module()
+            break
 
 class LOVConfig(Module):
 
 class LOVConfig(Module):
-    def __init__(self,dom_node):
-        Module.__init__(self, 'LOVConfig', dom_node)
-        self.lov_uuid = get_first_ref(dom_node, 'lov')
-        l = lookup(dom_node.parentNode, self.lov_uuid)
+    def __init__(self,db):
+        Module.__init__(self, 'LOVConfig', db)
+
+        self.lov_uuid = self.db.get_first_ref('lov')
+        l = self.db.lookup(self.lov_uuid)
         self.lov = LOV(l)
         
     def prepare(self):
         self.lov = LOV(l)
         
     def prepare(self):
@@ -1007,29 +1173,59 @@ class LOVConfig(Module):
         #nothing to do here
         pass
 
         #nothing to do here
         pass
 
-
-class MDS(Module):
-    def __init__(self,dom_node):
-        Module.__init__(self, 'MDS', dom_node)
-        self.devname, self.size = get_device(dom_node)
-        self.fstype = get_text(dom_node, 'fstype')
+class MDSDEV(Module):
+    def __init__(self,db):
+        Module.__init__(self, 'MDSDEV', db)
+        self.devpath = self.db.get_val('devpath','')
+        self.size = self.db.get_val_int('devsize', 0)
+        self.fstype = self.db.get_val('fstype', '')
+        # overwrite the original MDSDEV name and uuid with the MDS name and uuid
+        target_uuid = self.db.get_first_ref('target')
+        mds = self.db.lookup(target_uuid)
+        self.name = mds.getName()
+        self.lovconfig_uuids = mds.get_refs('lovconfig')
         # FIXME: if fstype not set, then determine based on kernel version
         # FIXME: if fstype not set, then determine based on kernel version
-        self.format = get_text(dom_node, 'autoformat', "no")
+        self.format = self.db.get_val('autoformat', "no")
+
+        active_uuid = mds.get_active_target()
+        if not active_uuid:
+            panic("No target device found:", target_uuid)
+        if active_uuid == self.uuid:
+            self.active = 1
+        else:
+            self.active = 0
+        self.target_dev_uuid = self.uuid
+        self.uuid = target_uuid
+        # modules
         if self.fstype == 'extN':
             self.add_lustre_module('extN', 'extN') 
         self.add_lustre_module('mds', 'mds')
         if self.fstype == 'extN':
             self.add_lustre_module('extN', 'extN') 
         self.add_lustre_module('mds', 'mds')
-        self.add_lustre_module('obdclass', 'fsfilt_%s'%(self.fstype))
+        if self.fstype:
+            self.add_lustre_module('obdclass', 'fsfilt_%s' % (self.fstype))
+
+    def load_module(self):
+        if self.active:
+            Module.load_module(self)
             
     def prepare(self):
         if is_prepared(self.uuid):
             return
             
     def prepare(self):
         if is_prepared(self.uuid):
             return
-        self.info(self.devname, self.fstype, self.format)
-        blkdev = block_dev(self.devname, self.size, self.fstype, self.format)
+        if not self.active:
+            debug(self.uuid, "not active")
+            return
+        self.info(self.devpath, self.fstype, self.format)
+        run_acceptors()
+        blkdev = block_dev(self.devpath, self.size, self.fstype, self.format)
         if not is_prepared('MDT_UUID'):
             lctl.newdev(attach="mdt %s %s" % ('MDT', 'MDT_UUID'),
                         setup ="")
         lctl.newdev(attach="mds %s %s" % (self.name, self.uuid),
                     setup ="%s %s" %(blkdev, self.fstype))
         if not is_prepared('MDT_UUID'):
             lctl.newdev(attach="mdt %s %s" % ('MDT', 'MDT_UUID'),
                         setup ="")
         lctl.newdev(attach="mds %s %s" % (self.name, self.uuid),
                     setup ="%s %s" %(blkdev, self.fstype))
+        for uuid in self.lovconfig_uuids:
+            db = self.db.lookup(uuid)
+            lovconfig = LOVConfig(db)
+            lovconfig.prepare()
+            
     def cleanup(self):
         if is_prepared('MDT_UUID'):
             try:
     def cleanup(self):
         if is_prepared('MDT_UUID'):
             try:
@@ -1038,79 +1234,171 @@ class MDS(Module):
                 print "cleanup failed: ", self.name
                 e.dump()
                 cleanup_error(e.rc)
                 print "cleanup failed: ", self.name
                 e.dump()
                 cleanup_error(e.rc)
-        if not is_prepared(self.uuid):
-            return
-        Module.cleanup(self)
-        clean_loop(self.devname)
-
-# Very unusual case, as there is no MDC element in the XML anymore
-# Builds itself from an MDS node
-class MDC(Module):
-    def __init__(self,dom_node):
-        self.mds = MDS(dom_node)
-        self.dom_node = dom_node
-        self.module_name = 'MDC'
-        self.kmodule_list = []
-        self._server = None
-        self._connected = 0
-
-        host = socket.gethostname()
-        self.name = 'MDC_%s' % (self.mds.name)
-        self.uuid = '%s_%05x_%05x' % (self.name, int(random.random() * 1048576),
-                                      int(random.random() * 1048576))
-
-        self.lookup_server(self.mds.uuid)
-        self.add_lustre_module('mdc', 'mdc')
-
-    def prepare(self):
         if is_prepared(self.uuid):
         if is_prepared(self.uuid):
-            return
-        self.info(self.mds.uuid)
-        srv = self.get_server()
-        lctl.connect(srv.net_type, srv.nid, srv.port, srv.uuid, srv.send_mem, srv.recv_mem)
-        lctl.newdev(attach="mdc %s %s" % (self.name, self.uuid),
-                        setup ="%s %s" %(self.mds.uuid, srv.uuid))
-            
-class OBD(Module):
-    def __init__(self, dom_node):
-        Module.__init__(self, 'OBD', dom_node)
-        self.obdtype = get_attr(dom_node, 'type')
-        self.devname, self.size = get_device(dom_node)
-        self.fstype = get_text(dom_node, 'fstype')
-        self.active_target = get_text(dom_node, 'active_target')
+            Module.cleanup(self)
+        clean_loop(self.devpath)
+
+class OSD(Module):
+    def __init__(self, db):
+        Module.__init__(self, 'OSD', db)
+        self.osdtype = self.db.get_val('osdtype')
+        self.devpath = self.db.get_val('devpath', '')
+        self.size = self.db.get_val_int('devsize', 0)
+        self.fstype = self.db.get_val('fstype', '')
+        target_uuid = self.db.get_first_ref('target')
+        ost = self.db.lookup(target_uuid)
+        self.name = ost.getName()
         # FIXME: if fstype not set, then determine based on kernel version
         # FIXME: if fstype not set, then determine based on kernel version
-        self.format = get_text(dom_node, 'autoformat', 'yes')
+        self.format = self.db.get_val('autoformat', 'yes')
         if self.fstype == 'extN':
             self.add_lustre_module('extN', 'extN') 
         if self.fstype == 'extN':
             self.add_lustre_module('extN', 'extN') 
-        self.add_lustre_module(self.obdtype, self.obdtype)
+
+        active_uuid = ost.get_active_target()
+        if not active_uuid:
+            panic("No target device found:", target_uuid)
+        if active_uuid == self.uuid:
+            self.active = 1
+        else:
+            self.active = 0
+        self.target_dev_uuid = self.uuid
+        self.uuid = target_uuid
+        # modules
+        self.add_lustre_module('ost', 'ost')
+        self.add_lustre_module(self.osdtype, self.osdtype)
         if self.fstype:
             self.add_lustre_module('obdclass' , 'fsfilt_%s' % (self.fstype))
 
         if self.fstype:
             self.add_lustre_module('obdclass' , 'fsfilt_%s' % (self.fstype))
 
+    def load_module(self):
+        if self.active:
+            Module.load_module(self)
+
     # need to check /proc/mounts and /etc/mtab before
     # formatting anything.
     # FIXME: check if device is already formatted.
     def prepare(self):
         if is_prepared(self.uuid):
             return
     # need to check /proc/mounts and /etc/mtab before
     # formatting anything.
     # FIXME: check if device is already formatted.
     def prepare(self):
         if is_prepared(self.uuid):
             return
-        self.info(self.obdtype, self.devname, self.size, self.fstype, self.format)
-        if self.obdtype == 'obdecho':
+        if not self.active:
+            debug(self.uuid, "not active")
+            return
+        self.info(self.osdtype, self.devpath, self.size, self.fstype, self.format)
+        run_acceptors()
+        if self.osdtype == 'obdecho':
             blkdev = ''
         else:
             blkdev = ''
         else:
-            blkdev = block_dev(self.devname, self.size, self.fstype, self.format)
-        lctl.newdev(attach="%s %s %s" % (self.obdtype, self.name, self.uuid),
+            blkdev = block_dev(self.devpath, self.size, self.fstype, self.format)
+        lctl.newdev(attach="%s %s %s" % (self.osdtype, self.name, self.uuid),
                     setup ="%s %s" %(blkdev, self.fstype))
                     setup ="%s %s" %(blkdev, self.fstype))
+        if not is_prepared('OSS_UUID'):
+            lctl.newdev(attach="ost %s %s" % ('OSS', 'OSS_UUID'),
+                        setup ="")
+
     def cleanup(self):
     def cleanup(self):
-        if not is_prepared(self.uuid):
+        if is_prepared('OSS_UUID'):
+            try:
+                lctl.cleanup("OSS", "OSS_UUID")
+            except CommandError, e:
+                print "cleanup failed: ", self.name
+                e.dump()
+                cleanup_error(e.rc)
+        if is_prepared(self.uuid):
+            Module.cleanup(self)
+        if not self.osdtype == 'obdecho':
+            clean_loop(self.devpath)
+
+# Generic client module, used by OSC and MDC
+class Client(Module):
+    def __init__(self, tgtdb, module, owner):
+        self.target_name = tgtdb.getName()
+        self.target_uuid = tgtdb.getUUID()
+        self.db = tgtdb
+
+        self.tgt_dev_uuid = tgtdb.get_active_target()
+        if not self.tgt_dev_uuid:
+            panic("No target device found for target:", self.target_name)
+            
+        self.kmodule_list = []
+        self._server = None
+        self._connected = 0
+
+        self.module = module
+        self.module_name = string.upper(module)
+        self.name = '%s_%s_%s' % (self.module_name, owner, self.target_name)
+        self.uuid = '%05x%05x_%.14s_%05x%05x' % (int(random.random() * 1048576),
+                                              int(random.random() * 1048576),self.name,
+                                              int(random.random() * 1048576),
+                                              int(random.random() * 1048576))
+        self.uuid = self.uuid[0:36]
+        self.lookup_server(self.tgt_dev_uuid)
+        self.add_lustre_module(module, module)
+
+    def lookup_server(self, srv_uuid):
+        """ Lookup a server's network information """
+        self._server_nets = self.db.get_ost_net(srv_uuid)
+        if len(self._server_nets) == 0:
+            panic ("Unable to find a server for:", srv_uuid)
+
+    def get_servers(self):
+        return self._server_nets
+
+    def prepare(self, ignore_connect_failure = 0):
+        if is_prepared(self.uuid):
             return
             return
+        self.info(self.target_uuid)
+        try:
+            srv = local_net(self.get_servers())
+            if srv:
+                lctl.connect(srv)
+            else:
+                srv, r =  find_route(self.get_servers())
+                if srv:
+                    lctl.add_route_host(r[0], srv.uuid, r[1], r[2])
+                else:
+                    panic ("no route to",  self.target_uuid)
+        except CommandError:
+            if (ignore_connect_failure == 0):
+                raise
+        if srv:
+            lctl.newdev(attach="%s %s %s" % (self.module, self.name, self.uuid),
+                        setup ="%s %s" %(self.target_uuid, srv.uuid))
+
+    def cleanup(self):
         Module.cleanup(self)
         Module.cleanup(self)
-        if not self.obdtype == 'obdecho':
-            clean_loop(self.devname)
+        srv = local_net(self.get_servers())
+        if srv:
+            try:
+                lctl.disconnect(srv.net_type, srv.nid, srv.port, srv.uuid)
+            except CommandError, e:
+                log(self.module_name, "disconnect failed: ", self.name)
+                e.dump()
+                cleanup_error(e.rc)
+        else:
+            self.info(self.target_uuid)
+            srv, r =  find_route(self.get_servers())
+            if srv:
+                try:
+                    lctl.del_route_host(r[0], srv.uuid, r[1], r[2])
+                except CommandError, e:
+                    print "del_route failed: ", self.name
+                    e.dump()
+                    cleanup_error(e.rc)
+
+
+
+class MDC(Client):
+    def __init__(self, db, owner):
+         Client.__init__(self, db, 'mdc', owner)
 
 
+class OSC(Client):
+    def __init__(self, db, owner):
+         Client.__init__(self, db, 'osc', owner)
+
+            
 class COBD(Module):
 class COBD(Module):
-    def __init__(self, dom_node):
-        Module.__init__(self, 'COBD', dom_node)
-        self.real_uuid = get_first_ref(dom_node, 'real_obd')
-        self.cache_uuid = get_first_ref(dom_node, 'cache_obd')
+    def __init__(self, db):
+        Module.__init__(self, 'COBD', db)
+        self.real_uuid = self.db.get_first_ref('realobd')
+        self.cache_uuid = self.db.get_first_ref('cacheobd')
         self.add_lustre_module('cobd' , 'cobd')
 
     # need to check /proc/mounts and /etc/mtab before
         self.add_lustre_module('cobd' , 'cobd')
 
     # need to check /proc/mounts and /etc/mtab before
@@ -1123,28 +1411,15 @@ class COBD(Module):
         lctl.newdev(attach="cobd %s %s" % (self.name, self.uuid),
                     setup ="%s %s" %(self.real_uuid, self.cache_uuid))
 
         lctl.newdev(attach="cobd %s %s" % (self.name, self.uuid),
                     setup ="%s %s" %(self.real_uuid, self.cache_uuid))
 
-class OST(Module):
-    def __init__(self,dom_node):
-        Module.__init__(self, 'OST', dom_node)
-        self.obd_uuid = get_first_ref(dom_node, 'obd')
-        self.add_lustre_module('ost', 'ost')
-
-    def prepare(self):
-        if is_prepared(self.uuid):
-            return
-        self.info(self.obd_uuid)
-        lctl.newdev(attach="ost %s %s" % (self.name, self.uuid),
-                    setup ="%s" % (self.obd_uuid))
-
 
 # virtual interface for  OSC and LOV
 class VOSC(Module):
 
 # virtual interface for  OSC and LOV
 class VOSC(Module):
-    def __init__(self,dom_node):
-        Module.__init__(self, 'VOSC', dom_node)
-        if dom_node.nodeName == 'lov':
-            self.osc = LOV(dom_node)
+    def __init__(self,db, owner):
+        Module.__init__(self, 'VOSC', db)
+        if db.get_class() == 'lov':
+            self.osc = LOV(db)
         else:
         else:
-            self.osc = get_osc(dom_node)
+            self.osc = get_osc(db, owner)
     def get_uuid(self):
         return self.osc.uuid
     def prepare(self):
     def get_uuid(self):
         return self.osc.uuid
     def prepare(self):
@@ -1155,81 +1430,34 @@ class VOSC(Module):
         self.osc.load_module()
     def cleanup_module(self):
         self.osc.cleanup_module()
         self.osc.load_module()
     def cleanup_module(self):
         self.osc.cleanup_module()
-        
-
-class OSC(Module):
-    def __init__(self, dom_node, obd_name, obd_uuid, ost_uuid):
-        self.dom_node = dom_node
-        self.module_name = 'OSC'
-        self.name = 'OSC_%s' % (obd_name)
-        self.uuid = '%s_%05x' % (self.name, int(random.random() * 1048576))
-        self.kmodule_list = []
-        self._server = None
-        self._connected = 0
-
-        self.obd_uuid = obd_uuid
-        self.ost_uuid = ost_uuid
-        self.lookup_server(self.ost_uuid)
-        self.add_lustre_module('osc', 'osc')
+    def need_mdc(self):
+        return self.db.get_class() != 'lov'
+    def get_mdc_uuid(self):
+        if self.db.get_class() == 'lov':
+            return self.osc.mdc_uuid
+        return ''
 
 
-    def prepare(self, ignore_connect_failure = 0):
-        if is_prepared(self.uuid):
-            return
-        self.info(self.obd_uuid, self.ost_uuid)
-        srv = self.get_server()
-        try:
-            if local_net(srv):
-                lctl.connect(srv.net_type, srv.nid, srv.port, srv.uuid, srv.send_mem, srv.recv_mem)
-            else:
-                r =  find_route(srv)
-                if r:
-                    lctl.add_route_host(r[0], srv.uuid, r[1], r[2])
-                else:
-                    panic ("no route to",  srv.nid)
-        except CommandError:
-            if (ignore_connect_failure == 0):
-                pass
-            
-        lctl.newdev(attach="osc %s %s" % (self.name, self.uuid),
-                    setup ="%s %s" %(self.obd_uuid, srv.uuid))
-
-    def cleanup(self):
-        srv = self.get_server()
-        if local_net(srv):
-            Module.cleanup(self)
-        else:
-            self.info(self.obd_uuid, self.ost_uuid)
-            r =  find_route(srv)
-            if r:
-                try:
-                    lctl.del_route_host(r[0], srv.uuid, r[1], r[2])
-                except CommandError, e:
-                    print "del_route failed: ", self.name
-                    e.dump()
-                    cleanup_error(e.rc)
-            Module.cleanup(self)
-            
 
 class ECHO_CLIENT(Module):
 
 class ECHO_CLIENT(Module):
-    def __init__(self,dom_node):
-        Module.__init__(self, 'ECHO_CLIENT', dom_node)
+    def __init__(self,db):
+        Module.__init__(self, 'ECHO_CLIENT', db)
         self.add_lustre_module('obdecho', 'obdecho')
         self.add_lustre_module('obdecho', 'obdecho')
-        self.obd_uuid = get_first_ref(dom_node, 'obd')
-        obd = lookup(self.dom_node.parentNode, self.obd_uuid)
-        self.osc = VOSC(obd)
+        self.obd_uuid = self.db.get_first_ref('obd')
+        obd = self.db.lookup(self.obd_uuid)
+        self.osc = VOSC(obd, self.name)
 
     def prepare(self):
         if is_prepared(self.uuid):
             return
         self.osc.prepare() # XXX This is so cheating. -p
         self.info(self.obd_uuid)
 
     def prepare(self):
         if is_prepared(self.uuid):
             return
         self.osc.prepare() # XXX This is so cheating. -p
         self.info(self.obd_uuid)
-            
+
         lctl.newdev(attach="echo_client %s %s" % (self.name, self.uuid),
         lctl.newdev(attach="echo_client %s %s" % (self.name, self.uuid),
-                    setup = self.obd_uuid)
+                    setup = self.osc.get_uuid())
 
     def cleanup(self):
 
     def cleanup(self):
-        if not is_prepared(self.uuid):
-            return
+        if is_prepared(self.uuid):
+            Module.cleanup(self)
         self.osc.cleanup()
 
     def load_module(self):
         self.osc.cleanup()
 
     def load_module(self):
@@ -1241,23 +1469,29 @@ class ECHO_CLIENT(Module):
 
 
 class Mountpoint(Module):
 
 
 class Mountpoint(Module):
-    def __init__(self,dom_node):
-        Module.__init__(self, 'MTPT', dom_node)
-        self.path = get_text(dom_node, 'path')
-        self.mds_uuid = get_first_ref(dom_node, 'mds')
-        self.obd_uuid = get_first_ref(dom_node, 'obd')
-        self.add_lustre_module('mdc', 'mdc')
+    def __init__(self,db):
+        Module.__init__(self, 'MTPT', db)
+        self.path = self.db.get_val('path')
+        self.mds_uuid = self.db.get_first_ref('mds')
+        self.obd_uuid = self.db.get_first_ref('obd')
+        obd = self.db.lookup(self.obd_uuid)
+        self.vosc = VOSC(obd, self.name)
+        if self.vosc.need_mdc():
+            self.add_lustre_module('mdc', 'mdc')
         self.add_lustre_module('llite', 'llite')
         self.add_lustre_module('llite', 'llite')
-        obd = lookup(self.dom_node.parentNode, self.obd_uuid)
-        self.osc = VOSC(obd)
 
 
     def prepare(self):
 
 
     def prepare(self):
-        self.osc.prepare()
-        mdc_uuid = prepare_mdc(self.dom_node.parentNode, self.mds_uuid)
+        self.vosc.prepare()
+        if self.vosc.need_mdc():
+            mdc_uuid = prepare_mdc(self.db, self.name,  self.mds_uuid)
+        else:
+            mdc_uuid = self.vosc.get_mdc_uuid()
+        if not mdc_uuid:
+            panic("Unable to determine MDC UUID. Probably need to cleanup before re-mounting.")
         self.info(self.path, self.mds_uuid, self.obd_uuid)
         cmd = "mount -t lustre_lite -o osc=%s,mdc=%s none %s" % \
         self.info(self.path, self.mds_uuid, self.obd_uuid)
         cmd = "mount -t lustre_lite -o osc=%s,mdc=%s none %s" % \
-              (self.osc.get_uuid(), mdc_uuid, self.path)
+              (self.vosc.get_uuid(), mdc_uuid, self.path)
         run("mkdir", self.path)
         ret, val = run(cmd)
         if ret:
         run("mkdir", self.path)
         ret, val = run(cmd)
         if ret:
@@ -1276,218 +1510,465 @@ class Mountpoint(Module):
         if fs_is_mounted(self.path):
             panic("fs is still mounted:", self.path)
 
         if fs_is_mounted(self.path):
             panic("fs is still mounted:", self.path)
 
-        self.osc.cleanup()
-        cleanup_mdc(self.dom_node.parentNode, self.mds_uuid)
+        self.vosc.cleanup()
+        if self.vosc.need_mdc():
+            cleanup_mdc(self.db, self.name, self.mds_uuid)
 
     def load_module(self):
 
     def load_module(self):
-        self.osc.load_module()
+        self.vosc.load_module()
         Module.load_module(self)
     def cleanup_module(self):
         Module.cleanup_module(self)
         Module.load_module(self)
     def cleanup_module(self):
         Module.cleanup_module(self)
-        self.osc.cleanup_module()
+        self.vosc.cleanup_module()
 
 
 # ============================================================
 # XML processing and query
 
 
 
 # ============================================================
 # XML processing and query
 
-# OSC is no longer in the xml, so we have to fake it.
-# this is getting ugly and begging for another refactoring
-def get_osc(obd_dom):
-    obd = OBD(obd_dom)
-    osc = OSC(obd_dom, obd.name, obd.uuid, obd.active_target)
-    return osc
-
-
-def get_device(obd):
-    list = obd.getElementsByTagName('device')
-    if len(list) > 0:
-        dev = list[0]
-        dev.normalize();
-        size = get_attr_int(dev, 'size', 0)
-        return dev.firstChild.data, size
-    return '', 0
-
-# Get the text content from the first matching child
-# If there is no content (or it is all whitespace), return
-# the default
-def get_text(dom_node, tag, default=""):
-    list = dom_node.getElementsByTagName(tag)
-    if len(list) > 0:
-        dom_node = list[0]
-        dom_node.normalize()
-        if dom_node.firstChild:
-            txt = string.strip(dom_node.firstChild.data)
-            if txt:
-                return txt
-    return default
-
-def get_text_int(dom_node, tag, default=0):
-    list = dom_node.getElementsByTagName(tag)
-    n = default
-    if len(list) > 0:
-        dom_node = list[0]
-        dom_node.normalize()
-        if dom_node.firstChild:
-            txt = string.strip(dom_node.firstChild.data)
-            if txt:
-                try:
-                    n = int(txt)
-                except ValueError:
-                    panic("text value is not integer:", txt)
-    return n
+class LustreDB:
+    def lookup(self, uuid):
+        """Return the LustreDB object for uuid.
+
+        Delegates to the backend's _lookup_by_uuid(); the XML and LDAP
+        backends in this file both yield None when nothing matches.
+        """
+        return self._lookup_by_uuid(uuid)
+
+    def lookup_name(self, name, class_name = ""):
+        """Return the LustreDB object whose name is NAME.
+
+        Delegates to the backend's _lookup_by_name().  On a miss the XML
+        backend yields None and the LDAP backend yields an empty list,
+        so callers should only test the result for truth.
+        NOTE(review): neither backend in this file uses class_name.
+        """
+        return self._lookup_by_name(name, class_name)
+
+    def lookup_class(self, class_name):
+        """Return the records of class CLASS_NAME.
+
+        Delegates to the backend's _lookup_by_class().  Both backends in
+        this file build and return a list of LustreDB objects (possibly
+        empty), not a single instance.
+        """
+        return self._lookup_by_class(class_name)
+
+    def get_val(self, tag, default=None):
+        """Return the value stored under TAG, or DEFAULT.
+
+        The backend value is used when it is true (non-empty).  Otherwise
+        DEFAULT is returned if one was supplied; a falsy default such as
+        0 or "" still counts as supplied.  With no value and no default a
+        debug message is logged and None is returned.
+        """
+        v = self._get_val(tag)
+        if v:
+            return v
+        # identity test: only a literal None means "no default given"
+        if default is not None:
+            return default
+        debug("LustreDB", self.getName(), " no value for:", tag)
+        return None
 
 
-def get_attr(dom_node, attr, default=""):
-    v = dom_node.getAttribute(attr)
-    if v:
-        return v
-    return default
+    def get_class(self):
+        """Return the class (service type) of this record, as reported
+        by the backend's _get_class()."""
+        return self._get_class()
 
 
-def get_attr_int(dom_node, attr, default=0):
-    n = default
-    v = dom_node.getAttribute(attr)
-    if v:
+    def get_val_int(self, tag, default=0):
+        str = self._get_val(tag)
         try:
         try:
-            n = int(v)
+            if str:
+                return int(str)
+            return default
         except ValueError:
         except ValueError:
-            panic("attr value is not integer", v)
-    return n
-
-def get_first_ref(dom_node, tag):
-    """ Get the first uuidref of the type TAG. Used one only
-    one is expected.  Returns the uuid."""
-    uuid = None
-    refname = '%s_ref' % tag
-    list = dom_node.getElementsByTagName(refname)
-    if len(list) > 0:
-        uuid = getRef(list[0])
-    return uuid
+            panic("text value is not integer:", str)
+            
+    def get_first_ref(self, tag):
+        """Return the first uuid referenced through TAG, or None when
+        there is no such reference.  Meant for tags where only one
+        reference is expected."""
+        refs = self._get_refs(tag)
+        if not refs:
+            return None
+        return refs[0]
     
     
-def get_all_refs(dom_node, tag):
-    """ Get all the refs of type TAG.  Returns list of uuids. """
-    uuids = []
-    refname = '%s_ref' % tag
-    list = dom_node.getElementsByTagName(refname)
-    if len(list) > 0:
-        for i in list:
-            uuids.append(getRef(i))
-    return uuids
-
-def get_ost_net(dom_node, uuid):
-    ost = lookup(dom_node, uuid)
-    uuid = get_first_ref(ost, 'network')
-    if not uuid:
+    def get_refs(self, tag):
+        """Return the list of uuids referenced through TAG."""
+        return self._get_refs(tag)
+
+    def get_all_refs(self):
+        """Return every reference held by this record, as produced by
+        the backend's _get_all_refs()."""
+        return self._get_all_refs()
+
+    def get_ost_net(self, osd_uuid):
+        """Return a list of Network objects for the node owning OSD_UUID.
+
+        An empty/None osd_uuid gives an empty list.  Otherwise the osd
+        record's first 'node' ref is resolved and one Network is built
+        for each uuid returned by that node's get_networks().
+        """
+        srv_list = []
+        if not osd_uuid:
+            return srv_list
+        osd = self.lookup(osd_uuid)
+        # NOTE(review): osd is used unchecked; a failed lookup would
+        # make the next line raise instead of reaching panic().
+        node_uuid = osd.get_first_ref('node')
+        node = self.lookup(node_uuid)
+        if not node:
+            panic("unable to find node for osd_uuid:", osd_uuid,
+                  " node_ref:", node_uuid)
+        for net_uuid in node.get_networks():
+            db = node.lookup(net_uuid)
+            srv_list.append(Network(db))
+        return srv_list
+
+    def nid2server(self, nid, net_type):
+        netlist = self.lookup_class('network')
+        for net_db in netlist:
+            if net_db.get_val('nid') == nid and net_db.get_val('nettype') == net_type: 
+                return net_db
         return None
         return None
-    return lookup(dom_node, uuid)
-
-def nid2server(dom_node, nid):
-    netlist = dom_node.getElementsByTagName('network')
-    for net_node in netlist:
-        if get_text(net_node, 'server') == nid:
-            return Network(net_node)
-    return None
     
     
-def lookup(dom_node, uuid):
-    for n in dom_node.childNodes:
-        if n.nodeType == n.ELEMENT_NODE:
-            if getUUID(n) == uuid:
-                return n
+    # the tag name is the service type
+    # fixme: this should do some checks to make sure the dom_node is a service
+    #
+    # determine what "level" a particular node is at.
+    
+    # the order of initialization is based on level.
+    def getServiceLevel(self):
+        """Return the start-up level of this record's class.
+
+        Classes not in the table map to 0, and so does any level that
+        falls outside [config.minlevel(), config.maxlevel()].
+        """
+        levels = {
+            'network': 5,
+            'routetbl': 6,
+            'ptlrpc': 7,
+            'device': 20, 'ldlm': 20,
+            'osd': 30, 'mdd': 30, 'cobd': 30,
+            'mdsdev': 40, 'ost': 40,
+            'mdc': 50, 'osc': 50,
+            'lov': 60,
+            'mountpoint': 70, 'echoclient': 70,
+        }
+        level = levels.get(self.get_class(), 0)
+        if level < config.minlevel() or level > config.maxlevel():
+            return 0
+        return level
+    
+    #
+    # return list of services in a profile. list is a list of tuples
+    # [(level, db_object),]
+    def getServices(self):
+        """Resolve every reference of this profile and return the
+        services whose level is above 0 as a sorted list of
+        (level, db_object) tuples.  An unresolvable reference is
+        reported through panic()."""
+        services = []
+        for ref_class, ref_uuid in self.get_all_refs():
+            servdb = self.lookup(ref_uuid)
+            if not servdb:
+                panic('service not found: ' + ref_uuid)
+                continue
+            level = servdb.getServiceLevel()
+            if level > 0:
+                services.append((level, servdb))
+        services.sort()
+        return services
+
+    # Find the target_device for target on a node
+    # node->profiles->device_refs->target
+    def get_target_device(self, target_uuid, node_name):
+        """Return the uuid of the device on NODE_NAME serving TARGET_UUID.
+
+        Walks every profile of the named node and every reference in
+        those profiles; the first referenced record whose first 'target'
+        ref equals target_uuid wins.  Returns None when the node is not
+        found or no reference matches.
+        """
+        node_db = self.lookup_name(node_name)
+        if not node_db:
+            return None
+        prof_list = node_db.get_refs('profile')
+        for prof_uuid in prof_list:
+            prof_db = node_db.lookup(prof_uuid)
+            ref_list = prof_db.get_all_refs()
+            for ref in ref_list:
+                # ref is a (ref_class, ref_uuid) tuple
+                dev = self.lookup(ref[1])
+                if dev and dev.get_first_ref('target') == target_uuid:
+                    return ref[1]
+        return None
+
+    def get_active_target(self):
+        """Return the uuid of the device currently serving this target.
+
+        If config.select() names a node for this target's name, the
+        device is looked up on that node with get_target_device();
+        otherwise the record's first 'active' ref is used.
+        """
+        target_uuid = self.getUUID()
+        target_name = self.getName()
+        node_name = config.select(target_name)
+        if node_name:
+            tgt_dev_uuid = self.get_target_device(target_uuid, node_name)
+        else:
+            tgt_dev_uuid = self.get_first_ref('active')
+        return tgt_dev_uuid
+        
+
+    # get all network uuids for this node
+    def get_networks(self):
+        """Collect, in order, the 'network' refs of every profile
+        referenced by this node and return them as one flat list."""
+        networks = []
+        for prof_uuid in self.get_refs('profile'):
+            profile_db = self.lookup(prof_uuid)
+            for net_uuid in profile_db.get_refs('network'):
+                networks.append(net_uuid)
+        return networks
+
+class LustreDB_XML(LustreDB):
+    """LustreDB backend that reads the configuration from an XML DOM.
+
+    Each instance wraps one DOM node (self.dom_node) and remembers the
+    node from which uuid, name and class lookups start (self.root_node).
+    """
+    def __init__(self, dom, root_node):
+        # init xmlfile
+        self.dom_node = dom
+        self.root_node = root_node
+
+    def xmltext(self, dom_node, tag):
+        """Return the stripped text of the first TAG element below
+        dom_node.  Falls off the end (returning None) when there is no
+        such element, it has no first child, or the text is empty after
+        stripping."""
+        list = dom_node.getElementsByTagName(tag)
+        if len(list) > 0:
+            dom_node = list[0]
+            dom_node.normalize()
+            if dom_node.firstChild:
+                txt = string.strip(dom_node.firstChild.data)
+                if txt:
+                    return txt
+
+    def xmlattr(self, dom_node, attr):
+        """Return attribute ATTR of dom_node via getAttribute()."""
+        return dom_node.getAttribute(attr)
+
+    def _get_val(self, tag):
+        """a value could be an attribute of the current node
+        or the text value in a child node"""
+        ret  = self.xmlattr(self.dom_node, tag)
+        # the attribute wins; element text is only consulted when the
+        # attribute is empty
+        if not ret:
+            ret = self.xmltext(self.dom_node, tag)
+        return ret
+
+    def _get_class(self):
+        """The class of an XML record is its element name."""
+        return self.dom_node.nodeName
+
+    #
+    # [(ref_class, ref_uuid),]
+    # Every element child is reported, using its element name as the
+    # class and its 'uuidref' attribute as the uuid; the result is sorted.
+    def _get_all_refs(self):
+        list = []
+        for n in self.dom_node.childNodes: 
+            if n.nodeType == n.ELEMENT_NODE:
+                ref_uuid = self.xml_get_ref(n)
+                ref_class = n.nodeName
+                list.append((ref_class, ref_uuid))
+                    
+        list.sort()
+        return list
+
+    def _get_refs(self, tag):
+        """ Get all the refs of type TAG.  Returns list of uuids. """
+        uuids = []
+        # references are stored in elements named <TAG>_ref
+        refname = '%s_ref' % tag
+        reflist = self.dom_node.getElementsByTagName(refname)
+        for r in reflist:
+            uuids.append(self.xml_get_ref(r))
+        return uuids
+
+    def xmllookup_by_uuid(self, dom_node, uuid):
+        """Depth-first search below dom_node for the first element whose
+        'uuid' attribute equals UUID.  Returns the DOM node or None."""
+        for n in dom_node.childNodes:
+            if n.nodeType == n.ELEMENT_NODE:
+                if self.xml_get_uuid(n) == uuid:
+                    return n
+                else:
+                    # n is rebound to the recursive result; the for loop
+                    # assigns it afresh on the next iteration
+                    n = self.xmllookup_by_uuid(n, uuid)
+                    if n: return n
+        return None
+
+    def _lookup_by_uuid(self, uuid):
+        """Wrap the element with this uuid in a new LustreDB_XML that
+        shares root_node; returns None (implicitly) when not found."""
+        dom = self. xmllookup_by_uuid(self.root_node, uuid)
+        if dom:
+            return LustreDB_XML(dom, self.root_node)
+
+    def xmllookup_by_name(self, dom_node, name):
+        """Depth-first search below dom_node for the first element whose
+        'name' attribute equals NAME.  Returns the DOM node or None."""
+        for n in dom_node.childNodes:
+            if n.nodeType == n.ELEMENT_NODE:
+                if self.xml_get_name(n) == name:
+                    return n
+                else:
+                    n = self.xmllookup_by_name(n, name)
+                    if n: return n
+        return None
+
+    def _lookup_by_name(self, name, class_name):
+        """Wrap the element named NAME in a new LustreDB_XML; returns
+        None (implicitly) when not found.  class_name is accepted but
+        not used to filter the search."""
+        dom = self.xmllookup_by_name(self.root_node, name)
+        if dom:
+            return LustreDB_XML(dom, self.root_node)
+
+    def xmllookup_by_class(self, dom_node, class_name):
+        """Return the elements below dom_node whose tag is CLASS_NAME."""
+        return dom_node.getElementsByTagName(class_name)
+
+    def _lookup_by_class(self, class_name):
+        """Return a list with one LustreDB_XML per element of the given
+        class found below root_node."""
+        ret = []
+        domlist = self.xmllookup_by_class(self.root_node, class_name)
+        for node in domlist:
+            ret.append(LustreDB_XML(node, self.root_node))
+        return ret
+
+    def xml_get_name(self, n):
+        """Return the 'name' attribute of DOM node n."""
+        return n.getAttribute('name')
+        
+    def getName(self):
+        """Return the 'name' attribute of the wrapped node."""
+        return self.xml_get_name(self.dom_node)
+
+    def xml_get_ref(self, n):
+        """Return the 'uuidref' attribute of DOM node n."""
+        return n.getAttribute('uuidref')
+
+    def xml_get_uuid(self, dom_node):
+        """Return the 'uuid' attribute of dom_node."""
+        return dom_node.getAttribute('uuid')
+
+    def getUUID(self):
+        """Return the 'uuid' attribute of the wrapped node."""
+        return self.xml_get_uuid(self.dom_node)
+
+    def get_routes(self, type, gw):
+        """ Return the routes as a list of tuples of the form:
+        [(type, gw, lo, hi),]"""
+        res = []
+        tbl = self.dom_node.getElementsByTagName('routetbl')
+        for t in tbl:
+            routes = t.getElementsByTagName('route')
+            for r in routes:
+                net_type = self.xmlattr(r, 'type')
+                # only routes whose own type differs from TYPE are kept;
+                # the tuple carries the caller's type and gw, not the
+                # route's
+                if type != net_type:
+                    lo = self.xmlattr(r, 'lo')
+                    hi = self.xmlattr(r, 'hi')
+                    res.append((type, gw, lo, hi))
+        return res
+
+    def get_route_tbl(self):
+        """Return every 'route' element below this node as a list of
+        (type, gw, lo, hi) tuples read from the element's attributes."""
+        ret = []
+        for r in self.dom_node.getElementsByTagName('route'):
+            net_type = self.xmlattr(r, 'type')
+            gw = self.xmlattr(r, 'gw')
+            lo = self.xmlattr(r, 'lo')
+            hi = self.xmlattr(r, 'hi')
+            ret.append((net_type, gw, lo, hi))
+        return ret
+
+
+# ================================================================    
+# LDAP Support
+class LustreDB_LDAP(LustreDB):
+    def __init__(self, name, attrs,
+                 base = "fs=lustre",
+                 parent = None,
+                 url  = "ldap://localhost",
+                 user = "cn=Manager, fs=lustre",
+                 pw   = "secret"
+                 ):
+        """Wrap one LDAP entry (NAME plus its ATTRS dictionary).
+
+        With a PARENT the new object reuses the parent's connection and
+        search base; without one a new connection is opened to URL.
+        NOTE(review): user and pw are stored but open() does not use
+        them; the default password literal should probably not live in
+        the source.
+        """
+        self._name = name
+        self._attrs = attrs
+        self._base = base
+        self._parent = parent
+        self._url  = url
+        self._user = user
+        self._pw   = pw
+        if parent:
+            # share the already-open connection; the parent's base
+            # overrides the BASE argument
+            self.l = parent.l
+            self._base = parent._base
+        else:
+            self.open()
+
+    def open(self):
+        """Connect to self._url and bind with an empty dn and password.
+
+        The ldap module is imported here rather than at file level.  Any
+        ldap.LDAPError is passed to panic().
+        """
+        import ldap
+        try:
+            self.l = ldap.initialize(self._url)
+            # Set LDAP protocol version used
+            self.l.protocol_version=ldap.VERSION3
+            # user and pw only needed if modifying db
+            self.l.bind_s("", "", ldap.AUTH_SIMPLE);
+        except ldap.LDAPError, e:
+            panic(e)
+            # FIXME, do something useful here
+
+    def close(self):
+        """Unbind the LDAP connection held in self.l (which may be
+        shared with a parent object, see __init__)."""
+        self.l.unbind_s()
+
+    def ldap_search(self, filter):
+        """Return the entries directly below the base that match FILTER.
+
+        The search itself only fetches the 'uuid' attribute; each uuid
+        found is then resolved with _lookup_by_uuid(), so the result is
+        a list of whatever that returns (LustreDB_LDAP objects, or None
+        for a uuid that fails to resolve).  A missing base yields an
+        empty list; other LDAP errors are printed and ignored.
+        """
+        import ldap
+        dn = self._base
+        ret = []
+        uuids = []
+        try:
+            for name, attrs in self.l.search_s(dn, ldap.SCOPE_ONELEVEL,
+                                        filter, ["uuid"]):
+                for v in attrs['uuid']:
+                    uuids.append(v)
+        except ldap.NO_SUCH_OBJECT, e:
+            pass
+        except ldap.LDAPError, e:
+            print e                     # FIXME: die here?
+        if len(uuids) > 0:
+            for uuid in uuids:
+                ret.append(self._lookup_by_uuid(uuid))
+        return ret
+
+    def _lookup_by_name(self, name, class_name):
+        """Return the single entry whose lustreName is NAME.
+
+        Anything other than exactly one match (none, or several) gives
+        an empty list.  class_name is not used.
+        NOTE(review): the XML backend returns None on a miss, so callers
+        must only test the result for truth.
+        """
+        list =  self.ldap_search("lustreName=%s" %(name))
+        if len(list) == 1:
+            return list[0]
+        return []
+
+    def _lookup_by_class(self, class_name):
+        """Return the ldap_search() result for entries whose objectclass
+        is CLASS_NAME converted to upper case."""
+        return self.ldap_search("objectclass=%s" %(string.upper(class_name)))
+
+    def _lookup_by_uuid(self, uuid):
+        """Fetch the entry "uuid=<UUID>,<base>" and wrap it.
+
+        The new LustreDB_LDAP is created with parent=self, so it shares
+        this object's connection and base.  Returns None when the entry
+        does not exist (logged via debug) or on any other LDAP error
+        (printed).
+        """
+        import ldap
+        dn = "uuid=%s,%s" % (uuid, self._base)
+        ret = None
+        try:
+            for name, attrs in self.l.search_s(dn, ldap.SCOPE_BASE,
+                                               "objectclass=*"):
+                ret = LustreDB_LDAP(name, attrs,  parent = self)
+                        
+        except ldap.NO_SUCH_OBJECT, e:
+            debug("NO_SUCH_OBJECT:", uuid)
+            pass                        # fall through; ret is still None
+        except ldap.LDAPError, e:
+            print e                     # FIXME: die here?
+        return ret
+
+
+    def _get_val(self, k):
+        ret = None
+        if self._attrs.has_key(k):
+            v = self._attrs[k]
+            if type(v) == types.ListType:
+                ret = str(v[0])
             else:
             else:
-                n = lookup(n, uuid)
-                if n: return n
-    return None
-            
-# Get name attribute of dom_node
-def getName(dom_node):
-    return dom_node.getAttribute('name')
+                ret = str(v)
+        return ret
 
 
-def getRef(dom_node):
-    return dom_node.getAttribute('uuidref')
+    def _get_class(self):
+        """Return the first 'objectClass' value of the entry, lower-cased."""
+        return string.lower(self._attrs['objectClass'][0])
 
 
-# Get name attribute of dom_node
-def getUUID(dom_node):
-    return dom_node.getAttribute('uuid')
+    #
+    # [(ref_class, ref_uuid),]
+    def _get_all_refs(self):
+        """Return (attribute_name, uuid) for every value of every
+        attribute whose name contains 'Ref'.  Unlike the XML backend the
+        list is returned unsorted."""
+        list = []
+        for k in self._attrs.keys():
+            # unanchored search: 'Ref' may appear anywhere in the name
+            if re.search('.*Ref', k):
+                for uuid in self._attrs[k]:
+                    list.append((k, uuid))
+        return list
 
 
-# the tag name is the service type
-# fixme: this should do some checks to make sure the dom_node is a service
-def getServiceType(dom_node):
-    return dom_node.nodeName
+    def _get_refs(self, tag):
+        """ Get all the refs of type TAG.  Returns list of uuids.
+
+        References are stored in the attribute named <TAG>Ref; a missing
+        attribute gives an empty list.
+        """
+        refname = '%sRef' % tag
+        return self._attrs.get(refname, [])
 
 
-#
-# determine what "level" a particular node is at.
-# the order of iniitailization is based on level. 
-def getServiceLevel(dom_node):
-    type = getServiceType(dom_node)
-    ret=0;
-    if type in ('network',):
-        ret = 10
-    elif type in ('device', 'ldlm'):
-        ret = 20
-    elif type in ('obd', 'mdd', 'cobd'):
-        ret = 30
-    elif type in ('mds','ost'):
-        ret = 40
-    elif type in ('mdc','osc'):
-        ret = 50
-    elif type in ('lov', 'lovconfig'):
-        ret = 60
-    elif type in ('mountpoint', 'echo_client'):
-        ret = 70
-
-    if ret < config.minlevel() or ret > config.maxlevel():
-        ret = 0 
-    return ret
+    def getName(self):
+        """Return the entry's 'lustreName' attribute (None if absent)."""
+        return self._get_val('lustreName')
 
 
-#
-# return list of services in a profile. list is a list of tuples
-# [(level, dom_node),]
-def getServices(lustreNode, profileNode):
-    list = []
-    for n in profileNode.childNodes: 
-        if n.nodeType == n.ELEMENT_NODE:
-            servNode = lookup(lustreNode, getRef(n))
-            if not servNode:
-                print n
-                panic('service not found: ' + getRef(n))
-            level = getServiceLevel(servNode)
-           if level > 0:
-                list.append((level, servNode))
-    list.sort()
-    return list
-
-def getByName(lustreNode, name, tag):
-    ndList = lustreNode.getElementsByTagName(tag)
-    for nd in ndList:
-        if getName(nd) == name:
-            return nd
-    return None
-    
+    def getUUID(self):
+        """Return the entry's 'uuid' attribute (None if absent)."""
+        return self._get_val('uuid')
+
+    def get_route_tbl(self):
+        """The LDAP backend provides no route table; always returns []."""
+        return []
 
 ############################################################
 # MDC UUID hack - 
 # FIXME: clean this mess up!
 #
 
 ############################################################
 # MDC UUID hack - 
 # FIXME: clean this mess up!
 #
-saved_mdc = {}
-def prepare_mdc(dom_node, mds_uuid):
-    global saved_mdc
-    mds_node = lookup(dom_node, mds_uuid);
-    if not mds_node:
+# OSC is no longer in the xml, so we have to fake it.
+# this is getting ugly and begging for another refactoring
+def get_osc(ost_db, owner):
+    """Build and return the OSC object for ost_db on behalf of owner."""
+    return OSC(ost_db, owner)
+
+def get_mdc(db, owner, mds_uuid):
+    mds_db = db.lookup(mds_uuid);
+    if not mds_db:
         panic("no mds:", mds_uuid)
         panic("no mds:", mds_uuid)
-    if saved_mdc.has_key(mds_uuid):
-        return saved_mdc[mds_uuid]
-    mdc = MDC(mds_node)
+    mdc = MDC(mds_db, owner)
+    return mdc
+
+def prepare_mdc(db, owner, mds_uuid):
+    mdc = get_mdc(db, owner, mds_uuid)
     mdc.prepare()
     mdc.prepare()
-    saved_mdc[mds_uuid] = mdc.uuid
     return mdc.uuid
 
     return mdc.uuid
 
-def cleanup_mdc(dom_node, mds_uuid):
-    global saved_mdc
-    mds_node = lookup(dom_node, mds_uuid);
-    if not mds_node:
-        panic("no mds:", mds_uuid)
-    if not saved_mdc.has_key(mds_uuid):
-        mdc = MDC(mds_node)
-        mdc.cleanup()
-        saved_mdc[mds_uuid] = mdc.uuid
+def cleanup_mdc(db, owner, mds_uuid):
+    """Build the MDC for mds_uuid via get_mdc() and clean it up."""
+    get_mdc(db, owner, mds_uuid).cleanup()
         
 
 ############################################################
         
 
 ############################################################
@@ -1497,125 +1978,111 @@ routes = []
 local_node = []
 router_flag = 0
 
 local_node = []
 router_flag = 0
 
-def init_node(dom_node):
-    global local_node, router_flag
-    netlist = dom_node.getElementsByTagName('network')
-    for dom_net in netlist:
-        type = get_attr(dom_net, 'type')
-        gw = get_text(dom_net, 'server')
-        local_node.append((type, gw))
+def add_local_interfaces(node_db):
+    """Register the network interfaces of node_db as local.
+
+    For every network of the node a (net_type, nid) pair is appended to
+    the global local_node list.  'tcp' and 'toe' networks additionally
+    get an AcceptorHandler stored in acceptors, keyed by port.
+    """
+    global local_node
+    for netuuid in node_db.get_networks():
+        net = node_db.lookup(netuuid)
+        srv = Network(net)
+        debug("add_local", netuuid)
+        local_node.append((srv.net_type, srv.nid))
+        # the duplicate-port check runs for every interface, whatever
+        # its type
+        if acceptors.has_key(srv.port):
+            panic("duplicate port:", srv.port)
+        if srv.net_type in ('tcp', 'toe'):
+            acceptors[srv.port] = AcceptorHandler(srv.port, srv.net_type,
+                                                  srv.send_mem, srv.recv_mem,
+                                                  srv.irq_affinity,
+                                                  srv.nid_exchange)
 
 def node_needs_router():
     return router_flag
 
 
 def node_needs_router():
     return router_flag
 
-def get_routes(type, gw, dom_net):
-    """ Return the routes as a list of tuples of the form:
-        [(type, gw, lo, hi),]"""
-    res = []
-    tbl = dom_net.getElementsByTagName('route_tbl')
-    for t in tbl:
-        routes = t.getElementsByTagName('route')
-        for r in routes:
-            lo = get_attr(r, 'lo')
-            hi = get_attr(r, 'hi', '')
-            res.append((type, gw, lo, hi))
-    return res
-    
-
 def init_route_config(lustre):
     """ Scan the lustre config looking for routers.  Build list of
     routes. """
     global routes, router_flag
     routes = []
 def init_route_config(lustre):
     """ Scan the lustre config looking for routers.  Build list of
     routes. """
     global routes, router_flag
     routes = []
-    list = lustre.getElementsByTagName('node')
-    for node in list:
-        if get_attr(node, 'router'):
+    list = lustre.lookup_class('node')
+    for node_db in list:
+        if node_db.get_val_int('router', 0):
             router_flag = 1
             router_flag = 1
+            #debug("init_route_config: found router", node_db.getName())
             for (local_type, local_nid) in local_node:
             for (local_type, local_nid) in local_node:
+                #debug("init_route_config:", local_type, local_nid)
                 gw = None
                 gw = None
-                netlist = node.getElementsByTagName('network')
-                for dom_net in netlist:
-                    if local_type == get_attr(dom_net, 'type'):
-                        gw = get_text(dom_net, 'server')
+                for netuuid in node_db.get_networks():
+                    db = node_db.lookup(netuuid)
+                    if local_type == db.get_val('nettype'):
+                        gw = db.get_val('nid')
                         break
                         break
+                #debug("init_route_config: gw is", gw)
                 if not gw:
                     continue
                 if not gw:
                     continue
-                for dom_net in netlist:
-                    if local_type != get_attr(dom_net, 'type'):
-                        for route in get_routes(local_type, gw, dom_net):
-                            routes.append(route)
-    
+                for route in node_db.get_routes(local_type, gw):
+                    routes.append(route)
+    debug("init_route_config routes:", routes)
+
 
 
-def local_net(net):
+def local_net(srv_list):
     global local_node
     for iface in local_node:
     global local_node
     for iface in local_node:
-        if net.net_type == iface[0]:
+        for srv in srv_list:
+            #debug("local_net a:", srv.net_type, "b:", iface[0])
+            if srv.net_type == iface[0]:
+                return srv
+    return None
+
+def local_net_type(net_type):
+    global local_node
+    for iface in local_node:
+        if net_type == iface[0]:
             return 1
     return 0
 
             return 1
     return 0
 
-def find_route(net):
+def find_route(srv_list):
     global local_node, routes
     frm_type = local_node[0][0]
     global local_node, routes
     frm_type = local_node[0][0]
-    to_type = net.net_type
-    to = net.nid
-    debug ('looking for route to', to_type,to)
-    for r in routes:
-        if  r[2] == to:
-            return r
-    return None
+    for srv in srv_list:
+        #debug("find_route: srv:", srv.hostaddr, "type: ", srv.net_type)
+        to_type = srv.net_type
+        to = srv.hostaddr
+        #debug ('looking for route to', to_type, to)
+        for r in routes:
+            #debug("find_route: ", r)
+            if  r[2] == to:
+                return srv, r
+    return None,None
            
            
-    
-        
 
 ############################################################
 # lconf level logic
 # Start a service.
 
 ############################################################
 # lconf level logic
 # Start a service.
-def startService(dom_node, module_flag):
-    type = getServiceType(dom_node)
-    debug('Service:', type, getName(dom_node), getUUID(dom_node))
-    # there must be a more dynamic way of doing this...
+def newService(db):
+    type = db.get_class()
+    debug('Service:', type, db.getName(), db.getUUID())
     n = None
     if type == 'ldlm':
     n = None
     if type == 'ldlm':
-        n = LDLM(dom_node)
+        n = LDLM(db)
+    elif type == 'ptlrpc':
+        n = PTLRPC(db)
     elif type == 'lov':
     elif type == 'lov':
-        n = LOV(dom_node)
-    elif type == 'lovconfig':
-        n = LOVConfig(dom_node)
+        n = LOV(db)
     elif type == 'network':
     elif type == 'network':
-        n = Network(dom_node)
-    elif type == 'obd':
-        n = OBD(dom_node)
+        n = Network(db)
+    elif type == 'routetbl':
+        n = Router(db)
+    elif type == 'osd':
+        n = OSD(db)
     elif type == 'cobd':
     elif type == 'cobd':
-        n = COBD(dom_node)
-    elif type == 'ost':
-        n = OST(dom_node)
-    elif type == 'mds':
-        n = MDS(dom_node)
-    elif type == 'osc':
-        n = VOSC(dom_node)
-    elif type == 'mdc':
-        n = MDC(dom_node)
+        n = COBD(db)
+    elif type == 'mdsdev':
+        n = MDSDEV(db)
     elif type == 'mountpoint':
     elif type == 'mountpoint':
-        n = Mountpoint(dom_node)
-    elif type == 'echo_client':
-        n = ECHO_CLIENT(dom_node)
+        n = Mountpoint(db)
+    elif type == 'echoclient':
+        n = ECHO_CLIENT(db)
     else:
         panic ("unknown service type:", type)
     else:
         panic ("unknown service type:", type)
-
-    if module_flag:
-        if config.nomod():
-            return
-        if config.cleanup():
-            n.cleanup_module()
-        else:
-            n.load_module()
-    else:
-        if config.nosetup():
-            return
-        if config.cleanup():
-            n.cleanup()
-        else:
-            n.prepare()
+    return n
 
 #
 # Prepare the system to run lustre using a particular profile
 
 #
 # Prepare the system to run lustre using a particular profile
@@ -1625,63 +2092,105 @@ def startService(dom_node, module_flag):
 #  * make sure partitions are in place and prepared
 #  * initialize devices with lctl
 # Levels is important, and needs to be enforced.
 #  * make sure partitions are in place and prepared
 #  * initialize devices with lctl
 # Levels is important, and needs to be enforced.
-def startProfile(lustreNode, profileNode, module_flag):
-    if not profileNode:
-        panic("profile:", profile, "not found.")
-    services = getServices(lustreNode, profileNode)
-    if config.cleanup():
-        services.reverse()
+def for_each_profile(db, prof_list, operation):
+    """Apply operation to the service list of every profile in prof_list.
+
+    db        -- config database; each uuid is resolved with db.lookup()
+    prof_list -- sequence of profile uuids to process, in order
+    operation -- callable invoked once per profile with the value
+                 returned by that profile's getServices()
+
+    panic() is called for a uuid that does not resolve.
+    """
+    for prof_uuid in prof_list:
+        prof_db = db.lookup(prof_uuid)
+        if not prof_db:
+            # Name the uuid that failed to resolve.  'profile' is not
+            # defined in this function, so using it here would raise
+            # NameError instead of reporting the missing profile.
+            panic("profile:", prof_uuid, "not found.")
+        services = prof_db.getServices()
+        operation(services)
+        
+def doSetup(services):
+    # Call prepare() on a freshly built service object for every entry in
+    # services, in list order.  Does nothing when config.nosetup() is set.
+    if config.nosetup():
+        return
     for s in services:
     for s in services:
-        startService(s[1], module_flag)
+        # s[1] is what newService() wraps -- presumably the service's
+        # config db entry (TODO confirm against getServices()).
+        n = newService(s[1])
+        n.prepare()
+    
+def doModules(services):
+    """Call load_module() on a new service object for each entry.
+
+    Entries are handled in list order; nothing happens when
+    config.nomod() is set.
+    """
+    if not config.nomod():
+        for entry in services:
+            newService(entry[1]).load_module()
 
 
+def doCleanup(services):
+    """Call cleanup() on a new service object for each entry, last first.
+
+    Nothing happens when config.nosetup() is set.  The services list
+    passed in is reversed in place.
+    """
+    if not config.nosetup():
+        # walk the list back to front; this mutates the caller's list
+        services.reverse()
+        for entry in services:
+            newService(entry[1]).cleanup()
+
+def doUnloadModules(services):
+    """Call cleanup_module() on a new service object for each entry, last first.
+
+    Nothing happens when config.nomod() is set.  The services list
+    passed in is reversed in place.
+    """
+    if not config.nomod():
+        # walk the list back to front; this mutates the caller's list
+        services.reverse()
+        for entry in services:
+            newService(entry[1]).cleanup_module()
 
 #
 # Load profile for 
 
 #
 # Load profile for 
-def doHost(lustreNode, hosts):
+def doHost(lustreDB, hosts):
+    # Set up or clean up this node.  The first name in hosts that has a
+    # 'node' entry in lustreDB is used; every profile that node references
+    # is then run through for_each_profile().  Prints a message and
+    # returns if none of the names has an entry.
     global routes
     global router_flag 
     global routes
     global router_flag 
-    dom_node = None
+    node_db = None
     for h in hosts:
     for h in hosts:
-        dom_node = getByName(lustreNode, h, 'node')
-        if dom_node:
+        node_db = lustreDB.lookup_name(h, 'node')
+        if node_db:
             break
             break
-    if not dom_node:
+    if not node_db:
         print 'No host entry found.'
         return
 
         print 'No host entry found.'
         return
 
-    if get_attr(dom_node, 'router'):
-        router_flag = 1
-    else:
-        router_flag = 0
-    recovery_upcall = get_attr(dom_node, 'recovery_upcall')
-    timeout = get_attr_int(dom_node, 'timeout')
+    # per-node settings, each with a default for when the entry has none
+    router_flag = node_db.get_val_int('router', 0)
+    recovery_upcall = node_db.get_val('recovery_upcall', '')
+    timeout = node_db.get_val_int('timeout', 0)
 
 
+    add_local_interfaces(node_db)
     if not router_flag:
     if not router_flag:
-        init_node(dom_node)
-        init_route_config(lustreNode)
+        init_route_config(lustreDB)
 
     # Two step process: (1) load modules, (2) setup lustre
     # if not cleaning, load modules first.
 
     # Two step process: (1) load modules, (2) setup lustre
     # if not cleaning, load modules first.
-    module_flag = not config.cleanup()
-    reflist = dom_node.getElementsByTagName('profile')
-    for profile in reflist:
-            startProfile(lustreNode,  profile, module_flag)
+    prof_list = node_db.get_refs('profile')
+
+    if config.cleanup():
+        if config.force():
+            # the command line can override this value
+            timeout = 5
+        # ugly hack, only need to run lctl commands for --dump
+        if config.lctl_dump():
+            for_each_profile(node_db, prof_list, doCleanup)
+            return
+
+        sys_set_timeout(timeout)
+        sys_set_recovery_upcall(recovery_upcall)
+
+        # clean up the services first, then unload their modules
+        for_each_profile(node_db, prof_list, doCleanup)
+        for_each_profile(node_db, prof_list, doUnloadModules)
+
+    else:
+        # ugly hack, only need to run lctl commands for --dump
+        if config.lctl_dump():
+            for_each_profile(node_db, prof_list, doSetup)
+            return
+
+        for_each_profile(node_db, prof_list, doModules)
 
 
-    if not config.cleanup():
         sys_set_debug_path()
         script = config.gdb_script()
         run(lctl.lctl, ' modules >', script)
         if config.gdb():
         sys_set_debug_path()
         script = config.gdb_script()
         run(lctl.lctl, ' modules >', script)
         if config.gdb():
-            # dump /tmp/ogdb and sleep/pause here
             log ("The GDB module script is in", script)
             log ("The GDB module script is in", script)
+            # pause, so user has time to break and
+            # load the script
             time.sleep(5)
         sys_set_timeout(timeout)
         sys_set_recovery_upcall(recovery_upcall)
             time.sleep(5)
         sys_set_timeout(timeout)
         sys_set_recovery_upcall(recovery_upcall)
-            
-            
-    module_flag = not module_flag
-    for profile in reflist:
-            startProfile(lustreNode,  profile, module_flag)
+
+        # modules are loaded and sysctls set; now prepare the services
+        for_each_profile(node_db, prof_list, doSetup)
 
 ############################################################
 # Command line processing
 
 ############################################################
 # Command line processing
@@ -1692,7 +2201,8 @@ def parse_cmdline(argv):
                  "portals=", "makeldiff", "cleanup", "noexec",
                  "help", "node=", "nomod", "nosetup",
                  "dump=", "force", "minlevel=", "maxlevel=",
                  "portals=", "makeldiff", "cleanup", "noexec",
                  "help", "node=", "nomod", "nosetup",
                  "dump=", "force", "minlevel=", "maxlevel=",
-                 "timeout=", "recovery_upcall="]
+                 "timeout=", "recovery_upcall=",
+                 "ldapurl=", "config=", "select=", "lctl_dump="]
     opts = []
     args = []
 
     opts = []
     args = []
 
@@ -1711,7 +2221,6 @@ def parse_cmdline(argv):
             config.verbose(1)
         if o in ("-n", "--noexec"):
             config.noexec(1)
             config.verbose(1)
         if o in ("-n", "--noexec"):
             config.noexec(1)
-            config.verbose(1)
         if o == "--portals":
             config.portals_dir(a)
         if o == "--lustre":
         if o == "--portals":
             config.portals_dir(a)
         if o == "--lustre":
@@ -1730,14 +2239,23 @@ def parse_cmdline(argv):
             config.dump_file(a)
         if o in ("-f", "--force"):
             config.force(1)
             config.dump_file(a)
         if o in ("-f", "--force"):
             config.force(1)
-       if o in ("--minlevel",):
+       if o == "--minlevel":
                config.minlevel(a)
                config.minlevel(a)
-        if o in ("--maxlevel",):
+        if o == "--maxlevel":
                 config.maxlevel(a)
                 config.maxlevel(a)
-        if o in ("--timeout",):
+        if o == "--timeout":
                 config.timeout(a)
                 config.timeout(a)
-        if o in ("--recovery_upcall",):
+        if o == "--recovery_upcall":
                 config.recovery_upcall(a)
                 config.recovery_upcall(a)
+        if o == "--ldapurl":
+                config.ldapurl(a)
+        if o == "--config":
+                config.config_name(a)
+        if o == "--select":
+                config.init_select(a)
+        if o == "--lctl_dump":
+            config.lctl_dump(a)
+
     return args
 
 def fetch(url):
     return args
 
 def fetch(url):
@@ -1793,9 +2311,9 @@ def sys_set_recovery_upcall(upcall):
 
 def sys_set_timeout(timeout):
     # the command overrides the value in the node config
 
 def sys_set_timeout(timeout):
     # the command overrides the value in the node config
+    # Only a positive value is passed on to sysctl('lustre/timeout', ...);
+    # a timeout of 0 from both config.timeout() and the argument leaves the
+    # setting untouched.
-    if config.timeout() >= 0:
+    if config.timeout() > 0:
         timeout = config.timeout()
         timeout = config.timeout()
-    if timeout >= 0:
+    if timeout > 0:
         debug("setting timeout:", timeout)
         sysctl('lustre/timeout', timeout)
 
         debug("setting timeout:", timeout)
         sysctl('lustre/timeout', timeout)
 
@@ -1846,7 +2364,7 @@ def sanitise_path():
 # Shutdown does steps in reverse
 #
 def main():
 # Shutdown does steps in reverse
 #
 def main():
-    global TCP_ACCEPTOR, lctl, MAXTCPBUF
+    global  lctl, MAXTCPBUF
 
     host = socket.gethostname()
 
 
     host = socket.gethostname()
 
@@ -1867,10 +2385,17 @@ def main():
         if not os.access(args[0], os.R_OK):
             print 'File not found or readable:', args[0]
             sys.exit(1)
         if not os.access(args[0], os.R_OK):
             print 'File not found or readable:', args[0]
             sys.exit(1)
-        dom = xml.dom.minidom.parse(args[0])
-    elif config.url():
-        xmldata = fetch(config.url())
-        dom = xml.dom.minidom.parseString(xmldata)
+        try:
+            dom = xml.dom.minidom.parse(args[0])
+        except Exception:
+            panic("%s does not appear to be a config file." % (args[0]))
+            sys.exit(1) # make sure to die here, even in debug mode.
+        db = LustreDB_XML(dom.documentElement, dom.documentElement)
+    elif config.ldapurl():
+        if not config.config_name():
+            panic("--ldapurl requires --config name")
+        dn = "config=%s,fs=lustre" % (config.config_name())
+        db = LustreDB_LDAP('', {}, base=dn, url = config.ldapurl())
     else:
         usage()
 
     else:
         usage()
 
@@ -1889,20 +2414,15 @@ def main():
 
     setupModulePath(sys.argv[0])
 
 
     setupModulePath(sys.argv[0])
 
-    TCP_ACCEPTOR = find_prog('acceptor')
-    if not TCP_ACCEPTOR:
-        if config.noexec():
-            TCP_ACCEPTOR = 'acceptor'
-            debug('! acceptor not found')
-        else:
-            panic('acceptor not found')
-
     lctl = LCTLInterface('lctl')
     lctl = LCTLInterface('lctl')
+    if config.lctl_dump():
+        lctl.use_save_file(config.lctl_dump())
+    else:
+        sys_make_devices()
+        sys_set_netmem_max('/proc/sys/net/core/rmem_max', MAXTCPBUF)
+        sys_set_netmem_max('/proc/sys/net/core/wmem_max', MAXTCPBUF)
 
 
-    sys_make_devices()
-    sys_set_netmem_max('/proc/sys/net/core/rmem_max', MAXTCPBUF)
-    sys_set_netmem_max('/proc/sys/net/core/wmem_max', MAXTCPBUF)
-    doHost(dom.documentElement, node_list)
+    doHost(db, node_list)
 
 if __name__ == "__main__":
     try:
 
 if __name__ == "__main__":
     try:
@@ -1915,4 +2435,4 @@ if __name__ == "__main__":
 
     if first_cleanup_error:
         sys.exit(first_cleanup_error)
 
     if first_cleanup_error:
         sys.exit(first_cleanup_error)
-
+