- merge 0.7rc1 from b_devel to HEAD (20030612 merge point)
similarity index 60%
rename from lustre/utils/lconf.in
rename to lustre/utils/lconf
index cbe05dd..7b31fef 100755 (executable)
@@ -26,7 +26,7 @@
 
 import sys, getopt, types
 import string, os, stat, popen2, socket, time, random, fcntl, select
-import re, exceptions
+import re, exceptions, signal
 import xml.dom.minidom
 
 if sys.version[0] == '1':
@@ -34,6 +34,19 @@ if sys.version[0] == '1':
 else:
     from fcntl import F_GETFL, F_SETFL
 
+PYMOD_DIR = "/usr/lib/lustre/python"
+
+def development_mode():
+    base = os.path.dirname(sys.argv[0])
+    if os.access(base+"/Makefile.am", os.R_OK):
+        return 1
+    return 0
+
+if not development_mode():
+    sys.path.append(PYMOD_DIR)
+
+import Lustre
+
 # Global parameters
 MAXTCPBUF = 1048576
 DEFAULT_TCPBUF = 1048576
@@ -41,7 +54,61 @@ DEFAULT_TCPBUF = 1048576
 # Maximum number of devices to search for.
 # (the /dev/loop* nodes need to be created beforehand)
 MAX_LOOP_DEVICES = 256
-PORTALS_DIR = '@PORTALSLOC@'
+PORTALS_DIR = 'portals'
+
+
+# Please keep these up to date with the values in portals/kp30.h
+ptldebug_names = { 
+    "trace" :     (1 << 0),
+    "inode" :     (1 << 1),
+    "super" :     (1 << 2),
+    "ext2" :      (1 << 3),
+    "malloc" :    (1 << 4),
+    "cache" :     (1 << 5),
+    "info" :      (1 << 6),
+    "ioctl" :     (1 << 7),
+    "blocks" :    (1 << 8),
+    "net" :       (1 << 9),
+    "warning" :   (1 << 10),
+    "buffs" :     (1 << 11),
+    "other" :     (1 << 12),
+    "dentry" :    (1 << 13),
+    "portals" :   (1 << 14),
+    "page" :      (1 << 15),
+    "dlmtrace" :  (1 << 16),
+    "error" :     (1 << 17),
+    "emerg" :     (1 << 18),
+    "ha" :        (1 << 19),
+    "rpctrace" :  (1 << 20),
+    "vfstrace" :  (1 << 21),
+    }
+
+subsystem_names = {
+    "undefined" :    (0 << 24),
+    "mdc" :          (1 << 24),
+    "mds" :          (2 << 24),
+    "osc" :          (3 << 24),
+    "ost" :          (4 << 24),
+    "class" :        (5 << 24),
+    "obdfs" :        (6 << 24),
+    "llite" :        (7 << 24),
+    "rpc" :          (8 << 24),
+    "ext2obd" :      (9 << 24),
+    "portals" :     (10 << 24),
+    "socknal" :     (11 << 24),
+    "qswnal" :      (12 << 24),
+    "pinger" :      (13 << 24),
+    "filter" :      (14 << 24),
+    "trace" :       (15 << 24),
+    "echo" :        (16 << 24),
+    "ldlm" :        (17 << 24),
+    "lov" :         (18 << 24),
+    "gmnal" :       (19 << 24),
+    "ptlrouter" :   (20 << 24),
+    "cobd" :        (21 << 24),
+    "ptlbd" :       (22 << 24),
+    }
+
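+
+# These two tables mirror the bitmasks in portals/kp30.h: debug flags occupy
+# the low bits, and the subsystem number is shifted into bits 24 and up. As a
+# minimal sketch of how the values combine, a hypothetical helper (not part
+# of lconf) that builds a debug mask from the table above:
+#
+#     def ptldebug_mask(names):
+#         # OR together the bit for each requested flag, e.g.
+#         # ptldebug_mask(['trace', 'net', 'error']) == (1 << 0) | (1 << 9) | (1 << 17)
+#         mask = 0
+#         for name in names:
+#             mask = mask | ptldebug_names[name]
+#         return mask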
 
 first_cleanup_error = 0
 def cleanup_error(rc):
@@ -49,194 +116,16 @@ def cleanup_error(rc):
     if not first_cleanup_error:
         first_cleanup_error = rc
 
-
-def usage():
-    print """usage: lconf config.xml
-
-config.xml          Lustre configuration in xml format.
---ldapurl           LDAP server URL, e.g. ldap://localhost
---config            Cluster config name used for LDAP query
---node <nodename>   Load config for <nodename>
---select service=nodeA,service2=nodeB   U
--d | --cleanup      Cleans up config. (Shutdown)
--f | --force        Forced unmounting and/or obd detach during cleanup
--v | --verbose      Print system commands as they are run
--h | --help         Print this help 
---gdb               Prints message after creating gdb module script
-                    and sleeps for 5 seconds.
--n | --noexec       Prints the commands and steps that will be run for a
-                    config without executing them. This can be used to check if a
-                    config file is doing what it should be doing. (Implies -v)
---nomod             Skip load/unload module step.
---nosetup           Skip device setup/cleanup step.
---reformat          Reformat all devices (without question)
---dump <file>       Dump the kernel debug log before portals is unloaded
---minlevel <num>    Specify the minimum level of services to configure/cleanup (default 0)
---maxlevel <num>    Specify the maximum level of services to configure/cleanup (default 100)
-                    Levels are approximately like:
-                            10 - network
-                            20 - device, ldlm
-                            30 - osd, mdd
-                            40 - mds, ost
-                            50 - mdc, osc
-                            60 - lov
-                            70 - mountpoint, echo_client
---lustre=src_dir    Base directory of lustre sources. This parameter will cause lconf
-                    to load modules from a source tree.
---portals=src_dir   Portals source directory.  If this is a relative path, then it is
-                    assumed to be relative to lustre. 
-
-"""
-    TODO = """
---ldap server       LDAP server with lustre config database
---makeldiff         Translate xml source to LDIF
-These are perhaps not needed:
-"""
-    sys.exit()
-
-# ============================================================
-# Config parameters, encapsulated in a class
-class Config:
-    def __init__(self):
-        # flags
-        self._noexec = 0
-        self._verbose = 0
-        self._reformat = 0
-        self._cleanup = 0
-        self._gdb = 0
-        self._nomod = 0
-        self._nosetup = 0
-        self._force = 0
-        # parameters
-        self._modules = None
-        self._node = None
-        self._url = None
-        self._gdb_script = '/tmp/ogdb'
-        self._debug_path = '/tmp/lustre-log'
-        self._dump_file = None
-        self._lustre_dir = ''
-        self._portals_dir = ''
-       self._minlevel = 0
-       self._maxlevel = 100
-        self._timeout = 0
-        self._recovery_upcall = ''
-        self._ldapurl = ''
-        self._config_name = ''
-        self._select = {}
-        self._lctl_dump = ''
-
-    def verbose(self, flag = None):
-        if flag: self._verbose = flag
-        return self._verbose
-
-    def noexec(self, flag = None):
-        if flag: self._noexec = flag
-        return self._noexec
-
-    def reformat(self, flag = None):
-        if flag: self._reformat = flag
-        return self._reformat
-
-    def cleanup(self, flag = None):
-        if flag: self._cleanup = flag
-        return self._cleanup
-
-    def gdb(self, flag = None):
-        if flag: self._gdb = flag
-        return self._gdb
-
-    def nomod(self, flag = None):
-        if flag: self._nomod = flag
-        return self._nomod
-
-    def nosetup(self, flag = None):
-        if flag: self._nosetup = flag
-        return self._nosetup
-
-    def force(self, flag = None):
-        if flag: self._force = flag
-        return self._force
-
-    def node(self, val = None):
-        if val: self._node = val
-        return self._node
-
-    def gdb_script(self):
-        if os.path.isdir('/r'):
-            return '/r' + self._gdb_script
-        else:
-            return self._gdb_script
-
-    def debug_path(self):
-        if os.path.isdir('/r'):
-            return '/r' + self._debug_path
-        else:
-            return self._debug_path
-
-    def dump_file(self, val = None):
-        if val: self._dump_file = val
-        return self._dump_file
-    def minlevel(self, val = None):
-        if val: self._minlevel = int(val)
-        return self._minlevel
-
-    def maxlevel(self, val = None):
-        if val: self._maxlevel = int(val)
-        return self._maxlevel
-
-    def portals_dir(self, val = None):
-        if val: self._portals_dir = val
-        return self._portals_dir
-
-    def lustre_dir(self, val = None):
-        if val: self._lustre_dir = val
-        return self._lustre_dir
-
-    def timeout(self, val = None):
-        if val: self._timeout = val
-        return self._timeout
-
-    def recovery_upcall(self, val = None):
-        if val: self._recovery_upcall = val
-        return self._recovery_upcall
-
-    def ldapurl(self, val = None):
-        if val: self._ldapurl = val
-        return self._ldapurl
-
-    def config_name(self, val = None):
-        if val: self._config_name = val
-        return self._config_name
-
-    def init_select(self, arg):
-        # arg = "service=nodeA,service2=nodeB"
-        list = string.split(arg, ',')
-        for entry in list:
-            srv, node = string.split(entry, '=')
-            self._select[srv] = node
-        
-    def select(self, srv):
-        if self._select.has_key(srv):
-            return self._select[srv]
-        return None
-
-    def lctl_dump(self, val = None):
-        if val: self._lctl_dump = val
-        return self._lctl_dump
-
-
-config = Config()
-
 # ============================================================ 
 # debugging and error funcs
 
 def fixme(msg = "this feature"):
-    raise LconfError, msg + ' not implemented yet.'
+    raise Lustre.LconfError, msg + ' not implemented yet.'
 
 def panic(*args):
     msg = string.join(map(str,args))
-    if not config.noexec():
-        raise LconfError(msg)
+    if not config.noexec:
+        raise Lustre.LconfError(msg)
     else:
         print "! " + msg
 
@@ -249,10 +138,24 @@ def logall(msgs):
         print string.strip(s)
 
 def debug(*args):
-    if config.verbose():
+    if config.verbose:
         msg = string.join(map(str,args))
         print msg
 
+
+# ack, python's builtin int() does not support '0x123' syntax.
+# eval can do it, although what a hack!
+def my_int(s):
+    try:
+        if s[0:2] == '0x':
+            return eval(s, {}, {})
+        else:
+            return int(s)
+    except SyntaxError, e:
+        raise ValueError("not a number")
+    except NameError, e:
+        raise ValueError("not a number")
+
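+# To illustrate the behavior my_int() papers over (inputs and results below
+# are examples only):
+#
+#     my_int('0x100')   # -> 256, parsed by the restricted eval
+#     my_int('256')     # -> 256, parsed by int()
+#     my_int('oops')    # raises ValueError("not a number")
+#
+# On interpreters whose string.atoi() accepts a base of 0, string.atoi(s, 0)
+# would be an eval-free alternative that also understands the '0x' prefix.
+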
 # ============================================================
 # locally defined exceptions
 class CommandError (exceptions.Exception):
@@ -278,10 +181,6 @@ class CommandError (exceptions.Exception):
         else:
             print self.cmd_err
 
-class LconfError (exceptions.Exception):
-    def __init__(self, args):
-        self.args = args
-
 
 # ============================================================
 # handle daemons, like the acceptor
@@ -374,6 +273,14 @@ def run_acceptors():
         if not daemon.running():
             daemon.start()
 
+def run_one_acceptor(port):
+    if acceptors.has_key(port):
+        daemon = acceptors[port]
+        if not daemon.running():
+            daemon.start()
+    else:
+        panic("run_one_acceptor: No acceptor defined for port:", port)
+        
 def stop_acceptor(port):
     if acceptors.has_key(port):
         daemon = acceptors[port]
@@ -395,7 +302,7 @@ class LCTLInterface:
         self.lctl = find_prog(cmd)
         self.save_file = ''
         if not self.lctl:
-            if config.noexec():
+            if config.noexec:
                 debug('! lctl not found')
                 self.lctl = 'lctl'
             else:
@@ -422,7 +329,7 @@ class LCTLInterface:
             cmds = '\n  dump ' + self.save_file + cmds
 
         debug("+", cmd_line, cmds)
-        if config.noexec(): return (0, [])
+        if config.noexec: return (0, [])
 
         child = popen2.Popen3(cmd_line, 1) # Capture stdout and stderr from command
         child.tochild.write(cmds + "\n")
@@ -474,19 +381,16 @@ class LCTLInterface:
             
     def network(self, net, nid):
         """ initialized network and add "self" """
-        # Idea: "mynid" could be used for all network types to add "self," and then
-        # this special case would be gone and the "self" hack would be hidden.
-        if net  in ('tcp', 'toe'):
-            cmds =  """
+        cmds =  """
   network %s
   mynid %s
   quit """ % (net, nid)
-            self.run(cmds)
+        self.run(cmds)
 
     # create a new connection
     def connect(self, srv):
         cmds =  "\n  add_uuid %s %s %s" % (srv.uuid, srv.nid, srv.net_type)
-        if srv.net_type  in ('tcp', 'toe') and not config.lctl_dump():
+        if srv.net_type  in ('tcp', 'toe') and not config.lctl_dump:
             flags = ''
             if srv.irq_affinity:
                 flags = flags + 'i'
@@ -503,6 +407,14 @@ class LCTLInterface:
 
         cmds = cmds + "\n  quit"
         self.run(cmds)
+
+    # Recover a device
+    def recover(self, dev_uuid, new_conn):
+        cmds = """
+    device %%%s
+    probe
+    recover %s""" %(dev_uuid, new_conn)
+        self.run(cmds)
                 
     # add a route to a range
     def add_route(self, net, gw, lo, hi):
@@ -553,6 +465,13 @@ class LCTLInterface:
   quit""" % (net, nid, servuuid)
         self.run(cmds)
 
+    def del_uuid(self, servuuid):
+        cmds =  """
+  ignore_errors
+  del_uuid %s
+  quit""" % (servuuid,)
+        self.run(cmds)
+
     # disconnect all
     def disconnectAll(self, net):
         cmds =  """
@@ -572,17 +491,20 @@ class LCTLInterface:
         self.run(cmds)
 
     # cleanup a device
-    def cleanup(self, name, uuid):
+    def cleanup(self, name, uuid, force, failover = 0):
+        if failover: force = 1
         cmds = """
   ignore_errors
   device $%s
-  cleanup %s
+  cleanup %s %s
   detach
-  quit""" % (name, ('', 'force')[config.force()])
+  quit""" % (name, ('', 'force')[force],
+             ('', 'failover')[failover])
         self.run(cmds)
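
The ('', 'force')[force] construct used above is the old-Python idiom for a conditional expression: a 0/1 flag indexes a two-element tuple. A standalone illustration:

    force = 1
    word = ('', 'force')[force]   # 'force' when force is 1, '' when it is 0
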
 
     # create an lov
-    def lov_setconfig(self, uuid, mdsuuid, stripe_cnt, stripe_sz, stripe_off, pattern, devlist):
+    def lov_setconfig(self, uuid, mdsuuid, stripe_cnt, stripe_sz, stripe_off,
+                      pattern, devlist):
         cmds = """
   device $%s
   probe
@@ -599,7 +521,13 @@ class LCTLInterface:
 
     # get list of devices
     def device_list(self):
-        rc, out = self.runcmd('device_list')
+        try:
+            rc, out = self.runcmd('device_list')
+        except CommandError, e:
+            if config.cleanup:
+                out = []
+            else:
+                raise e
         return out
 
     # get lustre version
@@ -607,6 +535,12 @@ class LCTLInterface:
         rc, out = self.runcmd('version')
         return out
 
+    # dump mount options
+    def mount_option(self, option):
+        cmds = """
+  mount_option %s
+  quit""" % (option)
+        self.run(cmds)
 # ============================================================
 # Various system-level functions
 # (ideally moved to their own module)
@@ -616,7 +550,7 @@ class LCTLInterface:
 # save it if necessary
 def runcmd(cmd):
     debug ("+", cmd)
-    if config.noexec(): return (0, [])
+    if config.noexec: return (0, [])
     f = os.popen(cmd + ' 2>&1')
     out = f.readlines()
     ret = f.close()
@@ -634,7 +568,7 @@ def run(*args):
 def run_daemon(*args):
     cmd = string.join(map(str,args))
     debug ("+", cmd)
-    if config.noexec(): return 0
+    if config.noexec: return 0
     f = os.popen(cmd + ' 2>&1')
     ret = f.close()
     if ret:
@@ -649,8 +583,8 @@ def find_prog(cmd):
     syspath = string.split(os.environ['PATH'], ':')
     cmdpath = os.path.dirname(sys.argv[0])
     syspath.insert(0, cmdpath);
-    if config.portals_dir():
-        syspath.insert(0, os.path.join(config.portals_dir()+'/linux/utils/'))
+    if config.portals:
+        syspath.insert(0, os.path.join(config.portals, 'utils/'))
     for d in syspath:
         prog = os.path.join(d,cmd)
         if os.access(prog, os.X_OK):
@@ -690,25 +624,32 @@ def is_block(path):
 
 # build fs according to type
 # fixme: dangerous
-def mkfs(dev, devsize, fstype):
+def mkfs(dev, devsize, fstype,jsize):
     block_cnt = ''
+    jopt = ''
     if devsize:
+        if devsize < 8000:
+            panic("size of filesystem on '%s' must be larger than 8MB, but is set to %s"%
+                  (dev, devsize))
         # devsize is in 1k, and fs block count is in 4k
         block_cnt = devsize/4
 
-    if(fstype in ('ext3', 'extN')):
+    if fstype in ('ext3', 'extN'):
+        # ext3 journal size is in megabytes
+        if jsize:  jopt = "-J size=%d" %(jsize,)
         mkfs = 'mkfs.ext2 -j -b 4096 -F '
-    elif (fstype == 'reiserfs'):
+    elif fstype == 'reiserfs':
+        # reiserfs journal size is in blocks
+        if jsize:  jopt = "--journal_size %d" %(jsize,)
         mkfs = 'mkreiserfs -ff'
     else:
         print 'unsupported fs type: ', fstype
 
-    (ret, out) = run (mkfs, dev, block_cnt)
+    (ret, out) = run (mkfs, jopt, dev, block_cnt)
     if ret:
-        panic("Unable to build fs:", dev)
+        panic("Unable to build fs:", dev, string.join(out))
     # enable hash tree indexing on the fs
-    # FIXME: this check can probably go away on 2.5
-    if fstype == 'extN':
+    if fstype in ('ext3', 'extN'):
         htree = 'echo "feature FEATURE_C5" | debugfs -w'
         (ret, out) = run (htree, dev)
         if ret:
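
To make the journal-size handling in this mkfs() hunk concrete, a sketch of one call and the command line it would assemble (device path and sizes are hypothetical):

    # devsize is in 1k units, so 409600 means a 400MB device;
    # for ext3 the journal size is given in megabytes.
    mkfs('/dev/loop0', 409600, 'ext3', 32)
    # -> runs: mkfs.ext2 -j -b 4096 -F -J size=32 /dev/loop0 102400
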
@@ -731,7 +672,7 @@ def find_loop(file):
         dev = loop + str(n)
         if os.access(dev, os.R_OK):
             (stat, out) = run('losetup', dev)
-            if (out and stat == 0):
+            if out and stat == 0:
                 m = re.search(r'\((.*)\)', out[0])
                 if m and file == m.group(1):
                     return dev
@@ -740,18 +681,19 @@ def find_loop(file):
     return ''
 
 # create file if necessary and assign the first free loop device
-def init_loop(file, size, fstype):
+def init_loop(file, size, fstype, journal_size):
     dev = find_loop(file)
     if dev:
         print 'WARNING file:', file, 'already mapped to', dev
         return dev
-    if config.reformat()  or not os.access(file, os.R_OK | os.W_OK):
+    if config.reformat or not os.access(file, os.R_OK | os.W_OK):
         if size < 8000:
-            panic(file, "size must be larger than 8MB, currently set to:", size)
+            panic("size of loopback file '%s' must be larger than 8MB, but is set to %s" % (file,size))
         (ret, out) = run("dd if=/dev/zero bs=1k count=0 seek=%d of=%s" %(size,
                                                                          file))
         if ret:
             panic("Unable to create backing store:", file)
+        mkfs(file, size, fstype, journal_size)
 
     loop = loop_base()
     # find next free loop
@@ -759,7 +701,7 @@ def init_loop(file, size, fstype):
         dev = loop + str(n)
         if os.access(dev, os.R_OK):
             (stat, out) = run('losetup', dev)
-            if (stat):
+            if stat:
                 run('losetup', dev, file)
                 return dev
         else:
@@ -783,12 +725,12 @@ def need_format(fstype, dev):
     return 0
 
 # initialize a block device if needed
-def block_dev(dev, size, fstype, format):
-    if config.noexec(): return dev
+def block_dev(dev, size, fstype, format, journal_size):
+    if config.noexec: return dev
     if not is_block(dev):
-        dev = init_loop(dev, size, fstype)
-    if config.reformat() or (need_format(fstype, dev) and format == 'yes'):
-        mkfs(dev, size, fstype)
+        dev = init_loop(dev, size, fstype, journal_size)
+    elif config.reformat or (need_format(fstype, dev) and format == 'yes'):
+        mkfs(dev, size, fstype, journal_size)
 
 #    else:
 #        panic("device:", dev,
@@ -807,8 +749,7 @@ def if2addr(iface):
     return ip
 
 def get_local_nid(net_type, wildcard):
-    """Return the local nid. First look for an elan interface,
-      then use the local address. """
+    """Return the local nid."""
     local = ""
     if os.access('/proc/elan/device0/position', os.R_OK):
         local = get_local_address('elan', '*')
@@ -843,16 +784,28 @@ def get_local_address(net_type, wildcard):
             log(e)
     elif net_type == 'gm':
         fixme("automatic local address for GM")
+    elif net_type == 'scimac':
+        scinode="/opt/scali/sbin/scinode"
+        if os.path.exists(scinode):
+            (rc,local) = run(scinode)
+        else:
+            panic (scinode, " not found on node with scimac networking")
+        if rc:
+            panic (scinode, " failed")
+        local=string.rstrip(local[0])
+
     return local
         
 
 def is_prepared(uuid):
     """Return true if a device exists for the uuid"""
-    # expect this format:
-    # 1 UP ldlm ldlm ldlm_UUID 2
-    if config.lctl_dump():
+    if config.lctl_dump:
         return 0
+    if config.noexec and config.cleanup:
+        return 1
     try:
+        # expect this format:
+        # 1 UP ldlm ldlm ldlm_UUID 2
         out = lctl.device_list()
         for s in out:
             if uuid == string.split(s)[4]:
@@ -861,20 +814,27 @@ def is_prepared(uuid):
         e.dump()
     return 0
 
-def is_network_prepared():
-    """If the  PTLRPC device exists, then assumet that all networking
-       has been configured"""
-    if config.lctl_dump():
+def is_prepared_name(name):
+    """Return true if a device exists for the name"""
+    if config.lctl_dump:
         return 0
+    if config.noexec and config.cleanup:
+        return 1
     try:
+        # expect this format:
+        # 1 UP ldlm ldlm ldlm_UUID 2
         out = lctl.device_list()
         for s in out:
-            if 'RPCDEV_UUID' == string.split(s)[4]:
+            if name == string.split(s)[3]:
                 return 1
     except CommandError, e:
         e.dump()
     return 0
-    
+
+def is_network_prepared():
+    """If the LDLM device exists, then assume that all networking
+       has been configured"""
+    return is_prepared('ldlm_UUID')
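+
+# Both is_prepared() and is_prepared_name() parse the same lctl device_list
+# output described in the comments above. A small sketch of the field layout,
+# using the sample line from the comment:
+#
+#     import string
+#     line = "1 UP ldlm ldlm ldlm_UUID 2"
+#     fields = string.split(line)
+#     # fields[2] is the device type, fields[3] the device name, fields[4] the uuid
+#     assert fields[3] == 'ldlm' and fields[4] == 'ldlm_UUID'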
     
 def fs_is_mounted(path):
     """Return true if path is a mounted lustre filesystem"""
@@ -915,7 +875,7 @@ class Module:
         """ default cleanup, used for most modules """
         self.info()
         try:
-            lctl.cleanup(self.name, self.uuid)
+            lctl.cleanup(self.name, self.uuid, config.force)
         except CommandError, e:
             log(self.module_name, "cleanup failed: ", self.name)
             e.dump()
@@ -923,11 +883,11 @@ class Module:
             
     def add_portals_module(self, dev_dir, modname):
         """Append a module to list of modules to load."""
-        self.kmodule_list.append((config.portals_dir(), dev_dir, modname))
+        self.kmodule_list.append((config.portals, dev_dir, modname))
 
     def add_lustre_module(self, dev_dir, modname):
         """Append a module to list of modules to load."""
-        self.kmodule_list.append((config.lustre_dir(), dev_dir, modname))
+        self.kmodule_list.append((config.lustre, dev_dir, modname))
 
     def mod_loaded(self, modname):
         """Check if a module is already loaded. Look in /proc/modules for it."""
@@ -943,9 +903,9 @@ class Module:
         """Load all the modules in the list in the order they appear."""
         for src_dir, dev_dir, mod in self.kmodule_list:
             #  (rc, out) = run ('/sbin/lsmod | grep -s', mod)
-            if self.mod_loaded(mod) and not config.noexec():
+            if self.mod_loaded(mod) and not config.noexec:
                 continue
-            log ('loading module:', mod)
+            log ('loading module:', mod, 'srcdir', src_dir, 'devdir', dev_dir)
             if src_dir:
                 module = find_module(src_dir, dev_dir,  mod)
                 if not module:
@@ -960,27 +920,34 @@ class Module:
             
     def cleanup_module(self):
         """Unload the modules in the list in reverse order."""
+        if not self.safe_to_clean():
+            return
         rev = self.kmodule_list
         rev.reverse()
         for src_dir, dev_dir, mod in rev:
-            if not self.mod_loaded(mod):
+            if not self.mod_loaded(mod) and not config.noexec:
                 continue
             # debug hack
-            if mod == 'portals' and config.dump_file():
-                lctl.dump(config.dump_file())
+            if mod == 'portals' and config.dump:
+                lctl.dump(config.dump)
             log('unloading module:', mod)
-            if config.noexec():
-                continue
             (rc, out) = run('/sbin/rmmod', mod)
             if rc:
                 log('! unable to unload module:', mod)
                 logall(out)
+
+    def safe_to_clean(self):
+        return 1
+        
+    def safe_to_clean_modules(self):
+        return self.safe_to_clean()
         
 class Network(Module):
     def __init__(self,db):
         Module.__init__(self, 'NETWORK', db)
         self.net_type = self.db.get_val('nettype')
         self.nid = self.db.get_val('nid', '*')
+        self.cluster_id = self.db.get_val('clusterid', "0")
         self.port = self.db.get_val_int('port', 0)
         self.send_mem = self.db.get_val_int('sendmem', DEFAULT_TCPBUF)
         self.recv_mem = self.db.get_val_int('recvmem', DEFAULT_TCPBUF)
@@ -988,7 +955,10 @@ class Network(Module):
         self.nid_exchange = self.db.get_val_int('nidexchange', 0)
 
         if '*' in self.nid:
-            self.nid = get_local_nid(self.net_type, self.nid)
+            if self.nid_exchange:
+                self.nid = get_local_nid(self.net_type, self.nid)
+            else:
+                self.nid = get_local_address(self.net_type, self.nid)
             if not self.nid:
                 panic("unable to set nid for", self.net_type, self.nid)
             debug("nid:", self.nid)
@@ -999,31 +969,88 @@ class Network(Module):
             if not self.nid:
                 panic("unable to set nid for", self.net_type, self.hostaddr)
             debug("hostaddr:", self.hostaddr)
-        # debug ( "hostaddr ", self.hostaddr, "net_type", self.net_type)
 
-        self.add_portals_module("linux/oslib", 'portals')
+        self.add_portals_module("libcfs", 'portals')
         if node_needs_router():
-            self.add_portals_module("linux/router", 'kptlrouter')
+            self.add_portals_module("router", 'kptlrouter')
         if self.net_type == 'tcp':
-            self.add_portals_module("linux/socknal", 'ksocknal')
+            self.add_portals_module("knals/socknal", 'ksocknal')
         if self.net_type == 'toe':
-            self.add_portals_module("/linux/toenal", 'ktoenal')
+            self.add_portals_module("knals/toenal", 'ktoenal')
         if self.net_type == 'elan':
-            self.add_portals_module("/linux/rqswnal", 'kqswnal')
+            self.add_portals_module("knals/qswnal", 'kqswnal')
         if self.net_type == 'gm':
-            self.add_portals_module("/linux/gmnal", 'kgmnal')
-        self.add_lustre_module('obdclass', 'obdclass')
+            self.add_portals_module("knals/gmnal", 'kgmnal')
+        if self.net_type == 'scimac':
+            self.add_portals_module("knals/scimacnal", 'kscimacnal')
 
     def prepare(self):
         if is_network_prepared():
             return
         self.info(self.net_type, self.nid, self.port)
         lctl.network(self.net_type, self.nid)
+        if self.port and  node_is_router():
+            run_one_acceptor(self.port)
+            self.connect_peer_gateways()
+
+    def connect_peer_gateways(self):
+        for router in self.db.lookup_class('node'):
+            if router.get_val_int('router', 0):
+                # if this is a peer with a nid less than mine,
+                # then connect.
+                for netuuid in router.get_networks():
+                    net = self.db.lookup(netuuid)
+                    gw = Network(net)
+                    if (gw.cluster_id == self.cluster_id and
+                        gw.net_type == self.net_type):
+                        # hack: compare as numbers if possible, this should all
+                        # go away once autoconnect is done.
+                        # This also conveniently prevents us from connecting to ourselves.
+                        try:
+                            gw_nid = my_int(gw.nid)
+                            self_nid = my_int(self.nid)
+                        except ValueError, e:
+                            print "Error!", str(e)
+                            gw_nid = gw.nid
+                            self_nid = self.nid
+                        if gw_nid < self_nid:
+                            lctl.connect(gw)
+
+    def disconnect_peer_gateways(self):
+        for router in self.db.lookup_class('node'):
+            if router.get_val_int('router', 0):
+                # if this is a peer with a nid less than mine,
+                # then disconnect.
+                for netuuid in router.get_networks():
+                    net = self.db.lookup(netuuid)
+                    gw = Network(net)
+                    if (gw.cluster_id == self.cluster_id and
+                        gw.net_type == self.net_type):
+                        # hack: compare as numbers if possible, this should all
+                        # go away once autoconnect is done.
+                        # This also conveniently prevents us from connecting to ourselves.
+                        try:
+                            gw_nid = my_int(gw.nid)
+                            self_nid = my_int(self.nid)
+                        except ValueError, e:
+                            print "Error!", str(e)
+                            gw_nid = gw.nid
+                            self_nid = self.nid
+                        if gw_nid < self_nid:
+                            try:
+                                lctl.disconnect(gw.net_type, gw.nid, gw.port,
+                                                gw.uuid)
+                            except CommandError, e:
+                                print "disconnectAll failed: ", self.name
+                                e.dump()
+                                cleanup_error(e.rc)
+
+    def safe_to_clean(self):
+        return not is_network_prepared()
 
     def cleanup(self):
         self.info(self.net_type, self.nid, self.port)
-        if self.net_type in ('tcp', 'toe'):
+        if self.port:
             stop_acceptor(self.port)
+        if  node_is_router():
+            self.disconnect_peer_gateways()
         try:
             lctl.disconnectAll(self.net_type)
         except CommandError, e:
@@ -1031,25 +1058,31 @@ class Network(Module):
             e.dump()
             cleanup_error(e.rc)
 
-class Router(Module):
+class RouteTable(Module):
     def __init__(self,db):
-        Module.__init__(self, 'ROUTER', db)
+        Module.__init__(self, 'ROUTES', db)
     def prepare(self):
         if is_network_prepared():
             return
         self.info()
-        for net_type, gw, lo, hi in self.db.get_route_tbl():
+        for net_type, gw, gw_cluster_id, tgt_cluster_id, lo, hi in self.db.get_route_tbl():
             lctl.add_route(net_type, gw, lo, hi)
-            if net_type in ('tcp', 'toe') and local_net_type(net_type) and hi == '':
+            if net_type in ('tcp', 'toe') and local_net_type(net_type) and lo == hi:
                 srvdb = self.db.nid2server(lo, net_type)
-
                 if not srvdb:
                     panic("no server for nid", lo)
                 else:
                     srv = Network(srvdb)
                     lctl.connect(srv)
+
+    def safe_to_clean(self):
+        return not is_network_prepared()
+
     def cleanup(self):
-        for net_type, gw, lo, hi in self.db.get_route_tbl():
+        if is_network_prepared():
+            # the network is still being used, don't clean it up
+            return
+        for net_type, gw, gw_cluster_id, tgt_cluster_id, lo, hi in self.db.get_route_tbl():
             if net_type in ('tcp', 'toe') and local_net_type(net_type) and hi == '':
                 srvdb = self.db.nid2server(lo, net_type)
                 if not srvdb:
@@ -1072,25 +1105,20 @@ class Router(Module):
 class LDLM(Module):
     def __init__(self,db):
         Module.__init__(self, 'LDLM', db)
+        self.add_lustre_module('obdclass', 'obdclass')
+        self.add_lustre_module('ptlrpc', 'ptlrpc')
         self.add_lustre_module('ldlm', 'ldlm') 
+
     def prepare(self):
         if is_prepared(self.uuid):
             return
         self.info()
         lctl.newdev(attach="ldlm %s %s" % (self.name, self.uuid))
-    def cleanup(self):
-        if is_prepared(self.uuid):
-            Module.cleanup(self)
 
-class PTLRPC(Module):
-    def __init__(self,db):
-        Module.__init__(self, 'PTLRPC', db)
-        self.add_lustre_module('ptlrpc', 'ptlrpc') 
-    def prepare(self):
-        if is_prepared(self.uuid):
-            return
-        self.info()
-        lctl.newdev(attach="ptlrpc %s %s" % (self.name, self.uuid))
+    def safe_to_clean(self):
+        out = lctl.device_list()
+        return len(out) <= 1
+
     def cleanup(self):
         if is_prepared(self.uuid):
             Module.cleanup(self)
@@ -1109,7 +1137,7 @@ class LOV(Module):
         self.devlist = self.db.get_refs('obd')
         self.stripe_cnt = self.db.get_val_int('stripecount', len(self.devlist))
         self.osclist = []
-        self.mdc_uudi = ''
+        self.mdc_uuid = ''
         for obd_uuid in self.devlist:
             obd = self.db.lookup(obd_uuid)
             osc = get_osc(obd, self.name)
@@ -1123,11 +1151,12 @@ class LOV(Module):
             return
         for osc in self.osclist:
             try:
-                # Ignore connection failures, because the LOV will DTRT with
-                # an unconnected OSC.
-                osc.prepare(ignore_connect_failure=1)
-            except CommandError:
+                # Only ignore connect failures with --force, which
+                # isn't implemented here yet.
+                osc.prepare(ignore_connect_failure=0)
+            except CommandError, e:
                 print "Error preparing OSC %s (inactive)\n" % osc.uuid
+                raise e
         self.mdc_uuid = prepare_mdc(self.db, self.name, self.mds_uuid)
         self.info(self.mds_uuid, self.stripe_cnt, self.stripe_sz,
                   self.stripe_off, self.pattern, self.devlist, self.mds_name)
@@ -1178,27 +1207,33 @@ class MDSDEV(Module):
         Module.__init__(self, 'MDSDEV', db)
         self.devpath = self.db.get_val('devpath','')
         self.size = self.db.get_val_int('devsize', 0)
+        self.journal_size = self.db.get_val_int('journalsize', 0)
         self.fstype = self.db.get_val('fstype', '')
         # overwrite the original MDSDEV name and uuid with the MDS name and uuid
         target_uuid = self.db.get_first_ref('target')
         mds = self.db.lookup(target_uuid)
         self.name = mds.getName()
         self.lovconfig_uuids = mds.get_refs('lovconfig')
+        self.filesystem_uuids = mds.get_refs('filesystem')
         # FIXME: if fstype not set, then determine based on kernel version
         self.format = self.db.get_val('autoformat', "no")
-
-        active_uuid = mds.get_active_target()
+        if mds.get_val('failover', 0):
+            self.failover_mds = 'f'
+        else:
+            self.failover_mds = ''
+        active_uuid = get_active_target(mds)
         if not active_uuid:
             panic("No target device found:", target_uuid)
         if active_uuid == self.uuid:
             self.active = 1
         else:
             self.active = 0
+        if self.active and config.group and config.group != mds.get_val('group'):
+            self.active = 0
+
         self.target_dev_uuid = self.uuid
         self.uuid = target_uuid
         # modules
-        if self.fstype == 'extN':
-            self.add_lustre_module('extN', 'extN') 
         self.add_lustre_module('mds', 'mds')
         if self.fstype:
             self.add_lustre_module('obdclass', 'fsfilt_%s' % (self.fstype))
@@ -1215,7 +1250,8 @@ class MDSDEV(Module):
             return
         self.info(self.devpath, self.fstype, self.format)
         run_acceptors()
-        blkdev = block_dev(self.devpath, self.size, self.fstype, self.format)
+        blkdev = block_dev(self.devpath, self.size, self.fstype, self.format,
+                           self.journal_size)
         if not is_prepared('MDT_UUID'):
             lctl.newdev(attach="mdt %s %s" % ('MDT', 'MDT_UUID'),
                         setup ="")
@@ -1225,17 +1261,57 @@ class MDSDEV(Module):
             db = self.db.lookup(uuid)
             lovconfig = LOVConfig(db)
             lovconfig.prepare()
+        if config.mds_ost_conn:
+            for uuid in self.filesystem_uuids:
+                log("open clients for filesystem:", uuid)
+                fs = self.db.lookup(uuid)
+                obd_uuid = fs.get_first_ref('obd')
+                client = VOSC(self.db.lookup(obd_uuid), self.name)
+                client.prepare()
+                
             
+    def mds_remaining(self):
+        out = lctl.device_list()
+        for s in out:
+            if string.split(s)[2] in ('mds',):
+                return 1
+
+    def safe_to_clean(self):
+        return self.active
+
+    def safe_to_clean_modules(self):
+        return not self.mds_remaining()
+
     def cleanup(self):
-        if is_prepared('MDT_UUID'):
+        if not self.active:
+            debug(self.uuid, "not active")
+            return
+        if is_prepared(self.uuid):
+            self.info()
             try:
-                lctl.cleanup("MDT", "MDT_UUID")
+                lctl.cleanup(self.name, self.uuid, config.force,
+                             config.failover)
+            except CommandError, e:
+                log(self.module_name, "cleanup failed: ", self.name)
+                e.dump()
+                cleanup_error(e.rc)
+                Module.cleanup(self)
+        if config.mds_ost_conn:
+            for uuid in self.filesystem_uuids:
+                log("clean clients for filesystem:", uuid)
+                log("open clients for filesystem:", uuid)
+                fs = self.db.lookup(uuid)
+                obd_uuid = fs.get_first_ref('obd')
+                client = VOSC(self.db.lookup(obd_uuid), self.name)
+                client.cleanup()
+        if not self.mds_remaining() and is_prepared('MDT_UUID'):
+            try:
+                lctl.cleanup("MDT", "MDT_UUID", config.force,
+                             config.failover)
             except CommandError, e:
                 print "cleanup failed: ", self.name
                 e.dump()
                 cleanup_error(e.rc)
-        if is_prepared(self.uuid):
-            Module.cleanup(self)
         clean_loop(self.devpath)
 
 class OSD(Module):
@@ -1244,29 +1320,35 @@ class OSD(Module):
         self.osdtype = self.db.get_val('osdtype')
         self.devpath = self.db.get_val('devpath', '')
         self.size = self.db.get_val_int('devsize', 0)
+        self.journal_size = self.db.get_val_int('journalsize', 0)
         self.fstype = self.db.get_val('fstype', '')
         target_uuid = self.db.get_first_ref('target')
         ost = self.db.lookup(target_uuid)
         self.name = ost.getName()
-        # FIXME: if fstype not set, then determine based on kernel version
         self.format = self.db.get_val('autoformat', 'yes')
-        if self.fstype == 'extN':
-            self.add_lustre_module('extN', 'extN') 
+        if ost.get_val('failover', 0):
+            self.failover_ost = 'f'
+        else:
+            self.failover_ost = ''
 
-        active_uuid = ost.get_active_target()
+        active_uuid = get_active_target(ost)
         if not active_uuid:
             panic("No target device found:", target_uuid)
         if active_uuid == self.uuid:
             self.active = 1
         else:
             self.active = 0
+        if self.active and config.group and config.group != ost.get_val('group'):
+            self.active = 0
+            
         self.target_dev_uuid = self.uuid
         self.uuid = target_uuid
         # modules
         self.add_lustre_module('ost', 'ost')
-        self.add_lustre_module(self.osdtype, self.osdtype)
+        # FIXME: should we default to ext3 here?
         if self.fstype:
             self.add_lustre_module('obdclass' , 'fsfilt_%s' % (self.fstype))
+        self.add_lustre_module(self.osdtype, self.osdtype)
 
     def load_module(self):
         if self.active:
@@ -1281,28 +1363,54 @@ class OSD(Module):
         if not self.active:
             debug(self.uuid, "not active")
             return
-        self.info(self.osdtype, self.devpath, self.size, self.fstype, self.format)
+        self.info(self.osdtype, self.devpath, self.size, self.fstype,
+                  self.format, self.journal_size)
         run_acceptors()
         if self.osdtype == 'obdecho':
             blkdev = ''
         else:
-            blkdev = block_dev(self.devpath, self.size, self.fstype, self.format)
+            blkdev = block_dev(self.devpath, self.size, self.fstype,
+                               self.format, self.journal_size)
         lctl.newdev(attach="%s %s %s" % (self.osdtype, self.name, self.uuid),
-                    setup ="%s %s" %(blkdev, self.fstype))
+                    setup ="%s %s %s" %(blkdev, self.fstype,
+                                        self.failover_ost))
         if not is_prepared('OSS_UUID'):
             lctl.newdev(attach="ost %s %s" % ('OSS', 'OSS_UUID'),
                         setup ="")
 
+    def osd_remaining(self):
+        out = lctl.device_list()
+        for s in out:
+            if string.split(s)[2] in ('obdfilter', 'obdecho'):
+                return 1
+
+    def safe_to_clean(self):
+        return self.active
+
+    def safe_to_clean_modules(self):
+        return not self.osd_remaining()
+
     def cleanup(self):
-        if is_prepared('OSS_UUID'):
+        if not self.active:
+            debug(self.uuid, "not active")
+            return
+        if is_prepared(self.uuid):
+            self.info()
             try:
-                lctl.cleanup("OSS", "OSS_UUID")
+                lctl.cleanup(self.name, self.uuid, config.force,
+                             config.failover)
+            except CommandError, e:
+                log(self.module_name, "cleanup failed: ", self.name)
+                e.dump()
+                cleanup_error(e.rc)
+        if not self.osd_remaining() and is_prepared('OSS_UUID'):
+            try:
+                lctl.cleanup("OSS", "OSS_UUID", config.force,
+                             config.failover)
             except CommandError, e:
                 print "cleanup failed: ", self.name
                 e.dump()
                 cleanup_error(e.rc)
-        if is_prepared(self.uuid):
-            Module.cleanup(self)
         if not self.osdtype == 'obdecho':
             clean_loop(self.devpath)
 
@@ -1313,7 +1421,7 @@ class Client(Module):
         self.target_uuid = tgtdb.getUUID()
         self.db = tgtdb
 
-        self.tgt_dev_uuid = tgtdb.get_active_target()
+        self.tgt_dev_uuid = get_active_target(tgtdb)
         if not self.tgt_dev_uuid:
             panic("No target device found for target:", self.target_name)
             
@@ -1323,9 +1431,10 @@ class Client(Module):
 
         self.module = module
         self.module_name = string.upper(module)
-        self.name = '%s_%s_%s' % (self.module_name, owner, self.target_name)
-        self.uuid = '%05x%05x_%.14s_%05x%05x' % (int(random.random() * 1048576),
-                                              int(random.random() * 1048576),self.name,
+        self.name = '%s_%s_%s_%s' % (self.module_name, socket.gethostname(),
+                                     self.target_name, owner)
+        self.uuid = '%05x_%.19s_%05x%05x' % (int(random.random() * 1048576),
+                                              self.name,
                                               int(random.random() * 1048576),
                                               int(random.random() * 1048576))
         self.uuid = self.uuid[0:36]
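
The format string above keeps the uuid within 36 characters by construction: 5 hex digits, an underscore, at most 19 characters of the name ('%.19s'), an underscore, and 10 more hex digits; the final self.uuid[0:36] slice just enforces the bound. A hypothetical example (values invented):

    # name: 'OSC_client1_ost1_MNT_client'
    # uuid: '0f3a2_OSC_client1_ost1_MN_0188c09b1c'   (5 + 1 + 19 + 1 + 10 = 36 chars)
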
@@ -1334,7 +1443,7 @@ class Client(Module):
 
     def lookup_server(self, srv_uuid):
         """ Lookup a server's network information """
-        self._server_nets = self.db.get_ost_net(srv_uuid)
+        self._server_nets = get_ost_net(self.db, srv_uuid)
         if len(self._server_nets) == 0:
             panic ("Unable to find a server for:", srv_uuid)
 
@@ -1342,11 +1451,11 @@ class Client(Module):
         return self._server_nets
 
     def prepare(self, ignore_connect_failure = 0):
-        if is_prepared(self.uuid):
-            return
         self.info(self.target_uuid)
+        if is_prepared_name(self.name):
+            self.cleanup()
         try:
-            srv = local_net(self.get_servers())
+            srv = choose_local_server(self.get_servers())
             if srv:
                 lctl.connect(srv)
             else:
@@ -1355,34 +1464,28 @@ class Client(Module):
                     lctl.add_route_host(r[0], srv.uuid, r[1], r[2])
                 else:
                     panic ("no route to",  self.target_uuid)
-        except CommandError:
-            if (ignore_connect_failure == 0):
-                pass
+        except CommandError, e:
+            if not ignore_connect_failure:
+                raise e
         if srv:
             lctl.newdev(attach="%s %s %s" % (self.module, self.name, self.uuid),
                         setup ="%s %s" %(self.target_uuid, srv.uuid))
 
     def cleanup(self):
-        Module.cleanup(self)
-        srv = local_net(self.get_servers())
-        if srv:
+        if is_prepared_name(self.name):
+            Module.cleanup(self)
             try:
-                lctl.disconnect(srv.net_type, srv.nid, srv.port, srv.uuid)
+                srv = choose_local_server(self.get_servers())
+                if srv:
+                    lctl.disconnect(srv.net_type, srv.nid, srv.port, srv.uuid)
+                else:
+                    srv, r =  find_route(self.get_servers())
+                    if srv:
+                        lctl.del_route_host(r[0], srv.uuid, r[1], r[2])
             except CommandError, e:
-                log(self.module_name, "disconnect failed: ", self.name)
+                log(self.module_name, "cleanup failed: ", self.name)
                 e.dump()
                 cleanup_error(e.rc)
-        else:
-            self.info(self.target_uuid)
-            srv, r =  find_route(self.get_servers())
-            if srv:
-                try:
-                    lctl.del_route_host(r[0], srv.uuid, r[1], r[2])
-                except CommandError, e:
-                    print "del_route failed: ", self.name
-                    e.dump()
-                    cleanup_error(e.rc)
-
 
 
 class MDC(Client):
@@ -1472,8 +1575,10 @@ class Mountpoint(Module):
     def __init__(self,db):
         Module.__init__(self, 'MTPT', db)
         self.path = self.db.get_val('path')
-        self.mds_uuid = self.db.get_first_ref('mds')
-        self.obd_uuid = self.db.get_first_ref('obd')
+        self.fs_uuid = self.db.get_first_ref('filesystem')
+        fs = self.db.lookup(self.fs_uuid)
+        self.mds_uuid = fs.get_first_ref('mds')
+        self.obd_uuid = fs.get_first_ref('obd')
         obd = self.db.lookup(self.obd_uuid)
         self.vosc = VOSC(obd, self.name)
         if self.vosc.need_mdc():
@@ -1482,25 +1587,36 @@ class Mountpoint(Module):
 
 
     def prepare(self):
+        if fs_is_mounted(self.path):
+            log(self.path, "already mounted.")
+            return
         self.vosc.prepare()
         if self.vosc.need_mdc():
             mdc_uuid = prepare_mdc(self.db, self.name,  self.mds_uuid)
         else:
             mdc_uuid = self.vosc.get_mdc_uuid()
         if not mdc_uuid:
+            self.vosc.cleanup()
             panic("Unable to determine MDC UUID. Probably need to cleanup before re-mounting.")
         self.info(self.path, self.mds_uuid, self.obd_uuid)
+        if config.lctl_dump:
+            cmd = "osc=%s,mdc=%s" % (self.vosc.get_uuid(), mdc_uuid)
+            lctl.mount_option(cmd)
+            return
         cmd = "mount -t lustre_lite -o osc=%s,mdc=%s none %s" % \
               (self.vosc.get_uuid(), mdc_uuid, self.path)
         run("mkdir", self.path)
         ret, val = run(cmd)
         if ret:
-            panic("mount failed:", self.path)
+            self.vosc.cleanup()
+            if self.vosc.need_mdc():
+                cleanup_mdc(self.db, self.name, self.mds_uuid)
+            panic("mount failed:", self.path, ":", string.join(val))
 
     def cleanup(self):
         self.info(self.path, self.mds_uuid,self.obd_uuid)
-        if  fs_is_mounted(self.path):
-            if config.force():
+        if fs_is_mounted(self.path):
+            if config.force:
                 (rc, out) = run("umount", "-f", self.path)
             else:
                 (rc, out) = run("umount", self.path)
@@ -1523,426 +1639,64 @@ class Mountpoint(Module):
 
 
 # ============================================================
-# XML processing and query
-
-class LustreDB:
-    def lookup(self, uuid):
-        """ lookup returns a new LustreDB instance"""
-        return self._lookup_by_uuid(uuid)
-
-    def lookup_name(self, name, class_name = ""):
-        """ lookup returns a new LustreDB instance"""
-        return self._lookup_by_name(name, class_name)
-
-    def lookup_class(self, class_name):
-        """ lookup returns a new LustreDB instance"""
-        return self._lookup_by_class(class_name)
-
-    def get_val(self, tag, default=None):
-        v =  self._get_val(tag)
-        if v:
-            return v
-        if default != None:
-            return default
-        debug("LustreDB", self.getName(), " no value for:", tag)
-        return None
-
-    def get_class(self):
-        return self._get_class()
-
-    def get_val_int(self, tag, default=0):
-        str = self._get_val(tag)
-        try:
-            if str:
-                return int(str)
-            return default
-        except ValueError:
-            panic("text value is not integer:", str)
-            
-    def get_first_ref(self, tag):
-        """ Get the first uuidref of the type TAG. Only
-        one is expected.  Returns the uuid."""
-        uuids = self._get_refs(tag)
-        if len(uuids) > 0:
-            return  uuids[0]
-        return None
-    
-    def get_refs(self, tag):
-        """ Get all the refs of type TAG.  Returns list of uuids. """
-        uuids = self._get_refs(tag)
-        return uuids
-
-    def get_all_refs(self):
-        """ Get all the refs.  Returns list of uuids. """
-        uuids = self._get_all_refs()
-        return uuids
-
-    def get_ost_net(self, osd_uuid):
-        srv_list = []
-        if not osd_uuid:
-            return srv_list
-        osd = self.lookup(osd_uuid)
-        node_uuid = osd.get_first_ref('node')
-        node = self.lookup(node_uuid)
-        if not node:
-            panic("unable to find node for osd_uuid:", osd_uuid,
-                  " node_ref:", node_uuid)
-        for net_uuid in node.get_networks():
-            db = node.lookup(net_uuid)
-            srv_list.append(Network(db))
-        return srv_list
+# misc query functions
 
-    def nid2server(self, nid, net_type):
-        netlist = self.lookup_class('network')
-        for net_db in netlist:
-            if net_db.get_val('nid') == nid and net_db.get_val('nettype') == net_type: 
-                return net_db
-        return None
-    
-    # the tag name is the service type
-    # fixme: this should do some checks to make sure the dom_node is a service
-    #
-    # determine what "level" a particular node is at.
-    
-    # the order of initialization is based on level.
-    def getServiceLevel(self):
-        type = self.get_class()
-        ret=0;
-        if type in ('network',):
-            ret = 5
-        elif type in ('routetbl',):
-            ret = 6
-        elif type in ('ptlrpc',):
-            ret = 7
-        elif type in ('device', 'ldlm'):
-            ret = 20
-        elif type in ('osd', 'mdd', 'cobd'):
-            ret = 30
-        elif type in ('mdsdev','ost'):
-            ret = 40
-        elif type in ('mdc','osc'):
-            ret = 50
-        elif type in ('lov',):
-            ret = 60
-        elif type in ('mountpoint', 'echoclient'):
-            ret = 70
-
-        if ret < config.minlevel() or ret > config.maxlevel():
-            ret = 0 
-        return ret
-    
-    #
-    # return list of services in a profile. list is a list of tuples
-    # [(level, db_object),]
-    def getServices(self):
-        list = []
-        for ref_class, ref_uuid in self.get_all_refs(): 
-                servdb = self.lookup(ref_uuid)
-                if  servdb:
-                    level = servdb.getServiceLevel()
-                    if level > 0:
-                        list.append((level, servdb))
-                else:
-                    panic('service not found: ' + ref_uuid)
-                    
-        list.sort()
-        return list
-
-    # Find the target_device for target on a node
-    # node->profiles->device_refs->target
-    def get_target_device(self, target_uuid, node_name):
-        node_db = self.lookup_name(node_name)
-        if not node_db:
-            return None
-        prof_list = node_db.get_refs('profile')
-        for prof_uuid in prof_list:
-            prof_db = node_db.lookup(prof_uuid)
-            ref_list = prof_db.get_all_refs()
-            for ref in ref_list:
-                dev = self.lookup(ref[1])
-                if dev and dev.get_first_ref('target') == target_uuid:
-                    return ref[1]
-        return None
-
-    def get_active_target(self):
-        target_uuid = self.getUUID()
-        target_name = self.getName()
-        node_name = config.select(target_name)
-        if node_name:
-            tgt_dev_uuid = self.get_target_device(target_uuid, node_name)
-        else:
-            tgt_dev_uuid = self.get_first_ref('active')
-        return tgt_dev_uuid
-        
-
-    # get all network uuids for this node
-    def get_networks(self):
-        ret = []
-        prof_list = self.get_refs('profile')
-        for prof_uuid in prof_list:
-            prof_db = self.lookup(prof_uuid)
-            net_list = prof_db.get_refs('network')
-            #debug("get_networks():", prof_uuid, net_list)
-            for net_uuid in net_list:
-                ret.append(net_uuid)
-        return ret
-
-class LustreDB_XML(LustreDB):
-    def __init__(self, dom, root_node):
-        # init xmlfile
-        self.dom_node = dom
-        self.root_node = root_node
-
-    def xmltext(self, dom_node, tag):
-        list = dom_node.getElementsByTagName(tag)
-        if len(list) > 0:
-            dom_node = list[0]
-            dom_node.normalize()
-            if dom_node.firstChild:
-                txt = string.strip(dom_node.firstChild.data)
-                if txt:
-                    return txt
-
-    def xmlattr(self, dom_node, attr):
-        return dom_node.getAttribute(attr)
-
-    def _get_val(self, tag):
-        """a value could be an attribute of the current node
-        or the text value in a child node"""
-        ret  = self.xmlattr(self.dom_node, tag)
-        if not ret:
-            ret = self.xmltext(self.dom_node, tag)
-        return ret
-
-    def _get_class(self):
-        return self.dom_node.nodeName
-
-    #
-    # [(ref_class, ref_uuid),]
-    def _get_all_refs(self):
-        list = []
-        for n in self.dom_node.childNodes: 
-            if n.nodeType == n.ELEMENT_NODE:
-                ref_uuid = self.xml_get_ref(n)
-                ref_class = n.nodeName
-                list.append((ref_class, ref_uuid))
-                    
-        list.sort()
-        return list
-
-    def _get_refs(self, tag):
-        """ Get all the refs of type TAG.  Returns list of uuids. """
-        uuids = []
-        refname = '%s_ref' % tag
-        reflist = self.dom_node.getElementsByTagName(refname)
-        for r in reflist:
-            uuids.append(self.xml_get_ref(r))
-        return uuids
-
-    def xmllookup_by_uuid(self, dom_node, uuid):
-        for n in dom_node.childNodes:
-            if n.nodeType == n.ELEMENT_NODE:
-                if self.xml_get_uuid(n) == uuid:
-                    return n
-                else:
-                    n = self.xmllookup_by_uuid(n, uuid)
-                    if n: return n
-        return None
-
-    def _lookup_by_uuid(self, uuid):
-        dom = self. xmllookup_by_uuid(self.root_node, uuid)
-        if dom:
-            return LustreDB_XML(dom, self.root_node)
-
-    def xmllookup_by_name(self, dom_node, name):
-        for n in dom_node.childNodes:
-            if n.nodeType == n.ELEMENT_NODE:
-                if self.xml_get_name(n) == name:
-                    return n
-                else:
-                    n = self.xmllookup_by_name(n, name)
-                    if n: return n
-        return None
-
-    def _lookup_by_name(self, name, class_name):
-        dom = self.xmllookup_by_name(self.root_node, name)
-        if dom:
-            return LustreDB_XML(dom, self.root_node)
-
-    def xmllookup_by_class(self, dom_node, class_name):
-        return dom_node.getElementsByTagName(class_name)
-
-    def _lookup_by_class(self, class_name):
-        ret = []
-        domlist = self.xmllookup_by_class(self.root_node, class_name)
-        for node in domlist:
-            ret.append(LustreDB_XML(node, self.root_node))
-        return ret
-
-    def xml_get_name(self, n):
-        return n.getAttribute('name')
-        
-    def getName(self):
-        return self.xml_get_name(self.dom_node)
-
-    def xml_get_ref(self, n):
-        return n.getAttribute('uuidref')
-
-    def xml_get_uuid(self, dom_node):
-        return dom_node.getAttribute('uuid')
-
-    def getUUID(self):
-        return self.xml_get_uuid(self.dom_node)
-
-    def get_routes(self, type, gw):
-        """ Return the routes as a list of tuples of the form:
-        [(type, gw, lo, hi),]"""
-        res = []
-        tbl = self.dom_node.getElementsByTagName('routetbl')
-        for t in tbl:
-            routes = t.getElementsByTagName('route')
-            for r in routes:
-                net_type = self.xmlattr(r, 'type')
-                if type != net_type:
-                    lo = self.xmlattr(r, 'lo')
-                    hi = self.xmlattr(r, 'hi')
-                    res.append((type, gw, lo, hi))
-        return res
-
-    def get_route_tbl(self):
-        ret = []
-        for r in self.dom_node.getElementsByTagName('route'):
-            net_type = self.xmlattr(r, 'type')
-            gw = self.xmlattr(r, 'gw')
-            lo = self.xmlattr(r, 'lo')
-            hi = self.xmlattr(r, 'hi')
-            ret.append((net_type, gw, lo, hi))
-        return ret
-
-
-# ================================================================    
-# LDAP Support
-class LustreDB_LDAP(LustreDB):
-    def __init__(self, name, attrs,
-                 base = "fs=lustre",
-                 parent = None,
-                 url  = "ldap://localhost",
-                 user = "cn=Manager, fs=lustre",
-                 pw   = "secret"
-                 ):
-        self._name = name
-        self._attrs = attrs
-        self._base = base
-        self._parent = parent
-        self._url  = url
-        self._user = user
-        self._pw   = pw
-        if parent:
-            self.l = parent.l
-            self._base = parent._base
-        else:
-            self.open()
-
-    def open(self):
-        import ldap
-        try:
-            self.l = ldap.initialize(self._url)
-            # Set LDAP protocol version used
-            self.l.protocol_version=ldap.VERSION3
-            # user and pw only needed if modifying db
-            self.l.bind_s("", "", ldap.AUTH_SIMPLE);
-        except ldap.LDAPError, e:
-            panic(e)
-            # FIXME, do something useful here
-
-    def close(self):
-        self.l.unbind_s()
-
-    def ldap_search(self, filter):
-        """Return list of uuids matching the filter."""
-        import ldap
-        dn = self._base
-        ret = []
-        uuids = []
-        try:
-            for name, attrs in self.l.search_s(dn, ldap.SCOPE_ONELEVEL,
-                                        filter, ["uuid"]):
-                for v in attrs['uuid']:
-                    uuids.append(v)
-        except ldap.NO_SUCH_OBJECT, e:
-            pass
-        except ldap.LDAPError, e:
-            print e                     # FIXME: die here?
-        if len(uuids) > 0:
-            for uuid in uuids:
-                ret.append(self._lookup_by_uuid(uuid))
-        return ret
-
-    def _lookup_by_name(self, name, class_name):
-        list =  self.ldap_search("lustreName=%s" %(name))
-        if len(list) == 1:
-            return list[0]
-        return []
-
-    def _lookup_by_class(self, class_name):
-        return self.ldap_search("objectclass=%s" %(string.upper(class_name)))
-
-    def _lookup_by_uuid(self, uuid):
-        import ldap
-        dn = "uuid=%s,%s" % (uuid, self._base)
-        ret = None
-        try:
-            for name, attrs in self.l.search_s(dn, ldap.SCOPE_BASE,
-                                               "objectclass=*"):
-                ret = LustreDB_LDAP(name, attrs,  parent = self)
-                        
-        except ldap.NO_SUCH_OBJECT, e:
-            debug("NO_SUCH_OBJECT:", uuid)
-            pass                        # just return empty list
-        except ldap.LDAPError, e:
-            print e                     # FIXME: die here?
-        return ret
+def get_ost_net(self, osd_uuid):
+    srv_list = []
+    if not osd_uuid:
+        return srv_list
+    osd = self.lookup(osd_uuid)
+    node_uuid = osd.get_first_ref('node')
+    node = self.lookup(node_uuid)
+    if not node:
+        panic("unable to find node for osd_uuid:", osd_uuid,
+              " node_ref:", node_uuid)
+    for net_uuid in node.get_networks():
+        db = node.lookup(net_uuid)
+        srv_list.append(Network(db))
+    return srv_list
+
+
+# The order of initialization is based on level.
+def getServiceLevel(self):
+    type = self.get_class()
+    ret = 0
+    if type in ('network',):
+        ret = 5
+    elif type in ('routetbl',):
+        ret = 6
+    elif type in ('ldlm',):
+        ret = 20
+    elif type in ('osd', 'cobd'):
+        ret = 30
+    elif type in ('mdsdev',):
+        ret = 40
+    elif type in ('mountpoint', 'echoclient'):
+        ret = 70
+    else:
+        panic("Unknown type: ", type)
 
+    if ret < config.minlevel or ret > config.maxlevel:
+        ret = 0 
+    return ret
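
As a rough illustration of how --minlevel and --maxlevel interact with
these levels (the numbers come from the branches above; the dict below
is a hypothetical stand-in for the real config object):

    levels = {'network': 5, 'routetbl': 6, 'ldlm': 20, 'osd': 30,
              'cobd': 30, 'mdsdev': 40, 'mountpoint': 70, 'echoclient': 70}
    minlevel, maxlevel = 20, 40     # e.g. lconf --minlevel 20 --maxlevel 40
    for type, ret in levels.items():
        if ret < minlevel or ret > maxlevel:
            ret = 0                 # level 0: service is skipped entirely
        print type, ret
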
 
-    def _get_val(self, k):
-        ret = None
-        if self._attrs.has_key(k):
-            v = self._attrs[k]
-            if type(v) == types.ListType:
-                ret = str(v[0])
+#
+# Return the list of services in a profile, as a list of
+# (level, db_object) tuples sorted by level.
+def getServices(self):
+    list = []
+    for ref_class, ref_uuid in self.get_all_refs():
+            servdb = self.lookup(ref_uuid)
+            if servdb:
+                level = getServiceLevel(servdb)
+                if level > 0:
+                    list.append((level, servdb))
             else:
-                ret = str(v)
-        return ret
-
-    def _get_class(self):
-        return string.lower(self._attrs['objectClass'][0])
-
-    #
-    # [(ref_class, ref_uuid),]
-    def _get_all_refs(self):
-        list = []
-        for k in self._attrs.keys():
-            if re.search('.*Ref', k):
-                for uuid in self._attrs[k]:
-                    list.append((k, uuid))
-        return list
+                panic('service not found: ' + ref_uuid)
 
-    def _get_refs(self, tag):
-        """ Get all the refs of type TAG.  Returns list of uuids. """
-        uuids = []
-        refname = '%sRef' % tag
-        if self._attrs.has_key(refname):
-            return self._attrs[refname]
-        return []
+    list.sort()
+    return list
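
Since tuples compare on their first element, sorting puts low levels
first for setup, and doCleanup() below reverses the list so teardown
runs in the opposite order (the string db names are placeholders):

    services = [(30, 'osd_db'), (5, 'net_db'), (70, 'mnt_db'), (20, 'ldlm_db')]
    services.sort()        # setup order:   5, 20, 30, 70
    services.reverse()     # cleanup order: 70, 30, 20, 5
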
 
-    def getName(self):
-        return self._get_val('lustreName')
-
-    def getUUID(self):
-        return self._get_val('uuid')
-
-    def get_route_tbl(self):
-        return []
 
 ############################################################
 # MDC UUID hack - 
@@ -1973,85 +1727,102 @@ def cleanup_mdc(db, owner, mds_uuid):
 
 ############################################################
 # routing ("rooting")
-#
-routes = []
-local_node = []
-router_flag = 0
 
-def add_local_interfaces(node_db):
-    global local_node
+# list of (nettype, cluster_id)
+local_clusters = []
+
+def find_local_clusters(node_db):
+    global local_clusters
     for netuuid in node_db.get_networks():
         net = node_db.lookup(netuuid)
         srv = Network(net)
         debug("add_local", netuuid)
-        local_node.append((srv.net_type, srv.nid))
-        if acceptors.has_key(srv.port):
-            panic("duplicate port:", srv.port)
-        if srv.net_type in ('tcp', 'toe'):
+        local_clusters.append((srv.net_type, srv.cluster_id))
+        if srv.port > 0:
+            if acceptors.has_key(srv.port):
+                panic("duplicate port:", srv.port)
             acceptors[srv.port] = AcceptorHandler(srv.port, srv.net_type,
                                                   srv.send_mem, srv.recv_mem,
                                                   srv.irq_affinity,
                                                   srv.nid_exchange)
 
+# This node is a gateway.
+is_router = 0
+def node_is_router():
+    return is_router
+
+# If there are any routers found in the config, then this will be true
+# and all nodes will load kptlrouter.
+needs_router = 0
 def node_needs_router():
-    return router_flag
+    return needs_router or is_router
+
+# list of (nettype, gw, tgt_cluster_id, lo, hi)
+# Currently, these local routes are only added to the kptlrouter route
+# table if they are needed to connect to a specific server.  This
+# should be changed so all available routes are loaded, and the
+# ptlrouter can make all the decisions.
+local_routes = []
 
-def init_route_config(lustre):
-    """ Scan the lustre config looking for routers.  Build list of
+def find_local_routes(lustre):
+    """ Scan the lustre config looking for routers .  Build list of
     routes. """
-    global routes, router_flag
-    routes = []
+    global local_routes, needs_router
+    local_routes = []
     list = lustre.lookup_class('node')
-    for node_db in list:
-        if node_db.get_val_int('router', 0):
-            router_flag = 1
-            #debug("init_route_config: found router", node_db.getName())
-            for (local_type, local_nid) in local_node:
-                #debug("init_route_config:", local_type, local_nid)
+    for router in list:
+        if router.get_val_int('router', 0):
+            needs_router = 1
+            for (local_type, local_cluster_id) in local_clusters:
                 gw = None
-                for netuuid in node_db.get_networks():
-                    db = node_db.lookup(netuuid)
-                    if local_type == db.get_val('nettype'):
+                for netuuid in router.get_networks():
+                    db = router.lookup(netuuid)
+                    if (local_type == db.get_val('nettype') and
+                       local_cluster_id == db.get_val('clusterid')):
                         gw = db.get_val('nid')
                         break
-                #debug("init_route_config: gw is", gw)
-                if not gw:
-                    continue
-                for route in node_db.get_routes(local_type, gw):
-                    routes.append(route)
-    debug("init_route_config routes:", routes)
-
-
-def local_net(srv_list):
-    global local_node
-    for iface in local_node:
-        for srv in srv_list:
-            #debug("local_net a:", srv.net_type, "b:", iface[0])
-            if srv.net_type == iface[0]:
-                return srv
-    return None
+                if gw:
+                    debug("find_local_routes: gw is", gw)
+                    for route in router.get_local_routes(local_type, gw):
+                        local_routes.append(route)
+    debug("find_local_routes:", local_routes)
+
+
+def choose_local_server(srv_list):
+    for srv in srv_list:
+        if local_net_type(srv.net_type):
+            return srv
 
 def local_net_type(net_type):
-    global local_node
-    for iface in local_node:
-        if net_type == iface[0]:
+    for cluster in local_clusters:
+        if net_type == cluster[0]:
             return 1
     return 0
 
 def find_route(srv_list):
-    global local_node, routes
-    frm_type = local_node[0][0]
+    frm_type = local_clusters[0][0]
     for srv in srv_list:
-        #debug("find_route: srv:", srv.hostaddr, "type: ", srv.net_type)
+        debug("find_route: srv:", srv.hostaddr, "type: ", srv.net_type)
         to_type = srv.net_type
-        to = srv.hostaddr
-        #debug ('looking for route to', to_type, to)
-        for r in routes:
-            #debug("find_route: ", r)
-            if  r[2] == to:
+        to = srv.hostaddr  # XXX should this be hostaddr, or nid?
+        cluster_id = srv.cluster_id
+        debug ('looking for route to', to_type, to)
+        for r in local_routes:
+            debug("find_route: ", r)
+            if  (r[3] <= to and to <= r[4]) and cluster_id == r[2]:
                 return srv, r
     return None,None
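
For illustration, a hypothetical entry in the (nettype, gw,
tgt_cluster_id, lo, hi) form described above matches a server when the
cluster ids agree and the address falls inside [lo, hi]:

    route = ('tcp', '10.0.0.1', 1, '10.1.0.2', '10.1.0.9')   # hypothetical
    cluster_id, to = 1, '10.1.0.5'                           # server to reach
    if (route[3] <= to and to <= route[4]) and cluster_id == route[2]:
        print 'route via gateway', route[1]
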
            
+def get_active_target(db):
+    target_uuid = db.getUUID()
+    target_name = db.getName()
+    node_name = get_select(target_name)
+    if node_name:
+        tgt_dev_uuid = db.get_target_device(target_uuid, node_name)
+    else:
+        tgt_dev_uuid = db.get_first_ref('active')
+    return tgt_dev_uuid
+
 
 ############################################################
 # lconf level logic
@@ -2062,14 +1833,12 @@ def newService(db):
     n = None
     if type == 'ldlm':
         n = LDLM(db)
-    elif type == 'ptlrpc':
-        n = PTLRPC(db)
     elif type == 'lov':
         n = LOV(db)
     elif type == 'network':
         n = Network(db)
     elif type == 'routetbl':
-        n = Router(db)
+        n = RouteTable(db)
     elif type == 'osd':
         n = OSD(db)
     elif type == 'cobd':
@@ -2097,44 +1866,45 @@ def for_each_profile(db, prof_list, operation):
         prof_db = db.lookup(prof_uuid)
         if not prof_db:
             panic("profile:", profile, "not found.")
-        services = prof_db.getServices()
+        services = getServices(prof_db)
         operation(services)
         
 def doSetup(services):
-    if config.nosetup():
+    if config.nosetup:
         return
     for s in services:
         n = newService(s[1])
         n.prepare()
     
 def doModules(services):
-    if config.nomod():
+    if config.nomod:
         return
     for s in services:
         n = newService(s[1])
         n.load_module()
 
 def doCleanup(services):
-    if config.nosetup():
+    if config.nosetup:
         return
     services.reverse()
     for s in services:
         n = newService(s[1])
-        n.cleanup()
+        if n.safe_to_clean():
+            n.cleanup()
 
 def doUnloadModules(services):
-    if config.nomod():
+    if config.nomod:
         return
     services.reverse()
     for s in services:
         n = newService(s[1])
-        n.cleanup_module()
+        if n.safe_to_clean_modules():
+            n.cleanup_module()
 
 #
 # Load profile for 
 def doHost(lustreDB, hosts):
-    global routes
-    global router_flag 
+    global is_router 
     node_db = None
     for h in hosts:
         node_db = lustreDB.lookup_name(h, 'node')
@@ -2144,188 +1914,168 @@ def doHost(lustreDB, hosts):
         print 'No host entry found.'
         return
 
-    router_flag = node_db.get_val_int('router', 0)
-    recovery_upcall = node_db.get_val('recovery_upcall', '')
+    is_router = node_db.get_val_int('router', 0)
+    lustre_upcall = node_db.get_val('lustreUpcall', '')
+    portals_upcall = node_db.get_val('portalsUpcall', '')
     timeout = node_db.get_val_int('timeout', 0)
 
-    add_local_interfaces(node_db)
-    if not router_flag:
-        init_route_config(lustreDB)
+    find_local_clusters(node_db)
+    if not is_router:
+        find_local_routes(lustreDB)
 
     # Two step process: (1) load modules, (2) setup lustre
     # if not cleaning, load modules first.
     prof_list = node_db.get_refs('profile')
 
-    if config.cleanup():
-        if config.force():
+    if config.recover:
+        if not (config.tgt_uuid and config.client_uuid and config.conn_uuid):
+            raise Lustre.LconfError("--recover requires --tgt_uuid <UUID> " +
+                                    "--client_uuid <UUID> --conn_uuid <UUID>")
+        doRecovery(lustreDB, lctl, config.tgt_uuid, config.client_uuid,
+                   config.conn_uuid)
+    elif config.cleanup:
+        if config.force:
             # the command line can override this value
             timeout = 5
         # ugly hack, only need to run lctl commands for --dump
-        if config.lctl_dump():
+        if config.lctl_dump:
             for_each_profile(node_db, prof_list, doCleanup)
             return
 
         sys_set_timeout(timeout)
-        sys_set_recovery_upcall(recovery_upcall)
+        sys_set_ptldebug()
+        sys_set_subsystem()
+        sys_set_lustre_upcall(lustre_upcall)
+        sys_set_portals_upcall(portals_upcall)
 
         for_each_profile(node_db, prof_list, doCleanup)
         for_each_profile(node_db, prof_list, doUnloadModules)
 
     else:
         # ugly hack, only need to run lctl commands for --dump
-        if config.lctl_dump():
+        if config.lctl_dump:
             for_each_profile(node_db, prof_list, doSetup)
             return
 
+        sys_make_devices()
+        sys_set_netmem_max('/proc/sys/net/core/rmem_max', MAXTCPBUF)
+        sys_set_netmem_max('/proc/sys/net/core/wmem_max', MAXTCPBUF)
+
         for_each_profile(node_db, prof_list, doModules)
 
         sys_set_debug_path()
-        script = config.gdb_script()
+        sys_set_ptldebug()
+        sys_set_subsystem()
+        script = config.gdb_script
         run(lctl.lctl, ' modules >', script)
-        if config.gdb():
+        if config.gdb:
             log ("The GDB module script is in", script)
             # pause, so user has time to break and
             # load the script
             time.sleep(5)
         sys_set_timeout(timeout)
-        sys_set_recovery_upcall(recovery_upcall)
+        sys_set_lustre_upcall(lustre_upcall)
+        sys_set_portals_upcall(portals_upcall)
 
         for_each_profile(node_db, prof_list, doSetup)
 
-############################################################
-# Command line processing
-#
-def parse_cmdline(argv):
-    short_opts = "hdnvf"
-    long_opts = ["ldap", "reformat", "lustre=", "verbose", "gdb",
-                 "portals=", "makeldiff", "cleanup", "noexec",
-                 "help", "node=", "nomod", "nosetup",
-                 "dump=", "force", "minlevel=", "maxlevel=",
-                 "timeout=", "recovery_upcall=",
-                 "ldapurl=", "config=", "select=", "lctl_dump="]
-    opts = []
-    args = []
+def doRecovery(db, lctl, tgt_uuid, client_uuid, conn_uuid):
+    tgt = db.lookup(tgt_uuid)
+    if not tgt:
+        raise Lustre.LconfError("doRecovery: "+ tgt_uuid +" not found.")
+    new_uuid = get_active_target(tgt)
+    if not new_uuid:
+        raise Lustre.LconfError("doRecovery: no active target found for: " +
+                                tgt_uuid)
+    net = choose_local_server(get_ost_net(db, new_uuid))
+    if not net:
+        raise Lustre.LconfError("Unable to find a connection to:" + new_uuid)
+    # XXX, better to do a full disconnect here
+    log("Reconnecting", tgt_uuid, " to ",  net.uuid);
+    lctl.del_uuid(conn_uuid)
+    lctl.connect(net)
+    lctl.recover(client_uuid, net.uuid)
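
A recovery run is driven entirely from the command line; given the
option check in doHost() above, an invocation might look like this
(every UUID below is a placeholder):

    lconf --recover --tgt_uuid <failed-target-UUID> \
          --client_uuid <failed-client-UUID> \
          --conn_uuid <failed-connection-UUID> config.xml
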
 
-    try:
-        opts, args = getopt.getopt(argv, short_opts, long_opts)
-    except getopt.error:
-        print "invalid opt"
-        usage()
-    
-    for o, a in opts:
-        if o in ("-h", "--help"):
-            usage()
-        if o in ("-d","--cleanup"):
-            config.cleanup(1)
-        if o in ("-v", "--verbose"):
-            config.verbose(1)
-        if o in ("-n", "--noexec"):
-            config.noexec(1)
-        if o == "--portals":
-            config.portals_dir(a)
-        if o == "--lustre":
-            config.lustre_dir(a)
-        if o == "--reformat":
-            config.reformat(1)
-        if o == "--node":
-            config.node(a)
-        if o == "--gdb":
-            config.gdb(1)
-        if o == "--nomod":
-            config.nomod(1)
-        if o == "--nosetup":
-            config.nosetup(1)
-        if o == "--dump":
-            config.dump_file(a)
-        if o in ("-f", "--force"):
-            config.force(1)
-       if o == "--minlevel":
-               config.minlevel(a)
-        if o == "--maxlevel":
-                config.maxlevel(a)
-        if o == "--timeout":
-                config.timeout(a)
-        if o == "--recovery_upcall":
-                config.recovery_upcall(a)
-        if o == "--ldapurl":
-                config.ldapurl(a)
-        if o == "--config":
-                config.config_name(a)
-        if o == "--select":
-                config.init_select(a)
-        if o == "--lctl_dump":
-            config.lctl_dump(a)
-
-    return args
-
-def fetch(url):
-    import urllib
-    data = ""
-    try:
-        s = urllib.urlopen(url)
-        data = s.read()
-    except:
-        usage()
-    return data
 
 def setupModulePath(cmd, portals_dir = PORTALS_DIR):
     base = os.path.dirname(cmd)
-    if os.access(base+"/Makefile", os.R_OK):
-        if not config.lustre_dir():
-            config.lustre_dir(os.path.join(base, ".."))
+    if development_mode():
+        if not config.lustre:
+            config.lustre = (os.path.join(base, ".."))
         # normalize the portals dir, using command line arg if set
-        if config.portals_dir():
-            portals_dir = config.portals_dir()
-        dir = os.path.join(config.lustre_dir(), portals_dir)
-        config.portals_dir(dir)
-    elif config.lustre_dir() and config.portals_dir():
+        if config.portals:
+            portals_dir = config.portals
+        dir = os.path.join(config.lustre, portals_dir)
+        config.portals = dir
+        debug('config.portals', config.portals)
+    elif config.lustre and config.portals:
         # production mode
         # if --lustre and --portals, normalize portals
         # can ignore PORTALS_DIR here, since it is probably useless here
-        dir = config.portals_dir()
-        dir = os.path.join(config.lustre_dir(), dir)
-        config.portals_dir(dir)
+        config.portals = os.path.join(config.lustre, config.portals)
+        debug('config.portals B', config.portals)
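
For example, a relative --portals is resolved against the lustre tree,
while an absolute one wins outright, because os.path.join() discards
the first component when the second is absolute (paths illustrative):

    print os.path.join('/usr/src/lustre', 'portals')           # /usr/src/lustre/portals
    print os.path.join('/usr/src/lustre', '/usr/src/portals')  # /usr/src/portals
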
 
 def sysctl(path, val):
-    if config.noexec():
+    debug("+ sysctl", path, val)
+    if config.noexec:
         return
     try:
         fp = open(os.path.join('/proc/sys', path), 'w')
         fp.write(str(val))
         fp.close()
     except IOError, e:
-        print e
+        panic(str(e))
 
 
 def sys_set_debug_path():
-    debug("debug path: ", config.debug_path())
-    sysctl('portals/debug_path', config.debug_path())
+    sysctl('portals/debug_path', config.debug_path)
 
-def sys_set_recovery_upcall(upcall):
+def sys_set_lustre_upcall(upcall):
     # the command overrides the value in the node config
-    if config.recovery_upcall():
-        upcall = config.recovery_upcall()
+    if config.lustre_upcall:
+        upcall = config.lustre_upcall
+    elif config.upcall:
+        upcall = config.upcall
     if upcall:
-        debug("setting recovery_upcall:", upcall)
-        sysctl('lustre/recovery_upcall', upcall)
+        sysctl('lustre/upcall', upcall)
+
+def sys_set_portals_upcall(upcall):
+    # the command overrides the value in the node config
+    if config.portals_upcall:
+        upcall = config.portals_upcall
+    elif config.upcall:
+        upcall = config.upcall
+    if upcall:
+        sysctl('portals/upcall', upcall)
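
The precedence in both upcall setters is the same: the type-specific
flag wins, then the generic --upcall, then the value from the node
config. A minimal sketch of that chain (values hypothetical):

    def pick_upcall(specific, generic, node_val):
        if specific:
            return specific
        elif generic:
            return generic
        return node_val

    print pick_upcall('', '/sbin/upcall', '/sbin/from_node')  # generic wins
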
 
 def sys_set_timeout(timeout):
     # the command overrides the value in the node config
-    if config.timeout() > 0:
-        timeout = config.timeout()
-    if timeout > 0:
-        debug("setting timeout:", timeout)
+    if config.timeout > 0:
+        timeout = config.timeout
+    if timeout != None and timeout > 0:
         sysctl('lustre/timeout', timeout)
 
-def sys_set_ptldebug(ptldebug):
-    # the command overrides the value in the node config
-    if config.ptldebug():
-        ptldebug = config.ptldebug()
-    sysctl('portals/debug', ptldebug)
+def sys_set_ptldebug():
+    if config.ptldebug != None:
+        try:
+            val = eval(config.ptldebug, ptldebug_names)
+            val = "0x%x" % (val,)
+            sysctl('portals/debug', val)
+        except NameError, e:
+            panic(str(e))
+
+def sys_set_subsystem():
+    if config.subsystem != None:
+        try:
+            val = eval(config.subsystem, subsystem_names)
+            val = "0x%x" % (val,)
+            sysctl('portals/subsystem_debug', val)
+        except NameError, e:
+            panic(str(e))
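
For illustration, eval() with the name table as its namespace lets flag
names be combined with ordinary Python operators; using the
ptldebug_names table referenced above:

    val = eval("trace | inode | warning", ptldebug_names)
    print "0x%x" % (val,)       # 0x403: bits 0, 1 and 10
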
 
 def sys_set_netmem_max(path, max):
     debug("setting", path, "to at least", max)
-    if config.noexec():
+    if config.noexec:
         return
     fp = open(path)
     str = fp.readline()
@@ -2351,6 +2101,20 @@ def add_to_path(new_dir):
         return
     os.environ['PATH'] = os.environ['PATH'] + ':' + new_dir
     
+def default_debug_path():
+    path = '/tmp/lustre-log'
+    if os.path.isdir('/r'):
+        return '/r' + path
+    else:
+        return path
+
+def default_gdb_script():
+    script = '/tmp/ogdb'
+    if os.path.isdir('/r'):
+        return '/r' + script
+    else:
+        return script
+
 
 DEFAULT_PATH = ('/sbin', '/usr/sbin', '/bin', '/usr/bin')
 # ensure basic elements are in the system path
@@ -2358,13 +2122,94 @@ def sanitise_path():
     for dir in DEFAULT_PATH:
         add_to_path(dir)
 
-# Initialize or shutdown lustre according to a configuration file
-#   * prepare the system for lustre
-#   * configure devices with lctl
-# Shutdown does steps in reverse
-#
+# global hack for the --select handling
+tgt_select = {}
+def init_select(arg):
+    # arg = "service=nodeA,service2=nodeB"
+    global tgt_select
+    list = string.split(arg, ',')
+    for entry in list:
+        srv, node = string.split(entry, '=')
+        tgt_select[srv] = node
+
+def get_select(srv):
+    if tgt_select.has_key(srv):
+        return tgt_select[srv]
+    return None
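
For illustration, get_active_target() above consults this table first,
so a --select entry redirects a service to the named node before the
'active' reference is tried (node and service names are hypothetical):

    init_select("mds1=nodeB,ost1=nodeC")
    print get_select("mds1")    # nodeB -> look up mds1's device on nodeB
    print get_select("ost2")    # None  -> fall back to the 'active' ref
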
+
+
+PARAM = Lustre.Options.PARAM
+INTPARAM = Lustre.Options.INTPARAM
+lconf_options = [
+    ('verbose,v', "Print system commands as they are run"),
+    ('ldapurl',"LDAP server URL, eg. ldap://localhost", PARAM),
+    ('config', "Cluster config name used for LDAP query", PARAM),
+    ('select', "service=nodeA,service2=nodeB ", PARAM),
+    ('node',   "Load config for <nodename>", PARAM),
+    ('cleanup,d', "Cleans up config. (Shutdown)"),
+    ('force,f', "Forced unmounting and/or obd detach during cleanup",
+               Lustre.Options.FLAG, 0),
+    ('mds_ost_conn', "Open connections to OSTs on the MDS"),
+    ('failover',"""Used to shut down without saving state.
+                   This will allow this node to "give up" a service to
+                   another node for failover purposes. This will not
+                   be a clean shutdown.""",
+               Lustre.Options.FLAG, 0),
+    ('gdb', """Prints message after creating gdb module script
+                    and sleeps for 5 seconds."""),
+    ('noexec,n', """Prints the commands and steps that will be run for a
+                    config without executing them. This can be used to check if a
+                    config file is doing what it should be doing."""),
+    ('nomod', "Skip load/unload module step."),
+    ('nosetup', "Skip device setup/cleanup step."),
+    ('reformat', "Reformat all devices (without question)"),
+    ('dump',  "Dump the kernel debug log to file before portals is unloaded",
+               PARAM),
+    ('minlevel', "Minimum level of services to configure/cleanup",
+                 INTPARAM, 0),
+    ('maxlevel', """Maximum level of services to configure/cleanup 
+                    Levels are aproximatly like:
+                            10 - network
+                            20 - device, ldlm
+                            30 - osd, mdd
+                            40 - mds, ost
+                            70 - mountpoint, echo_client, osc, mdc, lov""",
+               INTPARAM, 100),
+    ('lustre', """Base directory of lustre sources. This parameter will
+                  cause lconf to load modules from a source tree.""", PARAM),
+    ('portals', """Portals source directory.  If this is a relative path,
+                   then it is assumed to be relative to lustre. """, PARAM),
+    ('timeout', "Set recovery timeout", PARAM),
+    ('upcall',  "Set both portals and lustre upcall script", PARAM),
+    ('lustre_upcall', "Set lustre upcall script", PARAM),
+    ('portals_upcall', "Set portals upcall script", PARAM),
+    ('lctl_dump', "Save lctl ioctls to the dumpfile argument", PARAM),
+    ('ptldebug', "Set the portals debug level",  PARAM),
+    ('subsystem', "Set the portals debug subsystem",  PARAM),
+    ('gdb_script', "Fullname of gdb debug script", PARAM, default_gdb_script()),
+    ('debug_path', "Path to save debug dumps", PARAM, default_debug_path()),
+# Client recovery options
+    ('recover', "Recover a device"),
+    ('group', "The group of devices to configure or cleanup", PARAM),
+    ('tgt_uuid', "The failed target (required for recovery)", PARAM),
+    ('client_uuid', "The failed client (required for recovery)", PARAM),
+    ('conn_uuid', "The failed connection (required for recovery)", PARAM),
+    ]      
+
 def main():
-    global  lctl, MAXTCPBUF
+    global lctl, config
+
+    # in the upcall this is set to SIG_IGN
+    signal.signal(signal.SIGCHLD, signal.SIG_DFL)
+    
+    cl = Lustre.Options("lconf", "config.xml", lconf_options)
+    try:
+        config, args = cl.parse(sys.argv[1:])
+    except Lustre.OptionError, e:
+        print e
+        sys.exit(1)
+
+    setupModulePath(sys.argv[0])
 
     host = socket.gethostname()
 
@@ -2380,7 +2225,6 @@ def main():
 
     sanitise_path()
 
-    args = parse_cmdline(sys.argv[1:])
     if len(args) > 0:
         if not os.access(args[0], os.R_OK):
             print 'File not found or not readable:', args[0]
@@ -2390,44 +2234,48 @@ def main():
         except Exception:
             panic("%s does not appear to be a config file." % (args[0]))
             sys.exit(1) # make sure to die here, even in debug mode.
-        db = LustreDB_XML(dom.documentElement, dom.documentElement)
-    elif config.ldapurl():
-        if not config.config_name():
+        db = Lustre.LustreDB_XML(dom.documentElement, dom.documentElement)
+    elif config.ldapurl:
+        if not config.config:
             panic("--ldapurl requires --config name")
-        dn = "config=%s,fs=lustre" % (config.config_name())
-        db = LustreDB_LDAP('', {}, base=dn, url = config.ldapurl())
+        dn = "config=%s,fs=lustre" % (config.config)
+        db = Lustre.LustreDB_LDAP('', {}, base=dn, url = config.ldapurl)
     else:
-        usage()
+        cl.usage()
+        sys.exit(1)
+
+    ver = db.get_version()
+    if not ver:
+        panic("No version found in config data, please recreate.")
+    if ver != Lustre.CONFIG_VERSION:
+        panic("Config version", ver, "does not match lconf version",
+              Lustre.CONFIG_VERSION)
 
     node_list = []
-    if config.node():
-        node_list.append(config.node())
+    if config.node:
+        node_list.append(config.node)
     else:
         if len(host) > 0:
             node_list.append(host)
         node_list.append('localhost')
+
     debug("configuring for host: ", node_list)
 
     if len(host) > 0:
-        config._debug_path = config._debug_path + '-' + host
-        config._gdb_script = config._gdb_script + '-' + host
-
-    setupModulePath(sys.argv[0])
+        config.debug_path = config.debug_path + '-' + host
+        config.gdb_script = config.gdb_script + '-' + host
 
     lctl = LCTLInterface('lctl')
-    if config.lctl_dump():
-        lctl.use_save_file(config.lctl_dump())
-    else:
-        sys_make_devices()
-        sys_set_netmem_max('/proc/sys/net/core/rmem_max', MAXTCPBUF)
-        sys_set_netmem_max('/proc/sys/net/core/wmem_max', MAXTCPBUF)
+
+    if config.lctl_dump:
+        lctl.use_save_file(config.lctl_dump)
 
     doHost(db, node_list)
 
 if __name__ == "__main__":
     try:
         main()
-    except LconfError, e:
+    except Lustre.LconfError, e:
         print e
     except CommandError, e:
         e.dump()
@@ -2435,4 +2283,3 @@ if __name__ == "__main__":
 
     if first_cleanup_error:
         sys.exit(first_cleanup_error)
-