b=7165
author     jacob <jacob>
           Thu, 4 Aug 2005 18:06:06 +0000 (18:06 +0000)
committer  jacob <jacob>
           Thu, 4 Aug 2005 18:06:06 +0000 (18:06 +0000)
b=7224

 - initial support for creating clumanager xml files from lustre xml
 - support for using lustre init script from clumanager
 - more changes to init scripts from scripps

lustre/scripts/lustre
lustre/scripts/lustrefs
lustre/utils/lconf

lustre/scripts/lustre
index 72a744d..60b1bda 100755 (executable)
 # pidfile: /var/run/lustre.pid
 ### BEGIN INIT INFO
 # Provides: lustre
-# Required-Start: $network
+# Required-Start: $network +sshd
 # Required-Stop: $network
 # Should-Start:
 # Should-Stop:
 # Default-Start: 
-# Default-Stop: 0 1 2 6
+# Default-Stop: 0 1 2 3 4 5 6
 # Short-Description: Lustre Lite network File System.
 # Description: This starts both Lustre client and server functions.
 ### END INIT INFO
 
 
-SERVICE=lustre
+SERVICE=${0##*/}
 LOCK=/var/lock/subsys/$SERVICE
 
 : ${LUSTRE_CFG:=/etc/lustre/lustre.cfg}
@@ -30,10 +30,19 @@ LOCK=/var/lock/subsys/$SERVICE
 
 : ${LUSTRE_CONFIG_XML:=/etc/lustre/config.xml}
 : ${LCONF:=/usr/sbin/lconf}
-: ${LCONF_START_ARGS:="${LUSTRE_CONFIG_XML}"}
-: ${LCONF_STOP_ARGS:="--force --cleanup ${LUSTRE_CONFIG_XML}"}
 : ${LCTL:=/usr/sbin/lctl}
 
+case "$SERVICE" in
+    lustre)
+       : ${LCONF_START_ARGS:="${LUSTRE_CONFIG_XML}"}
+       : ${LCONF_STOP_ARGS:="--force --cleanup ${LUSTRE_CONFIG_XML}"}
+       ;;
+    *)
+       : ${LCONF_START_ARGS:="--group ${SERVICE} --select ${SERVICE}=${HOSTNAME} ${LUSTRE_CONFIG_XML}"}
+       : ${LCONF_STOP_ARGS:="--group ${SERVICE} --select ${SERVICE}=${HOSTNAME} --failover --cleanup ${LUSTRE_CONFIG_XML}"}
+       ;;
+esac
+
 # Source function library.
 if [ -f /etc/init.d/functions ] ; then
    . /etc/init.d/functions
@@ -66,6 +75,21 @@ check_start_stop() {
 }
 
 start() {
+       if [ -x "/usr/sbin/clustat" -a "${SERVICE}" = "lustre" ] ; then
+           if [ ! -f "/etc/lustre/start-despite-clumanager" ] ; then
+               cat >&2 <<EOF
+This script was run directly, which can be dangerous if you are using
+clumanager to manage Lustre services.
+
+If you are not using clumanager for Lustre services, run the following
+command to have this script start Lustre instead:
+
+touch /etc/lustre/start-despite-clumanager
+EOF
+               RETVAL=1
+               return
+           fi
+       fi
        check_start_stop
        echo -n "Starting $SERVICE: "
        if [ $UID -ne 0 ]; then
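The rewritten init script keys its behaviour off the name it was invoked under (SERVICE=${0##*/}): called as "lustre" it acts on the whole config, while a symlink named after a target group gets per-group --group/--select arguments bound to this host. A minimal sketch of that argument selection, in Python for consistency with the lconf changes below (lconf_args is a hypothetical helper, not part of this patch):

    import os, socket, sys

    def lconf_args(service, config_xml="/etc/lustre/config.xml", stop=False):
        # Mirror the init script's case statement: plain "lustre" acts on
        # the whole config; any other name is treated as a target group
        # selected onto this host (socket.gethostname() stands in for the
        # script's $HOSTNAME).
        if service == "lustre":
            return ["--force", "--cleanup", config_xml] if stop else [config_xml]
        args = ["--group", service, "--select",
                "%s=%s" % (service, socket.gethostname())]
        if stop:
            args += ["--failover", "--cleanup"]
        return args + [config_xml]

    if __name__ == "__main__":
        # ${0##*/} in the shell script corresponds to basename(argv[0]).
        service = os.path.basename(sys.argv[0])
        print("lconf " + " ".join(lconf_args(service)))
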
lustre/scripts/lustrefs
index 56cf7ec..18c32b1 100644 (file)
@@ -14,7 +14,7 @@
 #
 ### BEGIN INIT INFO
 # Provides: lustrefs
-# Required-Start: $network $remote_fs
+# Required-Start: $network $remote_fs +sshd +lustre
 # Required-Stop: $network $remote_fs
 # Should-Start: 
 # Should-Stop: 
lustre/utils/lconf
index 5bec26f..2c645c1 100755 (executable)
@@ -27,7 +27,7 @@
 #
 # Based in part on the XML obdctl modifications done by Brian Behlendorf
 
-import sys, getopt, types
+import sys, getopt, types, errno
 import string, os, stat, popen2, socket, time, random, fcntl, select
 import re, exceptions, signal, traceback
 import xml.dom.minidom
@@ -2713,6 +2713,50 @@ def doUnloadModules(services):
         if n.safe_to_clean_modules():
             n.cleanup_module()
 
+def doMakeServiceScript(services):
+    if config.nosetup:
+        return
+    try:
+        os.makedirs(config.service_scripts)
+    except OSError, e:
+        if e[0] != errno.EEXIST:
+            panic("Couldn't create scripts dir " + config.service_scripts + ": " + e[1])
+    
+    for s in services:
+        if s[1].get_class() != 'osd' and s[1].get_class() != 'mdsdev':
+            continue
+
+        target_uuid = s[1].get_first_ref('target')
+        target = toplustreDB.lookup(target_uuid)
+        target_symlink = config.service_scripts + "/" + target.getName()
+        if config.force:
+            try:
+                try:
+                    os.unlink(target_symlink)
+                    if config.verbose:
+                        print "Removed " + target_symlink
+                except OSError, e:
+                    if e[0] != errno.EISDIR:
+                        raise e
+                    os.rmdir(target_symlink)
+                    if config.verbose:
+                        print "Removed " + target_symlink
+            except OSError, e:
+                if e[0] != errno.ENOENT:
+                    panic("Error removing " + target_symlink + ": " + e[1])
+                    
+        try:
+            os.symlink("/etc/init.d/lustre", target_symlink)
+            if config.verbose:
+                print "Created service link " + target_symlink + " to /etc/init.d/lustre"
+
+        except OSError, e:
+            if e[0] == errno.EEXIST:
+                extra_error = " (use --force option to remove existing files)"
+            else:
+                extra_error = ""
+            panic("Error creating " + target_symlink + ": " + e[1] + extra_error)
+
 #
 # Load profile for
 def doHost(lustreDB, hosts):
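doMakeServiceScript drops one symlink per local osd/mdsdev target into config.service_scripts, each pointing back at /etc/init.d/lustre, so a CluManager userscript named after the target resolves to the name-dispatching init script above. The patch is written for Python 2 (except OSError, e and e[0]); the same symlink-with-errno idiom in modern Python, reduced to one hypothetical helper, would look roughly like this (a sketch, not the patch's code):

    import errno, os

    def make_service_link(target_name, scripts_dir="/etc/lustre/services",
                          init_script="/etc/init.d/lustre", force=False):
        # Create scripts_dir/<target_name> -> init_script, tolerating an
        # already-existing directory and, when force is set, replacing a
        # stale link instead of failing with EEXIST.
        try:
            os.makedirs(scripts_dir)
        except OSError as e:
            if e.errno != errno.EEXIST:
                raise
        link = os.path.join(scripts_dir, target_name)
        if force:
            try:
                os.unlink(link)
            except OSError as e:
                if e.errno != errno.ENOENT:
                    raise
        os.symlink(init_script, link)
        return link
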
@@ -2744,7 +2788,11 @@ def doHost(lustreDB, hosts):
     # if not cleaning, load modules first.
     prof_list = node_db.get_refs('profile')
 
-    if config.write_conf:
+    if config.make_service_scripts:
+        for_each_profile(node_db, prof_list, doMakeServiceScript)
+        return
+    
+    elif config.write_conf:
         for_each_profile(node_db, prof_list, doModules)
         sys_make_devices()
         for_each_profile(node_db, prof_list, doWriteconf)
@@ -2810,6 +2858,147 @@ def doHost(lustreDB, hosts):
         for_each_profile(node_db, prof_list, doSetup)
         lustreDB.close()
 
+def add_clumanager_node(node_db, nodes, services):
+    new_services = []
+    node_name = node_db.getUUID()
+    nodes[node_name] = []
+    
+    for prof_uuid in node_db.get_refs('profile'):
+        prof_db = toplustreDB.lookup(prof_uuid)
+        for ref_class, ref_uuid in prof_db.get_all_refs():
+            if ref_class not in ('osd', 'mdsdev'):
+                continue
+            devdb = toplustreDB.lookup(ref_uuid)
+            tgt_uuid = devdb.get_first_ref('target')
+
+            nodes[node_name].append(ref_uuid)
+
+            if not services.has_key(tgt_uuid):
+                if config.verbose:
+                    print "New service: " + tgt_uuid + " (originally found on " + node_name + ")"
+                new_services.append(tgt_uuid)
+                services[tgt_uuid] = []
+            services[tgt_uuid].append(ref_uuid)
+
+    return new_services
+
+def add_clumanager_services(new_services, nodes, dev_list):
+    new_nodes = []
+    for devdb in dev_list:
+        tgt_uuid = devdb.get_first_ref('target')
+        if tgt_uuid in new_services:
+            node_uuid = devdb.get_first_ref('node')
+        
+            if not (nodes.has_key(node_uuid) or node_uuid in new_nodes):
+                if config.verbose:
+                    print "New node: " + node_uuid + " for service " + tgt_uuid
+                new_nodes.append(node_uuid)
+
+    return new_nodes
+
+def doClumanager(lustreDB, hosts):
+    nodes = {}
+    services = {}
+
+    dev_list = []
+    
+    for dev_uuid in toplustreDB.get_refs('osd') + toplustreDB.get_refs('mdsdev'):
+        dev_list.append(lustreDB.lookup(dev_uuid))
+
+    node_db = None
+    for h in hosts:
+        node_db = lustreDB.lookup_name(h, 'node')
+        if node_db:
+            our_host = h
+            new_services = add_clumanager_node(node_db, nodes, services)
+            break
+            
+    if not node_db:
+        panic('No host entry found.')
+
+    while 1:
+        if len(new_services) == 0:
+            break
+        
+        new_nodes = add_clumanager_services(new_services, nodes, dev_list)
+        if len(new_nodes) == 0:
+            break
+
+        if len(new_nodes) + len(nodes.keys()) > 8:
+            panic("CluManager only supports 8 nodes per failover \"cluster.\"")
+
+        new_services = []
+        for node_uuid in new_nodes:
+            node_db = lustreDB.lookup(node_uuid)
+            if not node_db:
+                panic("No node entry for " + node_uuid + " was found.")
+
+            new_services += add_clumanager_node(node_db, nodes, services)
+
+    print """<?xml version="1.0"?>
+<cluconfig version="3.0">
+  <clumembd broadcast="no" interval="750000" loglevel="5" multicast="yes" multicast_ipaddress="225.0.0.11" thread="yes" tko_count="20"/>
+  <cluquorumd loglevel="5" pinginterval="2"/>
+  <clurmtabd loglevel="5" pollinterval="4"/>
+  <clusvcmgrd loglevel="5"/>
+  <clulockd loglevel="5"/>
+  <cluster config_viewnumber="1" name="%s"/>
+  <sharedstate driver="libsharedraw.so" rawprimary="%s" rawshadow="%s" type="raw"/>
+  <members> """ % (our_host, config.rawprimary, config.rawsecondary)
+
+    nodekeys = nodes.keys()
+    nodekeys.sort()
+
+    servicekeys = services.keys()
+    servicekeys.sort()
+    
+    i = 0
+    for node in nodekeys:
+        nodedb = lustreDB.lookup(node)
+        print "    <member id=\"%d\" name=\"%s\" watchdog=\"yes\"/>" % (i, nodedb.getName())
+        i += 1
+
+    print "  </members>\n  <failoverdomains>"
+
+    i = 0
+    for service in servicekeys:
+        svcdb = lustreDB.lookup(service)
+        print "    <failoverdomain id=\"%d\" name=\"%s\" ordered=\"yes\" restricted=\"yes\">" % (i, svcdb.getName())
+        i += 1
+
+        j = 0
+        active_uuid = get_active_target(svcdb)
+        for svc_uuid in [active_uuid] + services[service]:
+            if svc_uuid == active_uuid and j > 0:
+                continue
+            svcdb = lustreDB.lookup(svc_uuid)
+
+            svc_node_uuid = svcdb.get_first_ref('node')
+            svc_nodedb = lustreDB.lookup(svc_node_uuid)
+
+            print "      <failoverdomainnode id=\"%d\" name=\"%s\"/>" % (j, svc_nodedb.getName())
+            j += 1
+
+        print "    </failoverdomain>"
+
+    print "  </failoverdomains>\n  <services>"
+
+    i = 0
+    for service in servicekeys:
+        svcdb = lustreDB.lookup(service)
+        active_uuid = get_active_target(svcdb)
+        activedb = lustreDB.lookup(active_uuid)
+
+        svc_node_uuid = activedb.get_first_ref('node')
+        svc_nodedb = lustreDB.lookup(svc_node_uuid)
+
+        print "    <service checkinterval=\"30\" failoverdomain=\"%s\" id=\"%d\" name=\"%s\" userscript=\"%s/%s\">" \
+              % ( svcdb.getName(), i, svcdb.getName(), config.service_scripts, svcdb.getName())
+        print "      <service_ipaddresses/>\n    </service>"
+        i += 1
+
+    print "  </services>\n</cluconfig>"
+
 def doRecovery(lustreDB, lctl, tgt_uuid, client_uuid, nid_uuid):
     tgt = lustreDB.lookup(tgt_uuid)
     if not tgt:
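Behind the XML output, doClumanager is a fixpoint computation: starting from the osd/mdsdev targets configured on this host, it keeps pulling in every node that can serve one of the known targets and every target those nodes can serve, stopping once nothing new appears, and refuses to emit a config if the closure would exceed CluManager's 8-node limit. The same idea over a hypothetical serves mapping (node -> set of target names), not the patch's node/service dictionaries:

    def failover_closure(start_node, serves, max_nodes=8):
        # serves: {node: set(targets)} -- which targets each node can run.
        # Alternately add nodes serving known targets and targets served by
        # member nodes until the sets stop growing.
        nodes = {start_node}
        targets = set(serves[start_node])
        while True:
            new_nodes = {n for n, tgts in serves.items()
                         if n not in nodes and tgts & targets}
            if not new_nodes:
                return nodes
            nodes |= new_nodes
            if len(nodes) > max_nodes:
                raise ValueError("CluManager only supports %d nodes per "
                                 "failover cluster" % max_nodes)
            targets.update(*(serves[n] for n in new_nodes))
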
@@ -3080,6 +3269,11 @@ lconf_options = [
     ('gdb_script', "Fullname of gdb debug script", PARAM, default_gdb_script()),
     ('debug_path', "Path to save debug dumps", PARAM, default_debug_path()),
     ('allow_unprivileged_port', "Allow connections from unprivileged ports"),
+    ('clumanager', "Generate CluManager config file for this node's cluster"),
+    ('rawprimary', "For clumanager, device of the primary quorum", PARAM, "/dev/raw/raw1"),
+    ('rawsecondary', "For clumanager, device of the secondary quorum", PARAM, "/dev/raw/raw2"),
+    ('service_scripts', "For clumanager, directory containing per-service scripts", PARAM, "/etc/lustre/services"),
+    ('make_service_scripts', "Create per-service symlinks for use with clumanager"),
 # Client recovery options
     ('recover', "Recover a device"),
     ('group', "The group of devices to configure or cleanup", PARAM),
@@ -3200,7 +3394,10 @@ def main():
         lctl.clear_log(config.record_device, config.record_log)
         lctl.record(config.record_device, config.record_log)
 
-    doHost(lustreDB, node_list)
+    if config.clumanager:
+        doClumanager(lustreDB, node_list)
+    else:
+        doHost(lustreDB, node_list)
 
     if config.record:
         lctl.end_record()
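
Taken together, one plausible way to drive the new options (the flag names are the ones registered in lconf_options above; invoking lconf without further node selection, and parsing its stdout, are assumptions for illustration only):

    import subprocess
    import xml.dom.minidom

    CONFIG_XML = "/etc/lustre/config.xml"

    # Create the per-target symlinks used as CluManager userscripts, then
    # capture the generated cluconfig XML and sanity-check its shape.
    subprocess.check_call(["lconf", "--make_service_scripts", CONFIG_XML])
    cluconfig = subprocess.check_output(["lconf", "--clumanager", CONFIG_XML])

    doc = xml.dom.minidom.parseString(cluconfig)
    print("%d members, %d services" %
          (len(doc.getElementsByTagName("member")),
           len(doc.getElementsByTagName("service"))))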