# lconf is the main driver script for starting and stopping
# lustre filesystem services.
#
-# Based in part on the XML obdctl modifications done by Brian Behlendorf
+# Based in part on the XML obdctl modifications done by Brian Behlendorf
import sys, getopt, types
import string, os, stat, popen2, socket, time, random, fcntl, select
PORTALS_DIR = '../portals'
# Needed to call lconf --record
-CONFIG_FILE = ""
+CONFIG_FILE = ""
# Please keep these in sync with the values in portals/kp30.h
-ptldebug_names = {
+ptldebug_names = {
"trace" : (1 << 0),
"inode" : (1 << 1),
"super" : (1 << 2),
if not first_cleanup_error:
first_cleanup_error = rc
-# ============================================================
+# ============================================================
# debugging and error funcs
def fixme(msg = "this feature"):
def my_int(s):
import types
if type(s) is types.IntType:
- return s
+ return s
try:
if (s[0:2] == '0x') or (s[0:1] == '0'):
return eval(s, {}, {})
return 0
except IOError:
return 0
-
+
def clean_pidfile(self):
""" Remove a stale pidfile """
log("removing stale pidfile:", self.pidfile())
os.unlink(self.pidfile())
except OSError, e:
log(self.pidfile(), e)
-
+
class AcceptorHandler(DaemonHandler):
def __init__(self, port, net_type):
DaemonHandler.__init__(self, "acceptor")
def command_line(self):
return string.join(map(str,(self.flags, self.port)))
-
+
acceptors = {}
# start the acceptors
if daemon.net_type == 'tcp' and not daemon.running():
daemon.start()
else:
- panic("run_one_acceptor: No acceptor defined for port:", port)
-
+ panic("run_one_acceptor: No acceptor defined for port:", port)
+
def stop_acceptor(port):
if acceptors.has_key(port):
daemon = acceptors[port]
if daemon.net_type == 'tcp' and daemon.running():
daemon.stop()
-
+
# ============================================================
# handle lctl interface
def use_save_file(self, file):
self.save_file = file
-
+
def record(self, dev_name, logname):
log("Recording log", logname, "on", dev_name)
self.record_device = dev_name
device $%s
record %s
%s""" % (self.record_device, self.record_log, cmds)
-
+
debug("+", cmd_line, cmds)
if config.noexec: return (0, [])
raise CommandError(self.lctl, out, rc)
return rc, out
-
def clear_log(self, dev, log):
""" clear an existing log """
cmds = """
quit""" % (net_type,
nid, hostaddr )
self.run(cmds)
-
+
def connect(self, srv):
self.add_uuid(srv.net_type, srv.nid_uuid, srv.nid)
if srv.net_type in ('tcp','openib','iib','vib','ra') and not config.lctl_dump:
device $%s
recover %s""" %(dev_name, new_conn)
self.run(cmds)
-
+
# add a route to a range
def add_route(self, net, gw, lo, hi):
cmds = """
except CommandError, e:
log ("ignore: ")
e.dump()
-
+
def del_route(self, net, gw, lo, hi):
cmds = """
ignore_errors
quit""" % (net_type,
nid)
self.run(cmds)
-
+
# disconnect one connection
def disconnect(self, srv):
self.del_uuid(srv.nid_uuid)
attach %s %s %s
quit""" % (type, name, uuid)
self.run(cmds)
-
+
def setup(self, name, setup = ""):
cmds = """
cfg_device %s
setup %s
quit""" % (name, setup)
self.run(cmds)
-
+
# create a new device with lctl
def newdev(self, type, name, uuid, setup = ""):
except CommandError, e:
self.cleanup(name, uuid, 0)
raise e
-
+
# cleanup a device
def cleanup(self, name, uuid, force, failover = 0):
modbase = src_dir +'/'+ dev_dir +'/'+ modname
for modext in '.ko', '.o':
module = modbase + modext
- try:
+ try:
if os.access(module, os.R_OK):
return module
except OSError:
if devsize > 1024 * 1024:
jsize = ((devsize / 102400) * 4)
if jsize > 400:
- jsize = 400
+ jsize = 400
if jsize: jopt = "-J size=%d" %(jsize,)
if isize: iopt = "-I %d" %(isize,)
mkfs = 'mkfs.ext2 -j -b 4096 '
if not os.access(loop + str(0), os.R_OK):
panic("can't access loop devices")
return loop
-
+
# find loop device assigned to thefile
def find_loop(file):
loop = loop_base()
# determine if dev is formatted as a <fstype> filesystem
def need_format(fstype, dev):
- # FIXME don't know how to implement this
+ # FIXME don't know how to implement this
return 0
# initialize a block device if needed
# panic("device:", dev,
# "not prepared, and autoformat is not set.\n",
# "Rerun with --reformat option to format ALL filesystems")
-
return dev
def if2addr(iface):
else:
local = sys_get_local_address(net_type, wildcard, cluster_id)
return local
-
+
def sys_get_local_address(net_type, wildcard, cluster_id):
"""Return the local address for the network type."""
local = ""
elan_id = a[1]
break
try:
- nid = my_int(cluster_id) + my_int(elan_id)
+ nid = my_int(cluster_id) + my_int(elan_id)
local = "%d" % (nid)
except ValueError, e:
local = elan_id
fp = open('/proc/sys/kernel/osrelease')
lines = fp.readlines()
fp.close()
-
+
for l in lines:
version = string.split(l)
a = string.split(version[0], '.')
except IOError, e:
log(e)
return 0
-
class kmod:
"""Manage kernel modules"""
self._server = None
self._connected = 0
self.kmod = kmod(config.lustre, config.portals)
-
+
def info(self, *args):
msg = string.join(map(str,args))
print self.module_name + ":", self.name, self.uuid, msg
log(self.module_name, "cleanup failed: ", self.name)
e.dump()
cleanup_error(e.rc)
-
+
def add_portals_module(self, dev_dir, modname):
"""Append a module to list of modules to load."""
self.kmod.add_portals_module(dev_dir, modname)
def load_module(self):
"""Load all the modules in the list in the order they appear."""
self.kmod.load_module()
-
+
def cleanup_module(self):
"""Unload the modules in the list in reverse order."""
if self.safe_to_clean():
def safe_to_clean(self):
return 1
-
+
def safe_to_clean_modules(self):
return self.safe_to_clean()
-
+
class Network(Module):
def __init__(self,db):
Module.__init__(self, 'NETWORK', db)
return None
return Network(srvdb)
-
+
def prepare(self):
if is_network_prepared():
return
self.osclist.append(osc)
else:
panic('osc not found:', obd_uuid)
-
+
def prepare(self):
if is_prepared(self.name):
return
if self.config_only:
panic("Can't prepare config_only LOV ", self.name)
-
+
for osc in self.osclist:
try:
# Only ignore connect failures with --force, which
self.journal_size = self.db.get_val_int('journalsize', 0)
self.fstype = self.db.get_val('fstype', '')
self.nspath = self.db.get_val('nspath', '')
- self.mkfsoptions = self.db.get_val('mkfsoptions', '')
+ self.mkfsoptions = '-i 4096 ' + self.db.get_val('mkfsoptions', '')
self.mountfsoptions = self.db.get_val('mountfsoptions', '')
# overwrite the orignal MDSDEV name and uuid with the MDS name and uuid
target_uuid = self.db.get_first_ref('target')
def load_module(self):
if self.active:
Module.load_module(self)
-
+
def prepare(self):
if is_prepared(self.name):
return
self.mkfsoptions)
if not is_prepared('MDT'):
lctl.newdev("mdt", 'MDT', 'MDT_UUID', setup ="")
- try:
+ try:
mountfsoptions = def_mount_options(self.fstype, 'mds')
-
+
if config.mountfsoptions:
if mountfsoptions:
mountfsoptions = mountfsoptions + ',' + config.mountfsoptions
self.inode_size, self.mkfsoptions)
lctl.newdev("mds", self.name, self.uuid,
setup ="%s %s" %(blkdev, self.fstype))
-
+
# record logs for the MDS lov
for uuid in self.filesystem_uuids:
log("recording clients for filesystem:", uuid)
self.size = self.db.get_val_int('devsize', 0)
self.journal_size = self.db.get_val_int('journalsize', 0)
self.inode_size = self.db.get_val_int('inodesize', 0)
- self.mkfsoptions = self.db.get_val('mkfsoptions', '')
+ self.mkfsoptions = '-i 16384 ' + self.db.get_val('mkfsoptions', '')
self.mountfsoptions = self.db.get_val('mountfsoptions', '')
self.fstype = self.db.get_val('fstype', '')
self.nspath = self.db.get_val('nspath', '')
self.active = 0
if self.active and config.group and config.group != ost.get_val('group'):
self.active = 0
-
+
self.target_dev_uuid = self.uuid
self.uuid = target_uuid
# modules
self.inode_size, self.mkfsoptions)
mountfsoptions = def_mount_options(self.fstype, 'ost')
-
+
if config.mountfsoptions:
if mountfsoptions:
mountfsoptions = mountfsoptions + ',' + config.mountfsoptions
Client.__init__(self, db, uuid, 'mgmt_cli', '',
self_name = mgmtcli_name_for_uuid(db.getUUID()),
module_dir = 'mgmt')
-
+
class COBD(Module):
def __init__(self, db):
Module.__init__(self, 'COBD', db)
run("mkdir", self.path)
ret, val = run(cmd)
if ret:
- self.mdc.cleanup()
+ self.mdc.cleanup()
self.vosc.cleanup()
panic("mount failed:", self.path, ":", string.join(val))
return srv_list
-# the order of iniitailization is based on level.
+# the order of initialization is based on level.
def getServiceLevel(self):
type = self.get_class()
ret=0;
panic("Unknown type: ", type)
if ret < config.minlevel or ret > config.maxlevel:
- ret = 0
+ ret = 0
return ret
#
# [(level, db_object),]
def getServices(self):
list = []
- for ref_class, ref_uuid in self.get_all_refs():
+ for ref_class, ref_uuid in self.get_all_refs():
servdb = self.lookup(ref_uuid)
if servdb:
level = getServiceLevel(servdb)
############################################################
-# MDC UUID hack -
+# MDC UUID hack -
# FIXME: clean this mess up!
#
# OSC is no longer in the xml, so we have to fake it.
if (r[3] <= to and to <= r[4]) and cluster_id == r[2]:
result.append((srv, r))
return result
-
+
def get_active_target(db):
target_uuid = db.getUUID()
target_name = db.getName()
net = Network(n)
if net.nid_uuid == nid_uuid:
return net
-
+
############################################################
# lconf level logic
#
# Prepare the system to run lustre using a particular profile
-# in a the configuration.
+# in the configuration.
# * load & the modules
# * setup networking for the current node
# * make sure partitions are in place and prepared
panic("profile:", prof_uuid, "not found.")
services = getServices(prof_db)
operation(services)
-
+
def doWriteconf(services):
if config.nosetup:
return
for s in services:
n = newService(s[1])
n.prepare()
-
+
def doModules(services):
if config.nomod:
return
n.cleanup_module()
#
-# Load profile for
+# Load profile for
def doHost(lustreDB, hosts):
global is_router, local_node_name
node_db = None
timeout = node_db.get_val_int('timeout', 0)
ptldebug = node_db.get_val('ptldebug', '')
subsystem = node_db.get_val('subsystem', '')
-
+
find_local_clusters(node_db)
if not is_router:
find_local_routes(lustreDB)
base = os.path.dirname(cmd)
if development_mode():
if not config.lustre:
- debug('using objdir module paths')
+ debug('using objdir module paths')
config.lustre = (os.path.join(base, ".."))
# normalize the portals dir, using command line arg if set
if config.portals:
debug('config.portals', config.portals)
elif config.lustre and config.portals:
# production mode
- # if --lustre and --portals, normalize portals
+ # if --lustre and --portals, normalize portals
# can ignore POTRALS_DIR here, since it is probly useless here
config.portals = os.path.join(config.lustre, config.portals)
debug('config.portals B', config.portals)
fp = open(path, 'w')
fp.write('%d\n' %(max))
fp.close()
-
-
+
def sys_make_devices():
if not os.access('/dev/portals', os.R_OK):
run('mknod /dev/portals c 10 240')
if new_dir in syspath:
return
os.environ['PATH'] = os.environ['PATH'] + ':' + new_dir
-
+
def default_debug_path():
path = '/tmp/lustre-log'
if os.path.isdir('/r'):
PARAM),
('minlevel', "Minimum level of services to configure/cleanup",
INTPARAM, 0),
- ('maxlevel', """Maximum level of services to configure/cleanup
+ ('maxlevel', """Maximum level of services to configure/cleanup
Levels are aproximatly like:
10 - netwrk
20 - device, ldlm
('inactive', """The name of an inactive service, to be ignored during
mounting (currently OST-only). Can be repeated.""",
PARAMLIST),
- ]
+ ]
def main():
global lctl, config, toplustreDB, CONFIG_FILE
# in the upcall this is set to SIG_IGN
signal.signal(signal.SIGCHLD, signal.SIG_DFL)
-
+
cl = Lustre.Options("lconf", "config.xml", lconf_options)
try:
config, args = cl.parse(sys.argv[1:])
random.seed(seed)
sanitise_path()
-
+
init_select(config.select)
if len(args) > 0: