X-Git-Url: https://git.whamcloud.com/?p=fs%2Flustre-release.git;a=blobdiff_plain;f=lustre%2Futils%2Flconf;h=bfaa97aa8d2a213e026f722871f557df99785d7f;hp=01f7c75abb59b26ea5770e4942bf4d49b77db144;hb=aa84b33f3c2a7c66da7ed038f999d28d9fbf5837;hpb=57e6d88a8a8d858e2d74aeefba4c764ad08cf86d diff --git a/lustre/utils/lconf b/lustre/utils/lconf index 01f7c75..bfaa97a 100755 --- a/lustre/utils/lconf +++ b/lustre/utils/lconf @@ -1,31 +1,51 @@ #!/usr/bin/env python # -# Copyright (C) 2002-2003 Cluster File Systems, Inc. -# Authors: Robert Read -# Mike Shaver -# This file is part of Lustre, http://www.lustre.org. +# GPL HEADER START # -# Lustre is free software; you can redistribute it and/or -# modify it under the terms of version 2 of the GNU General Public -# License as published by the Free Software Foundation. +# DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. # -# Lustre is distributed in the hope that it will be useful, -# but WITHOUT ANY WARRANTY; without even the implied warranty of -# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the -# GNU General Public License for more details. +# This program is free software; you can redistribute it and/or modify +# it under the terms of the GNU General Public License version 2 only, +# as published by the Free Software Foundation. # -# You should have received a copy of the GNU General Public License -# along with Lustre; if not, write to the Free Software -# Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA. +# This program is distributed in the hope that it will be useful, but +# WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU +# General Public License version 2 for more details (a copy is included +# in the LICENSE file that accompanied this code). +# +# You should have received a copy of the GNU General Public License +# version 2 along with this program; If not, see +# http://www.sun.com/software/products/lustre/docs/GPLv2.pdf +# copy of GPLv2]. +# +# Please contact Sun Microsystems, Inc., 4150 Network Circle, Santa Clara, +# CA 95054 USA or visit www.sun.com if you need additional information or +# have any questions. +# +# GPL HEADER END +# + +# +# Copyright (c) 2002, 2010, Oracle and/or its affiliates. All rights reserved. +# Use is subject to license terms. +# + +# +# This file is part of Lustre, http://www.lustre.org/ +# Lustre is a trademark of Sun Microsystems, Inc. +# +# Author: Robert Read +# Author: Mike Shaver # # lconf - lustre configuration tool # # lconf is the main driver script for starting and stopping # lustre filesystem services. 
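The hunk above turns PYMOD_DIR into a search list and adds a PLATFORM switch keyed on sys.platform. A standalone sketch of the same selection (the patch spells it with string.find() for Python 1.5 compatibility; str.startswith() below is an equivalent modern rendering, not the patch's code):

    import sys

    PYMOD_DIR = ["/usr/lib64/lustre/python", "/usr/lib/lustre/python"]

    if sys.platform.startswith('linux'):
        PLATFORM, KEXTPATH = 'LINUX', ''
    elif sys.platform.startswith('darwin'):
        # Darwin loads Lustre as kernel extensions from this path
        PLATFORM, KEXTPATH = 'DARWIN', '/System/Library/Extensions/'
    else:
        PLATFORM, KEXTPATH = 'Unsupported', ''

    # lib64 is consulted first, then the legacy lib path
    sys.path.extend(PYMOD_DIR)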
# -# Based in part on the XML obdctl modifications done by Brian Behlendorf +# Based in part on the XML obdctl modifications done by Brian Behlendorf -import sys, getopt, types +import sys, getopt, types, errno import string, os, stat, popen2, socket, time, random, fcntl, select import re, exceptions, signal, traceback import xml.dom.minidom @@ -35,34 +55,43 @@ if sys.version[0] == '1': else: from fcntl import F_GETFL, F_SETFL -PYMOD_DIR = "/usr/lib/lustre/python" +PYMOD_DIR = ["/usr/lib64/lustre/python", "/usr/lib/lustre/python"] +PLATFORM = '' +KEXTPATH = '' +if string.find(sys.platform, 'linux') != -1: + PLATFORM='LINUX' +elif string.find(sys.platform, 'darwin') != -1: + PLATFORM='DARWIN' + KEXTPATH='/System/Library/Extensions/' +else: + PLATFORM='Unsupported' def development_mode(): base = os.path.dirname(sys.argv[0]) - if os.access(base+"/Makefile.am", os.R_OK): + if os.access(base+"/Makefile", os.R_OK): return 1 return 0 -if not development_mode(): - sys.path.append(PYMOD_DIR) +if development_mode(): + sys.path.append('../utils') +else: + sys.path.extend(PYMOD_DIR) import Lustre # Global parameters MAXTCPBUF = 16777216 -DEFAULT_TCPBUF = 8388608 -DEFAULT_PORT = 988 # # Maximum number of devices to search for. # (the /dev/loop* nodes need to be created beforehand) MAX_LOOP_DEVICES = 256 -PORTALS_DIR = 'portals' +PORTALS_DIR = '../lnet' # Needed to call lconf --record -CONFIG_FILE = "" +CONFIG_FILE = "" -# Please keep these in sync with the values in portals/kp30.h -ptldebug_names = { +# Please keep these in sync with the values in lnet/include/libcfs/libcfs.h +ptldebug_names = { "trace" : (1 << 0), "inode" : (1 << 1), "super" : (1 << 2), @@ -77,7 +106,8 @@ ptldebug_names = { "buffs" : (1 << 11), "other" : (1 << 12), "dentry" : (1 << 13), - "portals" : (1 << 14), + "portals" : (1 << 14), # deprecated + "lnet" : (1 << 14), "page" : (1 << 15), "dlmtrace" : (1 << 16), "error" : (1 << 17), @@ -86,6 +116,11 @@ ptldebug_names = { "rpctrace" : (1 << 20), "vfstrace" : (1 << 21), "reada" : (1 << 22), + "mmap" : (1 << 23), + "config" : (1 << 24), + "console" : (1 << 25), + "quota" : (1 << 26), + "sec" : (1 << 27), } subsystem_names = { @@ -98,20 +133,29 @@ subsystem_names = { "log" : (1 << 6), "llite" : (1 << 7), "rpc" : (1 << 8), - "mgmt" : (1 << 9), - "portals" : (1 << 10), - "socknal" : (1 << 11), - "qswnal" : (1 << 12), - "pinger" : (1 << 13), - "filter" : (1 << 14), - "ptlbd" : (1 << 15), - "echo" : (1 << 16), - "ldlm" : (1 << 17), - "lov" : (1 << 18), - "gmnal" : (1 << 19), - "ptlrouter" : (1 << 20), - "cobd" : (1 << 21), - "ibnal" : (1 << 22), + "lnet" : (1 << 10), + "portals" : (1 << 10), # deprecated + "lnd" : (1 << 11), + "nal" : (1 << 11), # deprecated + "pinger" : (1 << 12), + "filter" : (1 << 13), + "ptlbd" : (1 << 14), # deprecated + "echo" : (1 << 15), + "ldlm" : (1 << 16), + "lov" : (1 << 17), + "ptlrouter" : (1 << 18), # deprecated + "cobd" : (1 << 19), + "sm" : (1 << 20), + "asobd" : (1 << 21), + "confobd" : (1 << 22), # deprecated + "lmv" : (1 << 23), + "cmobd" : (1 << 24), + "sec" : (1 << 25), + "sec" : (1 << 26), + "gss" : (1 << 27), + "gks" : (1 << 28), + "mgc" : (1 << 29), + "mgs" : (1 << 30), } @@ -121,11 +165,11 @@ def cleanup_error(rc): if not first_cleanup_error: first_cleanup_error = rc -# ============================================================ +# ============================================================ # debugging and error funcs def fixme(msg = "this feature"): - raise Lustre.LconfError, msg + ' not implmemented yet.' 
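ptldebug_names and subsystem_names above are plain name-to-bit maps; lconf later ORs the bits of a comma-separated flag string into a single mask for lctl. A minimal sketch of that folding (build_mask is an illustrative helper, not a function from the patch):

    ptldebug_names = {
        "trace"   : 1 << 0,
        "error"   : 1 << 17,
        "config"  : 1 << 24,
        "console" : 1 << 25,
    }   # abridged from the table above

    def build_mask(flags, table):
        # OR together the bit for each named flag; unknown names are an
        # error rather than being silently ignored
        mask = 0
        for name in flags.split(','):
            name = name.strip()
            if name not in table:
                raise ValueError("unknown debug flag: %s" % name)
            mask = mask | table[name]
        return mask

    assert build_mask("trace,console", ptldebug_names) == (1 << 0) | (1 << 25)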
+ raise Lustre.LconfError, msg + ' not implemented yet.' def panic(*args): msg = string.join(map(str,args)) @@ -143,20 +187,27 @@ def logall(msgs): print string.strip(s) def debug(*args): + # apparently, (non)execution of the following line affects mds device + # startup order (e.g. two mds's using loopback devices), so always do it. + msg = string.join(map(str,args)) if config.verbose: - msg = string.join(map(str,args)) print msg # ack, python's builtin int() does not support '0x123' syntax. # eval can do it, although what a hack! def my_int(s): + import types + if type(s) is types.IntType: + return s try: - if s[0:2] == '0x': + if (s[0:2] == '0x') or (s[0:1] == '0'): return eval(s, {}, {}) else: return int(s) except SyntaxError, e: raise ValueError("not a number") + except TypeError, e: + raise ValueError("not a number") except NameError, e: raise ValueError("not a number") @@ -185,113 +236,6 @@ class CommandError (exceptions.Exception): else: print self.cmd_err - -# ============================================================ -# handle daemons, like the acceptor -class DaemonHandler: - """ Manage starting and stopping a daemon. Assumes daemon manages - it's own pid file. """ - - def __init__(self, cmd): - self.command = cmd - self.path ="" - - def start(self): - if self.running(): - log(self.command, "already running.") - if not self.path: - self.path = find_prog(self.command) - if not self.path: - panic(self.command, "not found.") - ret, out = runcmd(self.path +' '+ self.command_line()) - if ret: - raise CommandError(self.path, out, ret) - - def stop(self): - if self.running(): - pid = self.read_pidfile() - try: - log ("killing process", pid) - os.kill(pid, 15) - #time.sleep(1) # let daemon die - except OSError, e: - log("unable to kill", self.command, e) - if self.running(): - log("unable to kill", self.command) - - def running(self): - pid = self.read_pidfile() - if pid: - try: - os.kill(pid, 0) - except OSError: - self.clean_pidfile() - else: - return 1 - return 0 - - def read_pidfile(self): - try: - fp = open(self.pidfile(), 'r') - pid = int(fp.read()) - fp.close() - return pid - except IOError: - return 0 - - def clean_pidfile(self): - """ Remove a stale pidfile """ - log("removing stale pidfile:", self.pidfile()) - try: - os.unlink(self.pidfile()) - except OSError, e: - log(self.pidfile(), e) - -class AcceptorHandler(DaemonHandler): - def __init__(self, port, net_type, send_mem, recv_mem, irq_aff): - DaemonHandler.__init__(self, "acceptor") - self.port = port - self.flags = '' - self.send_mem = send_mem - self.recv_mem = recv_mem - - if irq_aff: - self.flags = self.flags + ' -i' - - def pidfile(self): - return "/var/run/%s-%d.pid" % (self.command, self.port) - - def command_line(self): - return string.join(map(str,('-s', self.send_mem, '-r', self.recv_mem, self.flags, self.port))) - -acceptors = {} - -# start the acceptors -def run_acceptors(): - if config.lctl_dump or config.record: - return - for port in acceptors.keys(): - daemon = acceptors[port] - if not daemon.running(): - daemon.start() - -def run_one_acceptor(port): - if config.lctl_dump or config.record: - return - if acceptors.has_key(port): - daemon = acceptors[port] - if not daemon.running(): - daemon.start() - else: - panic("run_one_acceptor: No acceptor defined for port:", port) - -def stop_acceptor(port): - if acceptors.has_key(port): - daemon = acceptors[port] - if daemon.running(): - daemon.stop() - - # ============================================================ # handle lctl interface class LCTLInterface: @@ 
-315,7 +259,7 @@ class LCTLInterface: def use_save_file(self, file): self.save_file = file - + def record(self, dev_name, logname): log("Recording log", logname, "on", dev_name) self.record_device = dev_name @@ -347,12 +291,12 @@ class LCTLInterface: device $%s record %s %s""" % (self.record_device, self.record_log, cmds) - + debug("+", cmd_line, cmds) if config.noexec: return (0, []) child = popen2.Popen3(cmd_line, 1) # Capture stdout and stderr from command - child.tochild.write(cmds + "\n") + child.tochild.write(cmds + "\nq\n") child.tochild.close() # From "Python Cookbook" from O'Reilly @@ -398,7 +342,12 @@ class LCTLInterface: raise CommandError(self.lctl, out, rc) return rc, out - + def unconfigure_network(self): + """get lnet to unreference itself""" + cmds = """ + network unconfigure""" + self.run(cmds) + def clear_log(self, dev, log): """ clear an existing log """ cmds = """ @@ -408,41 +357,25 @@ class LCTLInterface: quit """ % (dev, log) self.run(cmds) - def network(self, net, nid): - """ set mynid """ - cmds = """ - network %s - mynid %s - quit """ % (net, nid) - self.run(cmds) - # create a new connection def add_uuid(self, net_type, uuid, nid): - cmds = "\n add_uuid %s %s %s" %(uuid, nid, net_type) + if net_type != 'lnet' and string.find(nid,'@') < 0: + nidstr = nid + "@" + net_type + else: + nidstr = nid + cmds = "\n add_uuid %s %s" %(uuid, nidstr) self.run(cmds) - def add_autoconn(self, net_type, send_mem, recv_mem, nid, hostaddr, - port, flags): - if net_type in ('tcp',) and not config.lctl_dump: - cmds = """ - network %s - send_mem %d - recv_mem %d - add_autoconn %s %s %d %s - quit""" % (net_type, - send_mem, - recv_mem, - nid, hostaddr, port, flags ) - self.run(cmds) - def connect(self, srv): - self.add_uuid(srv.net_type, srv.nid_uuid, srv.nid) - if srv.net_type in ('tcp',) and not config.lctl_dump: - flags = 's' - if srv.irq_affinity: - flags = flags + 'i' - self.add_autoconn(srv.net_type, srv.send_mem, srv.recv_mem, - srv.nid, srv.hostaddr, srv.port, flags) + if not srv.nid_uuid: + panic('nid_uuid not set for ', srv.net_type, srv.nid) + hostaddr = srv.db.get_hostaddr() + if len(hostaddr) > 1: + panic('multiple --hostaddr for ', srv.nid_uuid, ' not supported') + elif len(hostaddr) == 1 and hostaddr[0] != srv.nid: + panic('different --hostaddr and --nid for ', srv.nid_uuid, ' not supported') + else: + self.add_uuid(srv.net_type, srv.nid_uuid, srv.nid) # Recover a device def recover(self, dev_name, new_conn): @@ -450,68 +383,12 @@ class LCTLInterface: device $%s recover %s""" %(dev_name, new_conn) self.run(cmds) - - # add a route to a range - def add_route(self, net, gw, lo, hi): - cmds = """ - network %s - add_route %s %s %s - quit """ % (net, - gw, lo, hi) - try: - self.run(cmds) - except CommandError, e: - log ("ignore: ") - e.dump() - - def del_route(self, net, gw, lo, hi): - cmds = """ - ignore_errors - network %s - del_route %s %s %s - quit """ % (net, gw, lo, hi) - self.run(cmds) - - # add a route to a host - def add_route_host(self, net, uuid, gw, tgt): - self.add_uuid(net, uuid, tgt) - cmds = """ - network %s - add_route %s %s - quit """ % (net, - gw, tgt) - try: - self.run(cmds) - except CommandError, e: - log ("ignore: ") - e.dump() - - # add a route to a range - def del_route_host(self, net, uuid, gw, tgt): - self.del_uuid(uuid) - cmds = """ - ignore_errors - network %s - del_route %s %s - quit """ % (net, gw, tgt) - self.run(cmds) - - def del_autoconn(self, net_type, nid, hostaddr): - if net_type in ('tcp',) and not config.lctl_dump: - cmds = """ - 
ignore_errors - network %s - del_autoconn %s %s s - quit""" % (net_type, - nid, hostaddr) - self.run(cmds) - # disconnect one connection def disconnect(self, srv): + if not srv.nid_uuid: + panic('nid_uuid not set for ', srv.net_type, srv.nid) self.del_uuid(srv.nid_uuid) - if srv.net_type in ('tcp',) and not config.lctl_dump: - self.del_autoconn(srv.net_type, srv.nid, srv.hostaddr) def del_uuid(self, uuid): cmds = """ @@ -520,28 +397,33 @@ class LCTLInterface: quit""" % (uuid,) self.run(cmds) - # disconnect all - def disconnectAll(self, net): - cmds = """ - ignore_errors - network %s - disconnect - quit""" % (net) - self.run(cmds) - def attach(self, type, name, uuid): cmds = """ attach %s %s %s quit""" % (type, name, uuid) self.run(cmds) - - def setup(self, name, setup = ""): + + def setup(self, name, setup = ""): cmds = """ cfg_device %s setup %s quit""" % (name, setup) self.run(cmds) - + + def abort_recovery(self, name): + cmds = """ + ignore_errors + device $%s + abort_recovery + quit""" % (name) + self.run(cmds) + + def add_conn(self, name, conn_uuid): + cmds = """ + cfg_device %s + add_conn %s + quit""" % (name, conn_uuid) + self.run(cmds) # create a new device with lctl def newdev(self, type, name, uuid, setup = ""): @@ -551,7 +433,9 @@ class LCTLInterface: except CommandError, e: self.cleanup(name, uuid, 0) raise e - + if (config.abort_recovery): + if (type == 'obdfilter' or type == 'mds'): + self.abort_recovery(name) # cleanup a device def cleanup(self, name, uuid, force, failover = 0): @@ -567,22 +451,35 @@ class LCTLInterface: # create an lov def lov_setup(self, name, uuid, desc_uuid, mdsuuid, stripe_cnt, - stripe_sz, stripe_off, - pattern, devlist): + stripe_sz, stripe_off, pattern): cmds = """ attach lov %s %s - lov_setup %s %d %d %d %s %s - quit""" % (name, uuid, desc_uuid, stripe_cnt, stripe_sz, stripe_off, - pattern, devlist) + lov_setup %s %d %d %d %s + quit""" % (name, uuid, desc_uuid, stripe_cnt, stripe_sz, stripe_off, pattern) self.run(cmds) - # create an lov - def lov_setconfig(self, uuid, mdsuuid, stripe_cnt, stripe_sz, stripe_off, - pattern, devlist): + # add an OBD to a LOV + def lov_add_obd(self, name, uuid, obd_uuid, index, gen): cmds = """ - cfg_device $%s - lov_setconfig %s %d %d %d %s %s - quit""" % (mdsuuid, uuid, stripe_cnt, stripe_sz, stripe_off, pattern, devlist) + cfg_device %s + lov_modify_tgts add %s %s %s %s + quit""" % (name, name, obd_uuid, index, gen) + self.run(cmds) + + # delete an OBD from a LOV + def lov_del_obd(self, name, uuid, obd_uuid, index, gen): + cmds = """ + cfg_device %s + lov_modify_tgts del %s %s %s %s + quit""" % (name, name, obd_uuid, index, gen) + self.run(cmds) + + # deactivate an OBD + def deactivate(self, name): + cmds = """ + cfg_device %s + deactivate + quit""" % (name) self.run(cmds) # dump the log file @@ -594,15 +491,25 @@ class LCTLInterface: # get list of devices def device_list(self): - devices = '/proc/fs/lustre/devices' ret = [] - if os.access(devices, os.R_OK): - try: - fp = open(devices, 'r') - ret = fp.readlines() - fp.close() - except IOError, e: - log(e) + if PLATFORM == 'LINUX': + devices = '/proc/fs/lustre/devices' + if os.access(devices, os.R_OK): + try: + fp = open(devices, 'r') + ret = fp.readlines() + fp.close() + except IOError, e: + log(e) + elif PLATFORM == 'DARWIN': + rc, out = self.run("device_list") + ret = out.split("\n") + if len(ret) == 0: + return ret + tail = ret[-1] + if not tail: + # remove the last empty line + ret = ret[:-1] return ret # get lustre version @@ -630,7 +537,7 @@ class 
LCTLInterface: quit""" % (timeout,) self.run(cmds) - # delete mount options + # set lustre upcall def set_lustre_upcall(self, upcall): cmds = """ set_lustre_upcall %s @@ -699,13 +606,14 @@ def do_find_file(base, mod): return module def find_module(src_dir, dev_dir, modname): - mod = '%s.o' % (modname) - module = src_dir +'/'+ dev_dir +'/'+ mod - try: - if os.access(module, os.R_OK): - return module - except OSError: - pass + modbase = src_dir +'/'+ dev_dir +'/'+ modname + for modext in '.ko', '.o': + module = modbase + modext + try: + if os.access(module, os.R_OK): + return module + except OSError: + pass return None # is the path a block device? @@ -717,6 +625,32 @@ def is_block(path): return 0 return stat.S_ISBLK(s[stat.ST_MODE]) +def my_realpath(path): + try: + if os.path.islink(path): + # get the realpath of the mount point path + if 'realpath' in dir(os.path): + real_path = os.path.realpath(path) + else: + real_path = path + link_count = 0 + while os.path.islink(real_path) and (link_count < 20): + link_count = link_count + 1 + path_link = os.readlink(real_path) + if os.path.isabs(path_link): + real_path = path_link + else: + real_path = os.path.join(os.path.dirname(real_path), path_link) + if link_count > 19: + panic("Encountered too many symbolic links resolving path:", path) + else: + real_path = path + + return real_path + except: + panic("Fatal error realpath()ing path:", path) + + # build fs according to type # fixme: dangerous def mkfs(dev, devsize, fstype, jsize, isize, mkfsoptions, isblock=1): @@ -730,7 +664,7 @@ def mkfs(dev, devsize, fstype, jsize, isize, mkfsoptions, isblock=1): # devsize is in 1k, and fs block count is in 4k block_cnt = devsize/4 - if fstype in ('ext3', 'extN'): + if fstype in ('ext3', 'ldiskfs'): # ext3 journal size is in megabytes if jsize == 0: if devsize == 0: @@ -738,12 +672,38 @@ def mkfs(dev, devsize, fstype, jsize, isize, mkfsoptions, isblock=1): ret, out = runcmd("ls -l %s" %dev) devsize = int(string.split(out[0])[4]) / 1024 else: + # sfdisk works for symlink, hardlink, and realdev ret, out = runcmd("sfdisk -s %s" %dev) - devsize = int(out[0]) + if not ret: + devsize = int(out[0]) + else: + # sfdisk -s will fail for too large block device, + # then, read the size of partition from /proc/partitions + + # get the realpath of the device + # it may be the real device, such as /dev/hda7 + # or the hardlink created via mknod for a device + real_dev = my_realpath(dev) + + # get the major and minor number of the realpath via ls + # it seems python(os.stat) does not return + # the st_rdev member of the stat structure + ret, out = runcmd("ls -l %s" %real_dev) + major = string.split(string.split(out[0])[4], ",")[0] + minor = string.split(out[0])[5] + + # get the devsize from /proc/partitions with the major and minor number + ret, out = runcmd("cat /proc/partitions") + for line in out: + if len(line) > 1: + if string.split(line)[0] == major and string.split(line)[1] == minor: + devsize = int(string.split(line)[2]) + break + if devsize > 1024 * 1024: jsize = ((devsize / 102400) * 4) if jsize > 400: - jsize = 400 + jsize = 400 if jsize: jopt = "-J size=%d" %(jsize,) if isize: iopt = "-I %d" %(isize,) mkfs = 'mkfs.ext2 -j -b 4096 ' @@ -764,8 +724,8 @@ def mkfs(dev, devsize, fstype, jsize, isize, mkfsoptions, isblock=1): if ret: panic("Unable to build fs:", dev, string.join(out)) # enable hash tree indexing on fsswe - if fstype in ('ext3', 'extN'): - htree = 'echo "feature FEATURE_C5" | debugfs -w' + if fstype in ('ext3', 'ldiskfs'): + htree = 'tune2fs -O 
dir_index' (ret, out) = run (htree, dev) if ret: panic("Unable to enable htree:", dev) @@ -777,10 +737,10 @@ def loop_base(): if not os.access(loop + str(0), os.R_OK): loop = loop + '/' if not os.access(loop + str(0), os.R_OK): - panic ("can't access loop devices") + loop='/dev/loop' return loop - -# find loop device assigned to thefile + +# find loop device assigned to the file def find_loop(file): loop = loop_base() for n in xrange(0, MAX_LOOP_DEVICES): @@ -817,7 +777,9 @@ def init_loop(file, size, fstype, journal_size, inode_size, mkfsoptions, reforma if os.access(dev, os.R_OK): (stat, out) = run('losetup', dev) if stat: - run('losetup', dev, file) + (stat, out) = run('losetup', dev, file) + if stat: + panic("losetup failed: (%s) %s" % (stat, out[0].strip())) return dev else: print "out of loop devices" @@ -836,7 +798,7 @@ def clean_loop(file): # determine if dev is formatted as a filesystem def need_format(fstype, dev): - # FIXME don't know how to implement this + # FIXME don't know how to implement this return 0 # initialize a block device if needed @@ -853,7 +815,6 @@ def block_dev(dev, size, fstype, reformat, autoformat, journal_size, # panic("device:", dev, # "not prepared, and autoformat is not set.\n", # "Rerun with --reformat option to format ALL filesystems") - return dev def if2addr(iface): @@ -865,82 +826,105 @@ def if2addr(iface): ip = string.split(addr, ':')[1] return ip -def sys_get_elan_position_file(): - procfiles = ["/proc/elan/device0/position", - "/proc/qsnet/elan4/device0/position", - "/proc/qsnet/elan3/device0/position"] - for p in procfiles: - if os.access(p, os.R_OK): - return p - return "" +def def_mount_options(fstype, target, blkdev): + """returns deafult mount options for passed fstype and target (mds, ost)""" + if fstype == 'ext3' or fstype == 'ldiskfs': + mountfsoptions = "errors=remount-ro" + if target == 'ost': + if sys_get_branch() == '2.4': + mountfsoptions = "%s,asyncdel" % (mountfsoptions) + #else: + # mountfsoptions = "%s,extents,mballoc" % (mountfsoptions) + elif target == 'mds': + if config.user_xattr: + mountfsoptions = "%s,user_xattr" % (mountfsoptions) + if config.acl: + mountfsoptions = "%s,acl" % (mountfsoptions) + + if blkdev: + # grab superblock info + dumpe2fs="dumpe2fs -f -h" + (ret, sb) = run(dumpe2fs, blkdev) + if ret: + panic("unable to get superblock for ", blkdev) + + # extract journal UUID + journal_UUID='' + journal_DEV='' + for line in sb: + lst = string.split(line, ":") + if lst[0] == 'Journal UUID': + if len(lst[1]) < 3: + panic("cannot retrieve journal UUID for ", blkdev) + if string.split(lst[1])[0] != '': + journal_UUID = string.split(lst[1])[0] + debug(blkdev, 'has journal UUID', journal_UUID) + if lst[0] == 'Journal device': + if len(lst[1]) < 3: + panic("cannot retrieve journal device for ", blkdev) + if string.split(lst[1])[0] != '0x0000': + journal_DEV = string.split(lst[1])[0] + debug(blkdev, 'has journal device', journal_DEV) + break -def sys_get_local_nid(net_type, wildcard, cluster_id): - """Return the local nid.""" - local = "" - if sys_get_elan_position_file(): - local = sys_get_local_address('elan', '*', cluster_id) - else: - local = sys_get_local_address(net_type, wildcard, cluster_id) - return local + if len(journal_UUID) == 0 or len(journal_DEV) == 0: + debug('no external journal found for', blkdev) + # use internal journal + return mountfsoptions -def sys_get_local_address(net_type, wildcard, cluster_id): - """Return the local address for the network type.""" - local = "" - if net_type in ('tcp',): - if ':' in 
wildcard: - iface, star = string.split(wildcard, ':') - local = if2addr(iface) - if not local: - panic ("unable to determine ip for:", wildcard) - else: - host = socket.gethostname() - local = socket.gethostbyname(host) - elif net_type == 'elan': - # awk '/NodeId/ { print $2 }' 'sys_get_elan_position_file()' - f = sys_get_elan_position_file() - if not f: - panic ("unable to determine local Elan ID") - try: - fp = open(f, 'r') - lines = fp.readlines() - fp.close() - for l in lines: - a = string.split(l) - if a[0] == 'NodeId': - elan_id = a[1] - break - try: - nid = my_int(cluster_id) + my_int(elan_id) - local = "%d" % (nid) - except ValueError, e: - local = elan_id - except IOError, e: - log(e) - elif net_type == 'gm': - fixme("automatic local address for GM") - elif net_type == 'scimac': - scinode="/opt/scali/sbin/scinode" - if os.path.exists(scinode): - (rc,local) = run(scinode) - else: - panic (scinode, " not found on node with scimac networking") - if rc: - panic (scinode, " failed") - local=string.rstrip(local[0]) + # run blkid, lookup highest-priority device with matching UUID + blkid = "blkid -o device -l -t UUID='%s'" % (journal_UUID) + (ret, devname) = run(blkid) + if ret or len(devname) == 0: + panic("cannot find external journal for ", blkdev) + debug('found', blkdev, 'journal UUID', journal_UUID, 'on', + string.replace(devname[0], '\n', '')) + + try: # sigh, python 1.5 does not support os.stat().st_rdev + jdevpath = my_realpath(string.replace(devname[0], '\n', '')) + ret, out = runcmd("ls -l %s" %jdevpath) + debug('ls -l:', out) + major = int(string.split(string.split(out[0])[4], ',')[0]) + minor = int(string.split(out[0])[5]) + debug('major', major, 'minor', minor) + rdev = major << 8 | minor + except OSError: + panic("cannot stat ", devname[0]) + + debug('found', blkdev, 'journal UUID', journal_UUID, 'on', + jdevpath, 'rdev', rdev) - return local + # add mount option + if string.atoi(journal_DEV, 0) != rdev: + mountfsoptions = "%s,journal_dev=%#x" % (mountfsoptions,rdev) + + return mountfsoptions + return "" + +def sys_get_branch(): + """Returns kernel release""" + return os.uname()[2][:3] def mod_loaded(modname): """Check if a module is already loaded. 
Look in /proc/modules for it.""" - try: - fp = open('/proc/modules') - lines = fp.readlines() - fp.close() - # please forgive my tired fingers for this one - ret = filter(lambda word, mod=modname: word == mod, - map(lambda line: string.split(line)[0], lines)) - return ret - except Exception, e: + if PLATFORM == 'LINUX': + try: + fp = open('/proc/modules') + lines = fp.readlines() + fp.close() + # please forgive my tired fingers for this one + ret = filter(lambda word, mod=modname: word == mod, + map(lambda line: string.split(line)[0], lines)) + return ret + except Exception, e: + return 0 + elif PLATFORM == 'DARWIN': + ret, out = run('/usr/sbin/kextstat | /usr/bin/grep', modname) + if ret == 0: + return 1 + else: + return 0 + else: return 0 # XXX: instead of device_list, ask for $name and see what we get @@ -970,17 +954,18 @@ def is_network_prepared(): def fs_is_mounted(path): """Return true if path is a mounted lustre filesystem""" try: + real_path = my_realpath(path) + fp = open('/proc/mounts') lines = fp.readlines() fp.close() for l in lines: a = string.split(l) - if a[1] == path and a[2] == 'lustre_lite': + if a[1] == real_path and a[2] == 'lustre_lite': return 1 except IOError, e: log(e) return 0 - class kmod: """Manage kernel modules""" @@ -1003,34 +988,85 @@ class kmod: if mod_loaded(mod) and not config.noexec: continue log ('loading module:', mod, 'srcdir', src_dir, 'devdir', dev_dir) - if src_dir: - module = find_module(src_dir, dev_dir, mod) - if not module: - panic('module not found:', mod) - (rc, out) = run('/sbin/insmod', module) - if rc: - raise CommandError('insmod', out, rc) - else: - (rc, out) = run('/sbin/modprobe', mod) - if rc: - raise CommandError('modprobe', out, rc) + if PLATFORM == 'LINUX': + options = '' + if mod == 'lnet': + #For LNET we really need modprobe to load defined LNDs + run('/sbin/modprobe lnet') + #But if that fails, try insmod anyhow with dev option + #accept=all for dev liblustre testing + options = 'accept=all' + if src_dir: + module = find_module(src_dir, dev_dir, mod) + if not module: + panic('module not found:', mod) + (rc, out) = run('/sbin/insmod', module, options) + if rc and not mod_loaded(mod): + if rc == 1: + print("Bad module options? Check dmesg.") + raise CommandError('insmod', out, rc) + else: + (rc, out) = run('/sbin/modprobe', mod) + if rc and not mod_loaded(mod): + if rc == 1: + print("Bad module options? 
Check dmesg.") + raise CommandError('modprobe', out, rc) + elif PLATFORM == 'DARWIN': + run('/sbin/kextload', KEXTPATH + mod + '.kext'); def cleanup_module(self): """Unload the modules in the list in reverse order.""" - rev = self.kmodule_list + + rev = self.kmodule_list[:] # make *copy* of list rev.reverse() for src_dir, dev_dir, mod in rev: if not mod_loaded(mod) and not config.noexec: continue - # debug hack - if mod == 'portals' and config.dump: - lctl.dump(config.dump) + if mod == 'ksocklnd' and not config.noexec: + # Ignore ksocklnd in module list (lnet will remove) + continue log('unloading module:', mod) - (rc, out) = run('/sbin/rmmod', mod) + if mod == 'lnet' and not config.noexec: + # remove any self-ref portals created + lctl.unconfigure_network() + if config.dump: + debug('dumping debug log to', config.dump) + # debug hack + lctl.dump(config.dump) + log('unloading the network') + lctl.unconfigure_network() + if mod_loaded("ksocklnd"): + if PLATFORM == 'LINUX': + run('/sbin/rmmod ksocklnd') + elif PLATFORM == 'DARWIN': + run('/sbin/kextunload', KEXTPATH+'ksocklnd.kext') + if mod_loaded("kqswlnd"): + run('/sbin/rmmod kqswlnd') + if mod_loaded("kgmlnd"): + run('/sbin/rmmod kgmlnd') + if mod_loaded("kopeniblnd"): + run('/sbin/rmmod kopeniblnd') + if mod_loaded("kiiblnd"): + run('/sbin/rmmod kiiblnd') + if mod_loaded("kviblnd"): + run('/sbin/rmmod kviblnd') + if mod_loaded("kciblnd"): + run('/sbin/rmmod kciblnd') + if mod_loaded("ko2iblnd"): + run('/sbin/rmmod ko2iblnd') + if mod_loaded("kralnd"): + run('/sbin/rmmod kralnd') + if mod_loaded("kptllnd"): + run('/sbin/rmmod kptllnd') + if PLATFORM == 'LINUX': + (rc, out) = run('/sbin/rmmod', mod) + elif PLATFORM == 'DARWIN': + (rc, out) = run('/sbin/kextunload', KEXTPATH+mod+'.kext'); if rc: log('! 
unable to unload module:', mod) logall(out) + # ============================================================ # Classes to prepare and cleanup the various objects # @@ -1046,10 +1082,10 @@ class Module: self._server = None self._connected = 0 self.kmod = kmod(config.lustre, config.portals) - + def info(self, *args): msg = string.join(map(str,args)) - print self.module_name + ":", self.name, self.uuid, msg + log (self.module_name + ":", self.name, self.uuid, msg) def cleanup(self): """ default cleanup, used for most modules """ @@ -1060,7 +1096,7 @@ class Module: log(self.module_name, "cleanup failed: ", self.name) e.dump() cleanup_error(e.rc) - + def add_portals_module(self, dev_dir, modname): """Append a module to list of modules to load.""" self.kmod.add_portals_module(dev_dir, modname) @@ -1072,7 +1108,7 @@ class Module: def load_module(self): """Load all the modules in the list in the order they appear.""" self.kmod.load_module() - + def cleanup_module(self): """Unload the modules in the list in reverse order.""" if self.safe_to_clean(): @@ -1080,189 +1116,42 @@ class Module: def safe_to_clean(self): return 1 - + def safe_to_clean_modules(self): return self.safe_to_clean() - + class Network(Module): - def __init__(self,db): + def __init__(self,db,nid_uuid=0): Module.__init__(self, 'NETWORK', db) self.net_type = self.db.get_val('nettype') self.nid = self.db.get_val('nid', '*') self.cluster_id = self.db.get_val('clusterid', "0") self.port = self.db.get_val_int('port', 0) - self.send_mem = self.db.get_val_int('sendmem', DEFAULT_TCPBUF) - self.recv_mem = self.db.get_val_int('recvmem', DEFAULT_TCPBUF) - self.irq_affinity = self.db.get_val_int('irqaffinity', 0) - - if '*' in self.nid: - self.nid = sys_get_local_nid(self.net_type, self.nid, self.cluster_id) - if not self.nid: - panic("unable to set nid for", self.net_type, self.nid, cluster_id) - self.generic_nid = 1 - debug("nid:", self.nid) - else: - self.generic_nid = 0 - - self.nid_uuid = self.nid_to_uuid(self.nid) - - self.hostaddr = self.db.get_val('hostaddr', self.nid) - if '*' in self.hostaddr: - self.hostaddr = sys_get_local_address(self.net_type, self.hostaddr, self.cluster_id) - if not self.hostaddr: - panic("unable to set hostaddr for", self.net_type, self.hostaddr, self.cluster_id) - debug("hostaddr:", self.hostaddr) - - self.add_portals_module("libcfs", 'libcfs') - self.add_portals_module("portals", 'portals') - if node_needs_router(): - self.add_portals_module("router", 'kptlrouter') - if self.net_type == 'tcp': - self.add_portals_module("knals/socknal", 'ksocknal') - if self.net_type == 'elan': - self.add_portals_module("knals/qswnal", 'kqswnal') - if self.net_type == 'gm': - self.add_portals_module("knals/gmnal", 'kgmnal') - if self.net_type == 'scimac': - self.add_portals_module("knals/scimacnal", 'kscimacnal') - - def nid_to_uuid(self, nid): - return "NID_%s_UUID" %(nid,) + self.nid_uuid = nid_uuid + self.add_portals_module('libcfs', 'libcfs') + self.add_portals_module('lnet', 'lnet') + # Add the socklnd for developers without modprobe.conf (umls) + self.add_portals_module('klnds/socklnd', 'ksocklnd') def prepare(self): if is_network_prepared(): return - self.info(self.net_type, self.nid, self.port) - if not (config.record and self.generic_nid): - lctl.network(self.net_type, self.nid) + self.info(self.net_type, self.nid) if self.net_type == 'tcp': sys_tweak_socknal() if self.net_type == 'elan': sys_optimize_elan() - if self.port and node_is_router(): - run_one_acceptor(self.port) - self.connect_peer_gateways() - - def 
connect_peer_gateways(self): - for router in self.db.lookup_class('node'): - if router.get_val_int('router', 0): - for netuuid in router.get_networks(): - net = self.db.lookup(netuuid) - gw = Network(net) - if (gw.cluster_id == self.cluster_id and - gw.net_type == self.net_type): - if gw.nid != self.nid: - lctl.connect(gw) - - def disconnect_peer_gateways(self): - for router in self.db.lookup_class('node'): - if router.get_val_int('router', 0): - for netuuid in router.get_networks(): - net = self.db.lookup(netuuid) - gw = Network(net) - if (gw.cluster_id == self.cluster_id and - gw.net_type == self.net_type): - if gw.nid != self.nid: - try: - lctl.disconnect(gw) - except CommandError, e: - print "disconnect failed: ", self.name - e.dump() - cleanup_error(e.rc) - - def safe_to_clean(self): - return not is_network_prepared() - - def cleanup(self): - self.info(self.net_type, self.nid, self.port) - if self.port: - stop_acceptor(self.port) - if node_is_router(): - self.disconnect_peer_gateways() - -class RouteTable(Module): - def __init__(self,db): - Module.__init__(self, 'ROUTES', db) - - def server_for_route(self, net_type, gw, gw_cluster_id, tgt_cluster_id, - lo, hi): - # only setup connections for tcp NALs - srvdb = None - if not net_type in ('tcp',): - return None - - # connect to target if route is to single node and this node is the gw - if lo == hi and local_interface(net_type, gw_cluster_id, gw): - if not local_cluster(net_type, tgt_cluster_id): - panic("target", lo, " not on the local cluster") - srvdb = self.db.nid2server(lo, net_type, gw_cluster_id) - # connect to gateway if this node is not the gw - elif (local_cluster(net_type, gw_cluster_id) - and not local_interface(net_type, gw_cluster_id, gw)): - srvdb = self.db.nid2server(gw, net_type, gw_cluster_id) - else: - return None - - if not srvdb: - panic("no server for nid", lo) - return None - - return Network(srvdb) - - def prepare(self): - if is_network_prepared(): - return - self.info() - for net_type, gw, gw_cluster_id, tgt_cluster_id, lo, hi in self.db.get_route_tbl(): - lctl.add_route(net_type, gw, lo, hi) - srv = self.server_for_route(net_type, gw, gw_cluster_id, tgt_cluster_id, lo, hi) - if srv: - lctl.connect(srv) - - def safe_to_clean(self): - return not is_network_prepared() - - def cleanup(self): - if is_network_prepared(): - # the network is still being used, don't clean it up - return - for net_type, gw, gw_cluster_id, tgt_cluster_id, lo, hi in self.db.get_route_tbl(): - srv = self.server_for_route(net_type, gw, gw_cluster_id, tgt_cluster_id, lo, hi) - if srv: - try: - lctl.disconnect(srv) - except CommandError, e: - print "disconnect failed: ", self.name - e.dump() - cleanup_error(e.rc) - - try: - lctl.del_route(net_type, gw, lo, hi) - except CommandError, e: - print "del_route failed: ", self.name - e.dump() - cleanup_error(e.rc) - -class Management(Module): - def __init__(self, db): - Module.__init__(self, 'MGMT', db) - self.add_lustre_module('lvfs', 'lvfs') - self.add_lustre_module('obdclass', 'obdclass') - self.add_lustre_module('ptlrpc', 'ptlrpc') - self.add_lustre_module('mgmt', 'mgmt_svc') - - def prepare(self): - if is_prepared(self.name): - return - self.info() - lctl.newdev("mgmt", self.name, self.uuid) def safe_to_clean(self): + if PLATFORM == 'LINUX': + return not is_network_prepared() + elif PLATFORM == 'DARWIN': + # XXX always assume it's safe to clean + return 1 return 1 def cleanup(self): - if is_prepared(self.name): - Module.cleanup(self) + self.info(self.net_type, self.nid) # This is only needed to 
load the modules; the LDLM device # is now created automatically. @@ -1272,6 +1161,7 @@ class LDLM(Module): self.add_lustre_module('lvfs', 'lvfs') self.add_lustre_module('obdclass', 'obdclass') self.add_lustre_module('ptlrpc', 'ptlrpc') + self.add_lustre_module('ptlrpc/gss', 'ptlrpc_gss') def prepare(self): return @@ -1286,62 +1176,85 @@ class LOV(Module): self.name = "lov_%s" % name_override self.add_lustre_module('lov', 'lov') self.mds_uuid = self.db.get_first_ref('mds') - self.stripe_sz = self.db.get_val_int('stripesize', 65536) + self.stripe_sz = self.db.get_val_int('stripesize', 1048576) self.stripe_off = self.db.get_val_int('stripeoffset', 0) self.pattern = self.db.get_val_int('stripepattern', 0) - self.devlist = self.db.get_refs('obd') - self.stripe_cnt = self.db.get_val_int('stripecount', len(self.devlist)) + self.devlist = [] + self.stripe_cnt = self.db.get_val_int('stripecount', 1) self.osclist = [] self.desc_uuid = self.uuid self.uuid = generate_client_uuid(self.name) self.fs_name = fs_name + # settings below here won't be seen by the MDSDEV code! if config_only: self.config_only = 1 return self.config_only = None - mds= self.db.lookup(self.mds_uuid) + mds = self.db.lookup(self.mds_uuid) self.mds_name = mds.getName() - for obd_uuid in self.devlist: + self.devlist = self.db.get_lov_tgts('lov_tgt') + for (obd_uuid, index, gen, active) in self.devlist: + if obd_uuid == '': + continue obd = self.db.lookup(obd_uuid) osc = get_osc(obd, self.uuid, fs_name) if osc: - self.osclist.append(osc) + self.osclist.append((osc, index, gen, active)) else: panic('osc not found:', obd_uuid) - + if self.osclist == []: + debug("get_lov_tgts failed, using get_refs"); + index = 0 + self.devlist = self.db.get_refs('obd') + for obd_uuid in self.devlist: + obd = self.db.lookup(obd_uuid) + osc = get_osc(obd, self.uuid, fs_name) + if osc: + self.osclist.append((osc, index, 1, 1)) + else: + panic('osc not found:', obd_uuid) + index = index + 1 + if self.osclist == []: + panic('No OSCs configured for LOV') + debug('dbg LOV __init__:', self.osclist, self.devlist, self.stripe_cnt) + def prepare(self): + debug('dbg LOV prepare') if is_prepared(self.name): return - if self.config_only: - panic("Can't prepare config_only LOV ", self.name) - - for osc in self.osclist: + debug('dbg LOV prepare:', self.osclist, self.devlist) + self.info(self.mds_uuid, self.stripe_cnt, self.stripe_sz, + self.stripe_off, self.pattern, self.devlist, + self.mds_name) + lctl.lov_setup(self.name, self.uuid, + self.desc_uuid, self.mds_name, self.stripe_cnt, + self.stripe_sz, self.stripe_off, self.pattern) + if self.osclist == []: + panic('No OSCs configured for LOV?') + for (osc, index, gen, active) in self.osclist: + target_uuid = osc.target_uuid try: # Only ignore connect failures with --force, which # isn't implemented here yet. 
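The rewritten LOV.prepare above no longer hands a device list to lov_setup; each target is attached afterwards via lov_modify_tgts, carrying an (obd_uuid, index, generation) triple. A sketch of the command block that LCTLInterface.lov_add_obd (added earlier in this patch) feeds to lctl, with placeholder device and UUID names:

    def lov_add_obd_cmds(lov_name, obd_uuid, index, gen):
        # select the LOV device, then append one OST target at the given
        # index/generation -- mirrors "lov_modify_tgts add" in the patch
        return ("cfg_device %s\n"
                "  lov_modify_tgts add %s %s %s %s\n"
                "  quit" % (lov_name, lov_name, obd_uuid, index, gen))

    print(lov_add_obd_cmds("lov_fs1", "OST_node1_UUID", 0, 1))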
+ osc.active = active osc.prepare(ignore_connect_failure=0) except CommandError, e: print "Error preparing OSC %s\n" % osc.uuid raise e - self.info(self.mds_uuid, self.stripe_cnt, self.stripe_sz, - self.stripe_off, self.pattern, self.devlist, self.mds_name) - lctl.lov_setup(self.name, self.uuid, - self.desc_uuid, self.mds_name, self.stripe_cnt, - self.stripe_sz, self.stripe_off, self.pattern, - string.join(self.devlist)) + lctl.lov_add_obd(self.name, self.uuid, target_uuid, index, gen) def cleanup(self): if is_prepared(self.name): Module.cleanup(self) + for (osc, index, gen, active) in self.osclist: + osc.cleanup() if self.config_only: panic("Can't clean up config_only LOV ", self.name) - for osc in self.osclist: - osc.cleanup() def load_module(self): if self.config_only: panic("Can't load modules for config_only LOV ", self.name) - for osc in self.osclist: + for (osc, index, gen, active) in self.osclist: osc.load_module() break Module.load_module(self) @@ -1350,8 +1263,9 @@ class LOV(Module): if self.config_only: panic("Can't cleanup modules for config_only LOV ", self.name) Module.cleanup_module(self) - for osc in self.osclist: - osc.cleanup_module() + for (osc, index, gen, active) in self.osclist: + if active: + osc.cleanup_module() break class MDSDEV(Module): @@ -1360,9 +1274,20 @@ class MDSDEV(Module): self.devpath = self.db.get_val('devpath','') self.size = self.db.get_val_int('devsize', 0) self.journal_size = self.db.get_val_int('journalsize', 0) + self.fstype = self.db.get_val('fstype', '') + if sys_get_branch() == '2.4' and self.fstype == 'ldiskfs': + self.fstype = 'ext3' + elif sys_get_branch() == '2.6' and self.fstype == 'ext3': + self.fstype = 'ldiskfs' + self.nspath = self.db.get_val('nspath', '') - self.mkfsoptions = self.db.get_val('mkfsoptions', '') + self.mkfsoptions = '-i 4096 ' + self.db.get_val('mkfsoptions', '') + self.mountfsoptions = self.db.get_val('mountfsoptions', '') + if config.quota: + self.quota = config.quota + else: + self.quota = self.db.get_val('quota', '') # overwrite the orignal MDSDEV name and uuid with the MDS name and uuid target_uuid = self.db.get_first_ref('target') mds = self.db.lookup(target_uuid) @@ -1370,7 +1295,7 @@ class MDSDEV(Module): self.filesystem_uuids = mds.get_refs('filesystem') # FIXME: if fstype not set, then determine based on kernel version self.format = self.db.get_val('autoformat', "no") - if mds.get_val('failover', 0): + if mds.get_val('failover', '1') != '0': self.failover_mds = 'f' else: self.failover_mds = 'n' @@ -1381,10 +1306,11 @@ class MDSDEV(Module): self.active = 1 else: self.active = 0 - if self.active and config.group and config.group != mds.get_val('group'): + if self.active and config.group and config.group != mds.get_val('group', mds.get_val('name')): self.active = 0 self.inode_size = self.db.get_val_int('inodesize', 0) + debug('original inode_size ', self.inode_size) if self.inode_size == 0: # find the LOV for this MDS lovconfig_uuid = mds.get_first_ref('lovconfig') @@ -1397,32 +1323,41 @@ class MDSDEV(Module): lov = LOV(self.db.lookup(lov_uuid), lov_uuid, 'FS_name', config_only = 1) # default stripe count controls default inode_size - stripe_count = lov.stripe_cnt + if (lov.stripe_cnt > 0): + stripe_count = lov.stripe_cnt + else: + stripe_count = 1 if stripe_count > 77: - self.inode_size = 4096 - elif stripe_count > 35: + self.inode_size = 512 + elif stripe_count > 34: self.inode_size = 2048 elif stripe_count > 13: self.inode_size = 1024 - elif stripe_count > 3: - self.inode_size = 512 + #elif stripe_count < 3: + 
# self.inode_size = 256 else: - self.inode_size = 256 + self.inode_size = 512 + debug('stripe_count ', stripe_count,' inode_size ',self.inode_size) self.target_dev_uuid = self.uuid self.uuid = target_uuid - # modules + + # loading modules + if self.quota: + self.add_lustre_module('quota', 'lquota') self.add_lustre_module('mdc', 'mdc') self.add_lustre_module('osc', 'osc') self.add_lustre_module('lov', 'lov') self.add_lustre_module('mds', 'mds') + if self.fstype == 'ldiskfs': + self.add_lustre_module('ldiskfs', 'ldiskfs') if self.fstype: self.add_lustre_module('lvfs', 'fsfilt_%s' % (self.fstype)) def load_module(self): if self.active: Module.load_module(self) - + def prepare(self): if is_prepared(self.name): return @@ -1433,20 +1368,41 @@ class MDSDEV(Module): # run write_conf automatically, if --reformat used self.write_conf() self.info(self.devpath, self.fstype, self.size, self.format) - run_acceptors() # never reformat here blkdev = block_dev(self.devpath, self.size, self.fstype, 0, self.format, self.journal_size, self.inode_size, self.mkfsoptions) if not is_prepared('MDT'): lctl.newdev("mdt", 'MDT', 'MDT_UUID', setup ="") - try: + try: + mountfsoptions = def_mount_options(self.fstype, 'mds', blkdev) + + if config.mountfsoptions: + if mountfsoptions: + mountfsoptions = mountfsoptions + ',' + config.mountfsoptions + else: + mountfsoptions = config.mountfsoptions + if self.mountfsoptions: + mountfsoptions = mountfsoptions + ',' + self.mountfsoptions + else: + if self.mountfsoptions: + if mountfsoptions: + mountfsoptions = mountfsoptions + ',' + self.mountfsoptions + else: + mountfsoptions = self.mountfsoptions + + print 'MDS mount options: ' + mountfsoptions + lctl.newdev("mds", self.name, self.uuid, - setup ="%s %s %s" %(blkdev, self.fstype, self.name)) + setup ="%s %s %s %s %s" %(blkdev, self.fstype, self.name, + mountfsoptions, self.quota)) + self.group_upcall = self.db.get_val('group_upcall','') + sys_set_group_upcall(self.name, self.group_upcall) + except CommandError, e: if e.rc == 2: - panic("MDS is missing the config log. Need to run " + - "lconf --write_conf.") + panic("MDS failed to start. Check the syslog for details." + + " (May need to run lconf --write-conf)") else: raise e @@ -1459,7 +1415,7 @@ class MDSDEV(Module): self.inode_size, self.mkfsoptions) lctl.newdev("mds", self.name, self.uuid, setup ="%s %s" %(blkdev, self.fstype)) - + # record logs for the MDS lov for uuid in self.filesystem_uuids: log("recording clients for filesystem:", uuid) @@ -1474,14 +1430,6 @@ class MDSDEV(Module): client.prepare() lctl.mount_option(self.name, client.get_name(), "") lctl.end_record() - - config.cleanup = 1 - lctl.clear_log(self.name, self.name + '-clean') - lctl.record(self.name, self.name + '-clean') - client.cleanup() - lctl.del_mount_option(self.name) - lctl.end_record() - config.cleanup = 0 config.record = 0 # record logs for each client @@ -1494,53 +1442,74 @@ class MDSDEV(Module): client_name = node_db.getName() for prof_uuid in node_db.get_refs('profile'): prof_db = node_db.lookup(prof_uuid) - # refactor this into a funtion to test "clientness" - # of a node. + # refactor this into a funtion to test "clientness" of a node. 
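To populate per-client config logs, write_conf re-invokes lconf itself against the same XML with --record options (the loop continues just below). A condensed sketch of the argument list it assembles; record_client_log_argv is an illustrative helper, and config_options stands in for the caller's remaining XML arguments:

    import sys

    def record_client_log_argv(client_name, mds_name, config_options,
                               noexec=0):
        # replay the client's profile into a log stored on the MDS
        # device; "-n" keeps a --noexec parent run side-effect free
        argv = [sys.argv[0]]
        if noexec:
            argv.append("-n")
        argv += ["-v", "--record", "--nomod", "--old_conf",
                 "--record_log", client_name,
                 "--record_device", mds_name,
                 "--node", client_name]
        return argv + config_options

    print(record_client_log_argv("client-a", "mds1", ["fs.xml"]))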
for ref_class, ref_uuid in prof_db.get_all_refs(): if ref_class in ('mountpoint','echoclient'): - debug("recording", client_name) + thing = self.db.lookup(ref_uuid); + fs_uuid = thing.get_first_ref('filesystem') + if not fs_uuid in self.filesystem_uuids: + continue; + + log("Recording log", client_name, "on", self.name) old_noexec = config.noexec config.noexec = 0 noexec_opt = ('', '-n') ret, out = run (sys.argv[0], noexec_opt[old_noexec == 1], - " -v --record --nomod", + " -v --record --nomod --old_conf", "--record_log", client_name, "--record_device", self.name, "--node", client_name, config_options) - if config.verbose: - for s in out: log("record> ", string.strip(s)) - ret, out = run (sys.argv[0], - noexec_opt[old_noexec == 1], - "--cleanup -v --record --nomod", - "--record_log", client_name + "-clean", - "--record_device", self.name, - "--node", client_name, - config_options) + if ret: + lctl.clear_log(self.name, client_name) + print out + self.cleanup() + panic("Record client log %s on %s failed" %( + client_name, self.name)) if config.verbose: for s in out: log("record> ", string.strip(s)) config.noexec = old_noexec try: - lctl.cleanup(self.name, self.uuid, 0, 0) + lctl.cleanup(self.name, self.uuid, config.force, config.failover) except CommandError, e: log(self.module_name, "cleanup failed: ", self.name) e.dump() cleanup_error(e.rc) Module.cleanup(self) clean_loop(self.devpath) - - def msd_remaining(self): + + #change the mtime of LLOG to match the XML creation time + if toplustreDB.get_mtime(): + mtime = toplustreDB.get_mtime() + debug("changing mtime of LOGS to %s" %mtime) + ret, mktemp = runcmd("mktemp /tmp/lustre-cmd.XXXXXXXX") + if ret: + log(self.module_name, "create mtime LOGS cmdfile failed: ", self.name) + else: + mtimecmdfile = string.split(mktemp[0])[0] + fd = os.open(mtimecmdfile, os.O_RDWR | os.O_CREAT) + os.write(fd, "\n\n\n\n\n%s\n\n" %mtime) + os.close(fd) + cmd = "debugfs -w -R \"mi /LOGS\" <%s %s" %(mtimecmdfile, self.devpath) + ret, outs = runcmd(cmd) + os.remove(mtimecmdfile) + if ret: + print "Can not change mtime of LOGS by debugfs." + + def mds_remaining(self): out = lctl.device_list() for s in out: if string.split(s)[2] in ('mds',): + if string.split(s)[1] in ('ST',): + return 0 return 1 def safe_to_clean(self): return self.active def safe_to_clean_modules(self): - return not self.msd_remaining() + return not self.mds_remaining() def cleanup(self): if not self.active: @@ -1556,7 +1525,7 @@ class MDSDEV(Module): e.dump() cleanup_error(e.rc) Module.cleanup(self) - if not self.msd_remaining() and is_prepared('MDT'): + if not self.mds_remaining() and is_prepared('MDT'): try: lctl.cleanup("MDT", "MDT_UUID", config.force, config.failover) @@ -1573,15 +1542,34 @@ class OSD(Module): self.devpath = self.db.get_val('devpath', '') self.size = self.db.get_val_int('devsize', 0) self.journal_size = self.db.get_val_int('journalsize', 0) + + # now as we store fids in EA on OST we need to make inode bigger self.inode_size = self.db.get_val_int('inodesize', 0) + if self.inode_size == 0: + self.inode_size = 256 self.mkfsoptions = self.db.get_val('mkfsoptions', '') + # Allocate fewer inodes on large OST devices. Most filesystems + # can be much more aggressive than this, but by default we can't. 
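The OSD hunk continues below by prepending '-i 16384' to mkfsoptions once the device exceeds ~1 GB (devsize is counted in 1 KB blocks throughout lconf). mke2fs -i sets bytes-per-inode, so the effect is roughly device_bytes / 16384 inodes; a quick sanity check with an illustrative helper:

    def ost_inode_count(devsize_kb, bytes_per_inode=16384):
        # mke2fs -i N allocates about one inode per N bytes of device
        return devsize_kb * 1024 // bytes_per_inode

    # a 2 TB OST at one inode per 16 KB comes to ~134 million inodes
    print(ost_inode_count(2 * 1024 * 1024 * 1024))   # 134217728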
+ if self.size > 1000000: + self.mkfsoptions = '-i 16384 ' + self.mkfsoptions + self.mountfsoptions = self.db.get_val('mountfsoptions', '') + if config.quota: + self.quota = config.quota + else: + self.quota = self.db.get_val('quota', '') + self.fstype = self.db.get_val('fstype', '') + if sys_get_branch() == '2.4' and self.fstype == 'ldiskfs': + self.fstype = 'ext3' + elif sys_get_branch() == '2.6' and self.fstype == 'ext3': + self.fstype = 'ldiskfs' + self.nspath = self.db.get_val('nspath', '') target_uuid = self.db.get_first_ref('target') ost = self.db.lookup(target_uuid) self.name = ost.getName() self.format = self.db.get_val('autoformat', 'yes') - if ost.get_val('failover', 0): + if ost.get_val('failover', '1') != '0': self.failover_ost = 'f' else: self.failover_ost = 'n' @@ -1593,14 +1581,18 @@ class OSD(Module): self.active = 1 else: self.active = 0 - if self.active and config.group and config.group != ost.get_val('group'): + if self.active and config.group and config.group != ost.get_val('group', ost.get_val('name')): self.active = 0 - + self.target_dev_uuid = self.uuid self.uuid = target_uuid # modules + if self.quota: + self.add_lustre_module('quota', 'lquota') self.add_lustre_module('ost', 'ost') # FIXME: should we default to ext3 here? + if self.fstype == 'ldiskfs': + self.add_lustre_module('ldiskfs', 'ldiskfs') if self.fstype: self.add_lustre_module('lvfs' , 'fsfilt_%s' % (self.fstype)) self.add_lustre_module(self.osdtype, self.osdtype) @@ -1620,16 +1612,35 @@ class OSD(Module): return self.info(self.osdtype, self.devpath, self.size, self.fstype, self.format, self.journal_size, self.inode_size) - run_acceptors() if self.osdtype == 'obdecho': blkdev = '' else: blkdev = block_dev(self.devpath, self.size, self.fstype, config.reformat, self.format, self.journal_size, self.inode_size, self.mkfsoptions) + + mountfsoptions = def_mount_options(self.fstype, 'ost', blkdev) + + if config.mountfsoptions: + if mountfsoptions: + mountfsoptions = mountfsoptions + ',' + config.mountfsoptions + else: + mountfsoptions = config.mountfsoptions + if self.mountfsoptions: + mountfsoptions = mountfsoptions + ',' + self.mountfsoptions + else: + if self.mountfsoptions: + if mountfsoptions: + mountfsoptions = mountfsoptions + ',' + self.mountfsoptions + else: + mountfsoptions = self.mountfsoptions + + print 'OST mount options: ' + mountfsoptions + lctl.newdev(self.osdtype, self.name, self.uuid, - setup ="%s %s %s" %(blkdev, self.fstype, - self.failover_ost)) + setup ="%s %s %s %s %s" %(blkdev, self.fstype, + self.failover_ost, mountfsoptions, + self.quota)) if not is_prepared('OSS'): lctl.newdev("ost", 'OSS', 'OSS_UUID', setup ="") @@ -1669,16 +1680,6 @@ class OSD(Module): if not self.osdtype == 'obdecho': clean_loop(self.devpath) -def mgmt_uuid_for_fs(mtpt_name): - if not mtpt_name: - return '' - mtpt_db = toplevel.lookup_name(mtpt_name) - fs_uuid = mtpt_db.get_first_ref('filesystem') - fs = toplevel.lookup(fs_uuid) - if not fs: - return '' - return fs.get_first_ref('mgmt') - # Generic client module, used by OSC and MDC class Client(Module): def __init__(self, tgtdb, uuid, module, fs_name, self_name=None, @@ -1686,11 +1687,12 @@ class Client(Module): self.target_name = tgtdb.getName() self.target_uuid = tgtdb.getUUID() self.db = tgtdb + self.backup_targets = [] self.tgt_dev_uuid = get_active_target(tgtdb) if not self.tgt_dev_uuid: panic("No target device found for target:", self.target_name) - + self.kmod = kmod(config.lustre, config.portals) self._server = None self._connected = 0 @@ -1704,11 +1706,7 
@@ class Client(Module): self.name = self_name self.uuid = uuid self.lookup_server(self.tgt_dev_uuid) - mgmt_uuid = mgmt_uuid_for_fs(fs_name) - if mgmt_uuid: - self.mgmt_name = mgmtcli_name_for_uuid(mgmt_uuid) - else: - self.mgmt_name = '' + self.lookup_backup_targets() self.fs_name = fs_name if not module_dir: module_dir = module @@ -1718,29 +1716,43 @@ class Client(Module): """ Lookup a server's network information """ self._server_nets = get_ost_net(self.db, srv_uuid) if len(self._server_nets) == 0: - panic ("Unable to find a server for:", srv_uuid) + panic("Unable to find a server for:", srv_uuid) def get_servers(self): return self._server_nets + def lookup_backup_targets(self): + """ Lookup alternative network information """ + prof_list = toplustreDB.get_refs('profile') + for prof_uuid in prof_list: + prof_db = toplustreDB.lookup(prof_uuid) + if not prof_db: + panic("profile:", prof_uuid, "not found.") + for ref_class, ref_uuid in prof_db.get_all_refs(): + if ref_class in ('osd', 'mdsdev'): + devdb = toplustreDB.lookup(ref_uuid) + uuid = devdb.get_first_ref('target') + if self.target_uuid == uuid and self.tgt_dev_uuid != ref_uuid: + debug("add backup target", ref_uuid) + self.backup_targets.append(ref_uuid) + def prepare(self, ignore_connect_failure = 0): self.info(self.target_uuid) if is_prepared(self.name): self.cleanup() try: - srv = choose_local_server(self.get_servers()) - if srv: + srv_list = self.get_servers() + debug('dbg CLIENT __prepare__:', self.target_uuid, srv_list) + for srv in srv_list: lctl.connect(srv) - else: - routes = find_route(self.get_servers()) - if len(routes) == 0: - panic ("no route to", self.target_uuid) - for (srv, r) in routes: - lctl.add_route_host(r[0], srv.nid_uuid, r[1], r[3]) + if len(srv_list) == 0: + panic("no servers for ", self.target_uuid) except CommandError, e: if not ignore_connect_failure: raise e - if srv: + + if srv_list[0]: + srv = srv_list[0] if self.target_uuid in config.inactive and self.permits_inactive(): debug("%s inactive" % self.target_uuid) inactive_p = "inactive" @@ -1748,24 +1760,35 @@ class Client(Module): debug("%s active" % self.target_uuid) inactive_p = "" lctl.newdev(self.module, self.name, self.uuid, - setup ="%s %s %s %s" % (self.target_uuid, srv.nid_uuid, - inactive_p, self.mgmt_name)) + setup ="%s %s %s" % (self.target_uuid, srv.nid_uuid, + inactive_p)) + else: + panic("Unable to create OSC for ", self.target_uuid) + + for tgt_dev_uuid in self.backup_targets: + this_nets = get_ost_net(toplustreDB, tgt_dev_uuid) + if len(this_nets) == 0: + panic ("Unable to find a backup server for:", tgt_dev_uuid) + else: + for srv in this_nets: + lctl.connect(srv) + if srv: + lctl.add_conn(self.name, srv.nid_uuid); + def cleanup(self): if is_prepared(self.name): Module.cleanup(self) - try: - srv = choose_local_server(self.get_servers()) - if srv: - lctl.disconnect(srv) + srv_list = self.get_servers() + for srv in srv_list: + lctl.disconnect(srv) + for tgt_dev_uuid in self.backup_targets: + this_nets = get_ost_net(toplustreDB, tgt_dev_uuid) + if len(this_nets) == 0: + panic ("Unable to find a backup server for:", tgt_dev_uuid) else: - for (srv, r) in find_route(self.get_servers()): - lctl.del_route_host(r[0], srv.nid_uuid, r[1], r[3]) - except CommandError, e: - log(self.module_name, "cleanup failed: ", self.name) - e.dump() - cleanup_error(e.rc) - + for srv in this_nets: + lctl.disconnect(srv) class MDC(Client): def __init__(self, db, uuid, fs_name): @@ -1781,15 +1804,6 @@ class OSC(Client): def permits_inactive(self): return 1 -def 
-def mgmtcli_name_for_uuid(uuid):
-    return 'MGMTCLI_%s' % uuid
-
-class ManagementClient(Client):
-    def __init__(self, db, uuid):
-        Client.__init__(self, db, uuid, 'mgmt_cli', '',
-                        self_name = mgmtcli_name_for_uuid(db.getUUID()),
-                        module_dir = 'mgmt')
-
 class COBD(Module):
     def __init__(self, db):
         Module.__init__(self, 'COBD', db)
@@ -1810,8 +1824,10 @@ class COBD(Module):

 # virtual interface for OSC and LOV
 class VOSC(Module):
-    def __init__(self, db, uuid, fs_name, name_override = None):
+    def __init__(self, db, uuid, fs_name, name_override = None, quota = None):
         Module.__init__(self, 'VOSC', db)
+        if quota:
+            self.add_lustre_module('quota', 'lquota')
         if db.get_class() == 'lov':
             self.osc = LOV(db, uuid, fs_name, name_override)
         else:
@@ -1825,9 +1841,11 @@ class VOSC(Module):
     def cleanup(self):
         self.osc.cleanup()
     def load_module(self):
+        Module.load_module(self)
         self.osc.load_module()
     def cleanup_module(self):
         self.osc.cleanup_module()
+        Module.cleanup_module(self)


 class ECHO_CLIENT(Module):
@@ -1842,7 +1860,6 @@ class ECHO_CLIENT(Module):
     def prepare(self):
         if is_prepared(self.name):
             return
-        run_acceptors()
         self.osc.prepare() # XXX This is so cheating. -p
         self.info(self.obd_uuid)

@@ -1871,35 +1888,51 @@ def generate_client_uuid(name):
     return client_uuid[:36]


+def my_rstrip(s, chars):
+    """my_rstrip(s, chars) -> strips any instances of the characters
+    found in chars from the right side of string s"""
+    # XXX required because python versions pre 2.2.3 don't allow
+    # string.rstrip() to take alternate char lists
+    import string
+    ns = s
+    try:
+        ns = string.rstrip(s, chars)
+    except TypeError, e:
+        for i in range(len(s) - 1, 0, -1):
+            if s[i] in chars:
+                continue
+            else:
+                ns = s[0:i+1]
+                break
+    return ns
+
+
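my_rstrip() exists only because string.rstrip() grew its optional character-class argument in Python 2.2.3; on older interpreters the TypeError branch strips by hand. The intended behaviour, shown with illustrative values:

    assert my_rstrip('/mnt/lustre///', '/') == '/mnt/lustre'
    assert my_rstrip('/mnt/lustre', '/') == '/mnt/lustre'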
 class Mountpoint(Module):
     def __init__(self,db):
         Module.__init__(self, 'MTPT', db)
-        self.path = self.db.get_val('path')
+        self.path = my_rstrip(self.db.get_val('path'), '/')
+        self.clientoptions = self.db.get_val('clientoptions', '')
         self.fs_uuid = self.db.get_first_ref('filesystem')
         fs = self.db.lookup(self.fs_uuid)
         self.mds_uuid = fs.get_first_ref('mds')
+        mds_db = self.db.lookup(self.mds_uuid)
+        if config.quota:
+            quota = config.quota
+        else:
+            quota = mds_db.get_val('quota', config.quota)
         self.obd_uuid = fs.get_first_ref('obd')
-        self.mgmt_uuid = fs.get_first_ref('mgmt')
         obd = self.db.lookup(self.obd_uuid)
         client_uuid = generate_client_uuid(self.name)
-        self.vosc = VOSC(obd, client_uuid, self.name)
+        self.vosc = VOSC(obd, client_uuid, self.name, quota=quota)
         self.mdc = get_mdc(db, client_uuid, self.name, self.mds_uuid)
         self.add_lustre_module('mdc', 'mdc')
         self.add_lustre_module('llite', 'llite')
-        if self.mgmt_uuid:
-            self.mgmtcli = ManagementClient(db.lookup(self.mgmt_uuid),
-                                            client_uuid)
-        else:
-            self.mgmtcli = None

     def prepare(self):
         if fs_is_mounted(self.path):
             log(self.path, "already mounted.")
             return
-        run_acceptors()
-        if self.mgmtcli:
-            self.mgmtcli.prepare()
         self.vosc.prepare()
         self.mdc.prepare()
         mdc_name = self.mdc.name
@@ -1908,12 +1941,24 @@ class Mountpoint(Module):
         if config.record or config.lctl_dump:
             lctl.mount_option(local_node_name, self.vosc.get_name(), mdc_name)
             return
-        cmd = "mount -t lustre_lite -o osc=%s,mdc=%s %s %s" % \
-              (self.vosc.get_name(), mdc_name, config.config, self.path)
+
+        if config.clientoptions:
+            if self.clientoptions:
+                self.clientoptions = self.clientoptions + ',' + config.clientoptions
+            else:
+                self.clientoptions = config.clientoptions
+        if self.clientoptions:
+            self.clientoptions = ',' + self.clientoptions
+            # Linux kernel will deal with async and not pass it to ll_fill_super,
+            # so replace it with Lustre async
+            self.clientoptions = string.replace(self.clientoptions, "async", "lasync")
+
+        cmd = "mount -t lustre_lite -o osc=%s,mdc=%s%s %s %s" % \
+              (self.vosc.get_name(), mdc_name, self.clientoptions, config.config, self.path)
         run("mkdir", self.path)
         ret, val = run(cmd)
         if ret:
-            self.mdc.cleanup()
+            self.mdc.cleanup()
             self.vosc.cleanup()
             panic("mount failed:", self.path, ":", string.join(val))
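prepare() splices the client options into the -o list only when there are any, which is why the comma is prepended at the last moment, and "async" is rewritten to "lasync" first because the kernel mount path consumes the standard async flag itself and would never hand it to ll_fill_super. With hypothetical device names, the assembled command looks like:

    import string
    vosc_name, mdc_name = 'lov_fs1', 'MDC_uml1_mds1'   # made-up device names
    clientoptions = ',' + string.replace('ro,async', 'async', 'lasync')
    cmd = "mount -t lustre_lite -o osc=%s,mdc=%s%s %s %s" % \
          (vosc_name, mdc_name, clientoptions, 'fs1-config', '/mnt/lustre')
    # -> mount -t lustre_lite -o osc=lov_fs1,mdc=MDC_uml1_mds1,ro,lasync fs1-config /mnt/lustre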
@@ -1936,20 +1981,14 @@ class Mountpoint(Module):

         self.mdc.cleanup()
         self.vosc.cleanup()
-        if self.mgmtcli:
-            self.mgmtcli.cleanup()

     def load_module(self):
-        if self.mgmtcli:
-            self.mgmtcli.load_module()
         self.vosc.load_module()
         Module.load_module(self)

     def cleanup_module(self):
         Module.cleanup_module(self)
         self.vosc.cleanup_module()
-        if self.mgmtcli:
-            self.mgmtcli.cleanup_module()


 # ============================================================
@@ -1967,22 +2006,19 @@ def get_ost_net(self, osd_uuid):
                " node_ref:", node_uuid)
     for net_uuid in node.get_networks():
         db = node.lookup(net_uuid)
-        srv_list.append(Network(db))
+        net = Network(db, node_uuid)
+        srv_list.append(net)
     return srv_list


-# the order of iniitailization is based on level.
+# the order of initialization is based on level.
 def getServiceLevel(self):
     type = self.get_class()
     ret=0;
     if type in ('network',):
         ret = 5
-    elif type in ('routetbl',):
-        ret = 6
     elif type in ('ldlm',):
         ret = 20
-    elif type in ('mgmt',):
-        ret = 25
     elif type in ('osd', 'cobd'):
         ret = 30
     elif type in ('mdsdev',):
@@ -1993,7 +2029,7 @@ def getServiceLevel(self):
         panic("Unknown type: ", type)

     if ret < config.minlevel or ret > config.maxlevel:
-        ret = 0 
+        ret = 0
     return ret

#
@@ -2001,7 +2037,7 @@ def getServiceLevel(self):
# [(level, db_object),]
 def getServices(self):
     list = []
-    for ref_class, ref_uuid in self.get_all_refs(): 
+    for ref_class, ref_uuid in self.get_all_refs():
         servdb = self.lookup(ref_uuid)
         if servdb:
             level = getServiceLevel(servdb)
@@ -2015,7 +2051,7 @@ def getServices(self):

 ############################################################
-# MDC UUID hack - 
+# MDC UUID hack -
 # FIXME: clean this mess up!
 #
 # OSC is no longer in the xml, so we have to fake it.
@@ -2031,101 +2067,6 @@ def get_mdc(db, uuid, fs_name, mds_uuid):
     mdc = MDC(mds_db, uuid, fs_name)
     return mdc

-############################################################
-# routing ("rooting")
-
-# list of (nettype, cluster_id, nid)
-local_clusters = []
-
-def find_local_clusters(node_db):
-    global local_clusters
-    for netuuid in node_db.get_networks():
-        net = node_db.lookup(netuuid)
-        srv = Network(net)
-        debug("add_local", netuuid)
-        local_clusters.append((srv.net_type, srv.cluster_id, srv.nid))
-        if srv.port > 0:
-            if acceptors.has_key(srv.port):
-                panic("duplicate port:", srv.port)
-            acceptors[srv.port] = AcceptorHandler(srv.port, srv.net_type,
-                                                  srv.send_mem, srv.recv_mem,
-                                                  srv.irq_affinity)
-
-# This node is a gateway.
-is_router = 0
-def node_is_router():
-    return is_router
-
-# If there are any routers found in the config, then this will be true
-# and all nodes will load kptlrouter.
-needs_router = 0
-def node_needs_router():
-    return needs_router or is_router
-
-# list of (nettype, gw, tgt_cluster_id, lo, hi)
-# Currently, these local routes are only added to kptlrouter route
-# table if they are needed to connect to a specific server.  This
-# should be changed so all available routes are loaded, and the
-# ptlrouter can make all the decisions.
-local_routes = []
-
-def find_local_routes(lustre):
-    """ Scan the lustre config looking for routers.  Build list of
    routes. """
-    global local_routes, needs_router
-    local_routes = []
-    list = lustre.lookup_class('node')
-    for router in list:
-        if router.get_val_int('router', 0):
-            needs_router = 1
-            for (local_type, local_cluster_id, local_nid) in local_clusters:
-                gw = None
-                for netuuid in router.get_networks():
-                    db = router.lookup(netuuid)
-                    if (local_type == db.get_val('nettype') and
-                        local_cluster_id == db.get_val('clusterid')):
-                        gw = db.get_val('nid')
-                        break
-                if gw:
-                    debug("find_local_routes: gw is", gw)
-                    for route in router.get_local_routes(local_type, gw):
-                        local_routes.append(route)
-    debug("find_local_routes:", local_routes)
-
-
-def choose_local_server(srv_list):
-    for srv in srv_list:
-        if local_cluster(srv.net_type, srv.cluster_id):
-            return srv
-
-def local_cluster(net_type, cluster_id):
-    for cluster in local_clusters:
-        if net_type == cluster[0] and cluster_id == cluster[1]:
-            return 1
-    return 0
-
-def local_interface(net_type, cluster_id, nid):
-    for cluster in local_clusters:
-        if (net_type == cluster[0] and cluster_id == cluster[1]
-            and nid == cluster[2]):
-            return 1
-    return 0
-
-def find_route(srv_list):
-    result = []
-    frm_type = local_clusters[0][0]
-    for srv in srv_list:
-        debug("find_route: srv:", srv.nid, "type: ", srv.net_type)
-        to_type = srv.net_type
-        to = srv.nid
-        cluster_id = srv.cluster_id
-        debug ('looking for route to', to_type, to)
-        for r in local_routes:
-            debug("find_route: ", r)
-            if (r[3] <= to and to <= r[4]) and cluster_id == r[2]:
-                result.append((srv, r))
-    return result
-
 def get_active_target(db):
     target_uuid = db.getUUID()
     target_name = db.getName()
@@ -2141,7 +2082,7 @@ def get_server_by_nid_uuid(db, nid_uuid):
         net = Network(n)
         if net.nid_uuid == nid_uuid:
             return net
-        
+
############################################################
# lconf level logic

@@ -2156,8 +2097,6 @@ def newService(db):
         n = LOV(db, "YOU_SHOULD_NEVER_SEE_THIS_UUID")
     elif type == 'network':
         n = Network(db)
-    elif type == 'routetbl':
-        n = RouteTable(db)
     elif type == 'osd':
         n = OSD(db)
     elif type == 'cobd':
@@ -2168,15 +2107,13 @@ def newService(db):
         n = Mountpoint(db)
     elif type == 'echoclient':
         n = ECHO_CLIENT(db)
-    elif type == 'mgmt':
-        n = Management(db)
     else:
-        panic ("unknown service type:", type)
+        panic("unknown service type:", type)
     return n

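newService() and getServiceLevel() together drive ordering: getServices() returns (level, db) pairs, and services are brought up lowest level first (network at 5, ldlm at 20, devices at 30/40, clients at 70), with cleanup walking the list in reverse. A toy illustration of that sort, using made-up names:

    # Toy (class, name) pairs; the level numbers mirror getServiceLevel().
    levels = {'network': 5, 'ldlm': 20, 'osd': 30, 'mdsdev': 40, 'mtpt': 70}
    services = [('mtpt', '/mnt/lustre'), ('osd', 'OSD_ost1'), ('network', 'NET_uml1')]
    pairs = map(lambda s, l=levels: (l[s[0]], s), services)
    pairs.sort()
    # setup order  : NET_uml1, OSD_ost1, /mnt/lustre
    pairs.reverse()
    # cleanup order: /mnt/lustre, OSD_ost1, NET_uml1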
 #
 # Prepare the system to run lustre using a particular profile
-# in a the configuration. 
 #  * load & the modules
 #  * setup networking for the current node
 #  * make sure partitions are in place and prepared
 #  * initialize devices with lctl
 # Levels is important, and needs to be enforced.
@@ -2186,17 +2123,22 @@ def for_each_profile(db, prof_list, operation):
     for prof_uuid in prof_list:
         prof_db = db.lookup(prof_uuid)
         if not prof_db:
-            panic("profile:", profile, "not found.")
+            panic("profile:", prof_uuid, "not found.")
         services = getServices(prof_db)
         operation(services)
-    
+
 def doWriteconf(services):
     if config.nosetup:
         return
+    have_mds = 0
     for s in services:
         if s[1].get_class() == 'mdsdev':
             n = newService(s[1])
             n.write_conf()
+            have_mds = 1
+    if have_mds == 0:
+        panic("Cannot find mds device, please run --write_conf on the mds node.")
+

 def doSetup(services):
     if config.nosetup:
@@ -2204,7 +2146,7 @@ def doSetup(services):
     for s in services:
         n = newService(s[1])
         n.prepare()
-    
+
 def doModules(services):
     if config.nomod:
         return
@@ -2230,39 +2172,127 @@ def doUnloadModules(services):
             if n.safe_to_clean_modules():
                 n.cleanup_module()

+def doMakeServiceScript(services):
+    if config.nosetup:
+        return
+    try:
+        os.makedirs(config.service_scripts)
+    except OSError, e:
+        if e[0] != errno.EEXIST:
+            panic("Couldn't create scripts dir " + config.service_scripts + ": " + e[1])
+
+    for s in services:
+        if s[1].get_class() != 'osd' and s[1].get_class() != 'mdsdev':
+            continue
+
+        target_uuid = s[1].get_first_ref('target')
+        target = toplustreDB.lookup(target_uuid)
+        target_symlink = config.service_scripts + "/" + target.getName()
+        if config.force:
+            try:
+                try:
+                    os.unlink(target_symlink)
+                    if config.verbose:
+                        print "Removed " + target_symlink
+                except OSError, e:
+                    if e[0] != errno.EISDIR:
+                        raise e
+                    os.rmdir(target_symlink)
+                    if config.verbose:
+                        print "Removed " + target_symlink
+            except OSError, e:
+                if e[0] != errno.ENOENT:
+                    panic("Error removing " + target_symlink + ": " + e[1])
+
+        try:
+            os.symlink("/etc/init.d/lustre", target_symlink)
+            if config.verbose:
+                print "Created service link " + target_symlink + " to /etc/init.d/lustre"
+
+        except OSError, e:
+            if e[0] == errno.EEXIST:
+                extra_error = " (use --force option to remove existing files)"
+            else:
+                extra_error = ""
+            panic("Error creating " + target_symlink + ": " + e[1] + extra_error)
+
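doMakeServiceScript() has to be re-runnable under --force: whatever occupies the link name (stale symlink, file, or directory) is removed before the /etc/init.d/lustre symlink is recreated. A trimmed sketch of that remove-then-link dance, with a hypothetical path and helper name:

    import os, errno

    def make_service_link(target_symlink, force = 0):
        # Hypothetical helper mirroring the unlink/rmdir/symlink logic above.
        if force:
            try:
                os.unlink(target_symlink)       # stale file or symlink
            except OSError, e:
                if e.errno == errno.EISDIR:
                    os.rmdir(target_symlink)    # a directory squatting on the name
                elif e.errno != errno.ENOENT:
                    raise
        os.symlink("/etc/init.d/lustre", target_symlink)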
+# Check mtime of config logs
+def doCheckMtime(lustreDB, hosts):
+    for h in hosts:
+        node_db = lustreDB.lookup_name(h, 'node')
+        if node_db:
+            break
+    if not node_db:
+        return
+
+    mdsdb = 0
+    prof_list = node_db.get_refs('profile')
+    for prof_uuid in prof_list:
+        prof_db = node_db.lookup(prof_uuid)
+        if prof_db:
+            services = getServices(prof_db)
+            for s in services:
+                if s[1].get_class() == 'mdsdev':
+                    mdsdb = s[1]
+                    break
+
+    if mdsdb and lustreDB.get_mtime():
+        debug("Checking XML modification time")
+        devpath = mdsdb.get_val('devpath','')
+        xmtime = string.atol(lustreDB.get_mtime())
+        cmd = "debugfs -c -R 'stat /LOGS' %s 2>&1 | grep mtime" %devpath
+        ret, kmtimes = runcmd(cmd)
+        if ret:
+            log("Can not get mtime info of MDS LOGS directory")
+        else:
+            kmtime = string.atoi(string.split(kmtimes[0])[1], 0)
+            if xmtime > kmtime:
+                debug('xmtime ', xmtime, '> kmtime', kmtime)
+                if config.old_conf:
+                    log("Warning: MDS startup logs are older than config %s."
+                        " Please run --write_conf on stopped MDS to update."
+                        %CONFIG_FILE)
+                else:
+                    panic("Error: MDS startup logs are older than config %s."
+                          " Please run --write_conf on stopped MDS to update."
+                          " Use '--old_conf' to start anyway."
                          %CONFIG_FILE)
+    return
+
 #
-# Load profile for 
 def doHost(lustreDB, hosts):
-    global is_router, local_node_name
+    global local_node_name, tgt_select
     node_db = None
     for h in hosts:
         node_db = lustreDB.lookup_name(h, 'node')
         if node_db:
+            if config.service:
+                tgt_select[config.service] = h
+                config.group = config.service
             break
     if not node_db:
         panic('No host entry found.')

     local_node_name = node_db.get_val('name', 0)
-    is_router = node_db.get_val_int('router', 0)
     lustre_upcall = node_db.get_val('lustreUpcall', '')
     portals_upcall = node_db.get_val('portalsUpcall', '')
     timeout = node_db.get_val_int('timeout', 0)
     ptldebug = node_db.get_val('ptldebug', '')
     subsystem = node_db.get_val('subsystem', '')
-
-    find_local_clusters(node_db)
-    if not is_router:
-        find_local_routes(lustreDB)

     # Two step process: (1) load modules, (2) setup lustre
     # if not cleaning, load modules first.
     prof_list = node_db.get_refs('profile')

-    if config.write_conf:
+    if config.make_service_scripts:
+        for_each_profile(node_db, prof_list, doMakeServiceScript)
+        return
+
+    elif config.write_conf:
         for_each_profile(node_db, prof_list, doModules)
-        sys_make_devices()
         for_each_profile(node_db, prof_list, doWriteconf)
         for_each_profile(node_db, prof_list, doUnloadModules)
+        lustreDB.close()

     elif config.recover:
         if not (config.tgt_uuid and config.client_uuid and config.conn_uuid):
@@ -2271,15 +2301,14 @@ def doHost(lustreDB, hosts):
         doRecovery(lustreDB, lctl, config.tgt_uuid, config.client_uuid,
                    config.conn_uuid)
     elif config.cleanup:
-        if config.force:
-            # the command line can override this value
-            timeout = 5
+        if not mod_loaded('lnet'):
+            return
+
         # ugly hack, only need to run lctl commands for --dump
         if config.lctl_dump or config.record:
             for_each_profile(node_db, prof_list, doCleanup)
             return

-        sys_set_timeout(timeout)
         sys_set_ptldebug(ptldebug)
         sys_set_subsystem(subsystem)
         sys_set_lustre_upcall(lustre_upcall)
@@ -2287,6 +2316,7 @@ def doHost(lustreDB, hosts):

         for_each_profile(node_db, prof_list, doCleanup)
         for_each_profile(node_db, prof_list, doUnloadModules)
+        lustreDB.close()

     else:
         # ugly hack, only need to run lctl commands for --dump
@@ -2296,15 +2326,17 @@ def doHost(lustreDB, hosts):
             for_each_profile(node_db, prof_list, doSetup)
             return

-        sys_make_devices()
-        sys_set_netmem_max('/proc/sys/net/core/rmem_max', MAXTCPBUF)
-        sys_set_netmem_max('/proc/sys/net/core/wmem_max', MAXTCPBUF)
+        if PLATFORM == 'LINUX':
+            sys_set_netmem_max('/proc/sys/net/core/rmem_max', MAXTCPBUF)
+            sys_set_netmem_max('/proc/sys/net/core/wmem_max', MAXTCPBUF)

         for_each_profile(node_db, prof_list, doModules)

-        sys_set_debug_path()
-        sys_set_ptldebug(ptldebug)
-        sys_set_subsystem(subsystem)
+        if PLATFORM == 'LINUX':
+            # XXX need to be fixed for Darwin
+            sys_set_debug_path()
+            sys_set_ptldebug(ptldebug)
+            sys_set_subsystem(subsystem)
         script = config.gdb_script
         run(lctl.lctl, ' modules >', script)
         if config.gdb:
@@ -2317,41 +2349,181 @@ def doHost(lustreDB, hosts):
         sys_set_portals_upcall(portals_upcall)

         for_each_profile(node_db, prof_list, doSetup)
+        lustreDB.close()

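Every branch of doHost() runs the same two-pass pattern over the node's profiles: one pass to get kernel modules in place, a second to do the lctl device work, with cleanup executing the passes in the opposite order. Schematically (the do* callbacks are the real functions defined above; the wrapper names are illustrative only):

    def start_node(node_db, prof_list):
        for_each_profile(node_db, prof_list, doModules)  # pass 1: load modules
        for_each_profile(node_db, prof_list, doSetup)    # pass 2: configure devices

    def stop_node(node_db, prof_list):
        for_each_profile(node_db, prof_list, doCleanup)        # tear down devices
        for_each_profile(node_db, prof_list, doUnloadModules)  # then unload modules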
-def doRecovery(db, lctl, tgt_uuid, client_uuid, nid_uuid):
-    tgt = db.lookup(tgt_uuid)
+def add_clumanager_node(node_db, nodes, services):
+    new_services = []
+    node_name = node_db.getUUID()
+    nodes[node_name] = []
+
+    for prof_uuid in node_db.get_refs('profile'):
+        prof_db = toplustreDB.lookup(prof_uuid)
+        for ref_class, ref_uuid in prof_db.get_all_refs():
+            if ref_class not in ('osd', 'mdsdev'):
+                continue
+            devdb = toplustreDB.lookup(ref_uuid)
+            tgt_uuid = devdb.get_first_ref('target')
+
+            nodes[node_name].append(ref_uuid)
+
+            if not services.has_key(tgt_uuid):
+                if config.verbose:
+                    print "New service: " + tgt_uuid + " (originally found on " + node_name + ")"
+                new_services.append(tgt_uuid)
+                services[tgt_uuid] = []
+            services[tgt_uuid].append(ref_uuid)
+
+    return new_services
+
+def add_clumanager_services(new_services, nodes, dev_list):
+    new_nodes = []
+    for devdb in dev_list:
+        tgt_uuid = devdb.get_first_ref('target')
+        if tgt_uuid in new_services:
+            node_uuid = devdb.get_first_ref('node')
+
+            if not (nodes.has_key(node_uuid) or node_uuid in new_nodes):
+                if config.verbose:
+                    print "New node: " + node_uuid + " for service " + tgt_uuid
+                new_nodes.append(node_uuid)
+
+    return new_nodes
+
+def doClumanager(lustreDB, hosts):
+    nodes = {}
+    services = {}
+
+    dev_list = []
+
+    for dev_uuid in toplustreDB.get_refs('osd') + toplustreDB.get_refs('mdsdev'):
+        dev_list.append(lustreDB.lookup(dev_uuid))
+
+    node_db = None
+    for h in hosts:
+        node_db = lustreDB.lookup_name(h, 'node')
+        if node_db:
+            our_host = h
+            new_services = add_clumanager_node(node_db, nodes, services)
+            break
+
+    if not node_db:
+        panic('No host entry found.')
+
+    while 1:
+        if len(new_services) == 0:
+            break
+
+        new_nodes = add_clumanager_services(new_services, nodes, dev_list)
+        if len(new_nodes) == 0:
+            break
+
+        if len(new_nodes) + len(nodes.keys()) > 8:
+            panic("CluManager only supports 8 nodes per failover \"cluster.\"")
+
+        new_services = []
+        for node_uuid in new_nodes:
+            node_db = lustreDB.lookup(node_uuid)
+            if not node_db:
+                panic("No node entry for " + node_uuid + " was found.")
+
+            new_services.append(add_clumanager_node(node_db, nodes, services))
+
+    nodenames = []
+    for node in nodes.keys():
+        nodedb = lustreDB.lookup(node)
+        nodenames.append(nodedb.getName())
+    nodenames.sort()
+
+    print """
+
+
+
+
+
+
+
+    """ % (string.join(nodenames), config.rawprimary, config.rawsecondary)
+
+    i = 0
+    for node in nodenames:
+        print "  " % (i, node)
+        i = i + 1
+
+    print "  \n  "
+
+    servicekeys = services.keys()
+    servicekeys.sort()
+
+    i = 0
+    for service in servicekeys:
+        svcdb = lustreDB.lookup(service)
+        print "    " % (i, svcdb.getName())
+        i = i + 1
+
+        j = 0
+        active_uuid = get_active_target(svcdb)
+        for svc_uuid in [active_uuid] + services[service]:
+            if svc_uuid == active_uuid and j > 0:
+                continue
+            svcdb = lustreDB.lookup(svc_uuid)
+
+            svc_node_uuid = svcdb.get_first_ref('node')
+            svc_nodedb = lustreDB.lookup(svc_node_uuid)
+
+            print "      " % (j, svc_nodedb.getName())
+            j = j + 1
+
+        print "    "
+
+    print "  \n  "
+
+    i = 0
+    for service in servicekeys:
+        svcdb = lustreDB.lookup(service)
+        active_uuid = get_active_target(svcdb)
+        activedb = lustreDB.lookup(active_uuid)
+
+        svc_node_uuid = activedb.get_first_ref('node')
+        svc_nodedb = lustreDB.lookup(svc_node_uuid)
+
+        print "    " \
+              % ( svcdb.getName(), i, svcdb.getName(), config.service_scripts, svcdb.getName())
+        print "    \n  "
+        i = i + 1
+
+    print "  \n"
+
+def doRecovery(lustreDB, lctl, tgt_uuid, client_uuid, nid_uuid):
+    tgt = lustreDB.lookup(tgt_uuid)
     if not tgt:
         raise Lustre.LconfError("doRecovery: "+ tgt_uuid +" not found.")
     new_uuid = get_active_target(tgt)
     if not new_uuid:
         raise Lustre.LconfError("doRecovery: no active target found for: " +
                                 tgt_uuid)
-    net = choose_local_server(get_ost_net(db, new_uuid))
-    if not net:
+    srv_list = find_local_servers(get_ost_net(lustreDB, new_uuid))
+    if not srv_list[0]:
         raise Lustre.LconfError("Unable to find a connection to:" + new_uuid)

-    log("Reconnecting", tgt_uuid, " to ", net.nid_uuid);
-    try:
-        oldnet = get_server_by_nid_uuid(db, nid_uuid)
-        if oldnet:
-            lctl.disconnect(oldnet)
-    except CommandError, e:
-        log("recover: disconnect", nid_uuid, "failed: ")
-        e.dump()
+    oldsrv = get_server_by_nid_uuid(lustreDB, nid_uuid)
+    lustreDB.close()

-    try:
-        lctl.connect(net)
-    except CommandError, e:
-        log("recover: connect failed")
-        e.dump()
+    for srv in srv_list:
+        if oldsrv.net_type != srv.net_type:
+            continue
+
+        log("Reconnecting", tgt_uuid, "to", srv.nid_uuid)

-    lctl.recover(client_uuid, net.nid_uuid)
+        lctl.recover(client_uuid, srv.nid_uuid)

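Stripped of bookkeeping, the new doRecovery() is: find the currently active failover target, collect its server NIDs, and hand those on the same network type as the dead connection to lctl.recover. A compressed restatement under that reading (function name is illustrative; the helpers are the ones defined above):

    def recover_client(lustreDB, lctl, tgt_uuid, client_uuid, nid_uuid):
        tgt = lustreDB.lookup(tgt_uuid)
        new_uuid = get_active_target(tgt)            # active node for the target
        srv_list = find_local_servers(get_ost_net(lustreDB, new_uuid))
        oldsrv = get_server_by_nid_uuid(lustreDB, nid_uuid)
        for srv in srv_list:
            if oldsrv.net_type == srv.net_type:      # stay on the same fabric
                lctl.recover(client_uuid, srv.nid_uuid)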
 def setupModulePath(cmd, portals_dir = PORTALS_DIR):
     base = os.path.dirname(cmd)
     if development_mode():
         if not config.lustre:
+            debug('using objdir module paths')
             config.lustre = (os.path.join(base, ".."))
         # normalize the portals dir, using command line arg if set
         if config.portals:
@@ -2361,7 +2533,7 @@ def setupModulePath(cmd, portals_dir = PORTALS_DIR):
         debug('config.portals', config.portals)
     elif config.lustre and config.portals:
         # production mode
-        # if --lustre and --portals, normalize portals 
+        # if --lustre and --portals, normalize portals
         # can ignore PORTALS_DIR here, since it is probably useless here
         config.portals = os.path.join(config.lustre, config.portals)
         debug('config.portals B', config.portals)
@@ -2379,25 +2551,51 @@ def sysctl(path, val):


 def sys_set_debug_path():
-    sysctl('portals/debug_path', config.debug_path)
+    sysctl('lnet/debug_path', config.debug_path)
+
+def validate_upcall(upcall):
+    import os
+    if upcall in ('DEFAULT','NONE'):
+        pass
+    elif os.path.exists(upcall):
+        if not os.access(upcall, os.X_OK):
+            print "WARNING upcall script not executable: %s" % upcall
+    else:
+        print "WARNING invalid upcall script specified: %s" % upcall

 def sys_set_lustre_upcall(upcall):
-    # the command overrides the value in the node config
+    # the command line overrides the value in the node config
     if config.lustre_upcall:
         upcall = config.lustre_upcall
     elif config.upcall:
         upcall = config.upcall
     if upcall:
+        validate_upcall(upcall)
         lctl.set_lustre_upcall(upcall)

 def sys_set_portals_upcall(upcall):
-    # the command overrides the value in the node config
+    # the command line overrides the value in the node config
     if config.portals_upcall:
         upcall = config.portals_upcall
     elif config.upcall:
         upcall = config.upcall
     if upcall:
-        sysctl('portals/upcall', upcall)
+        validate_upcall(upcall)
+        sysctl('lnet/upcall', upcall)
+
+def sys_set_group_upcall(mds, upcall):
+    if config.noexec:
+        return
+    # the command line overrides the value in the MDS config
+    if config.group_upcall:
+        upcall = config.group_upcall
+    if upcall:
+        validate_upcall(upcall)
+        debug("setting MDS", mds, "upcall to:", upcall)
+        path = "/proc/fs/lustre/mds/" + mds + "/group_upcall"
+        fp = open(path, 'w')
+        fp.write(upcall)
+        fp.close()

 def sys_set_timeout(timeout):
     # the command line overrides the value in the node config
@@ -2415,8 +2613,8 @@ def sys_optimize_elan ():
             "/proc/qsnet/elan3/config/eventint_punt_loops",
             "/proc/qsnet/elan4/config/elan4_mainint_punt_loops"]
     for p in procfiles:
-        if os.access(p, os.R_OK):
-            run ("echo 0 > " + p)
+        if os.access(p, os.W_OK):
+            run ("echo 1 > " + p)

 def sys_set_ptldebug(ptldebug):
     if config.ptldebug:
@@ -2425,7 +2623,7 @@ def sys_set_ptldebug(ptldebug):
         try:
             val = eval(ptldebug, ptldebug_names)
             val = "0x%x" % (val)
-            sysctl('portals/debug', val)
+            sysctl('lnet/debug', val)
         except NameError, e:
             panic(str(e))

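sys_set_ptldebug() converts a human-readable mask into the numeric /proc value by eval'ing the configured string with ptldebug_names as its namespace, so '|' and '+' in the string act as real bitwise operators over the flag table. For example, with a cut-down copy of the table from the top of the script:

    names = { "trace" : (1 << 0), "inode" : (1 << 1), "dlmtrace" : (1 << 16) }
    val = eval("trace | inode | dlmtrace", names)
    print "0x%x" % val    # -> 0x10003, the value written to lnet/debug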
@@ -2436,7 +2634,7 @@ def sys_set_subsystem(subsystem):
         try:
             val = eval(subsystem, subsystem_names)
             val = "0x%x" % (val)
-            sysctl('portals/subsystem_debug', val)
+            sysctl('lnet/subsystem_debug', val)
         except NameError, e:
             panic(str(e))

@@ -2452,13 +2650,6 @@ def sys_set_netmem_max(path, max):
     fp = open(path, 'w')
     fp.write('%d\n' %(max))
     fp.close()
-
-
-def sys_make_devices():
-    if not os.access('/dev/portals', os.R_OK):
-        run('mknod /dev/portals c 10 240')
-    if not os.access('/dev/obd', os.R_OK):
-        run('mknod /dev/obd c 10 241')


 # Add dir to the global PATH, if not already there.
@@ -2467,7 +2658,7 @@ def add_to_path(new_dir):
     if new_dir in syspath:
         return
     os.environ['PATH'] = os.environ['PATH'] + ':' + new_dir
-    
+
 def default_debug_path():
     path = '/tmp/lustre-log'
     if os.path.isdir('/r'):
@@ -2482,7 +2673,6 @@ def default_gdb_script():
     else:
         return script

-
 DEFAULT_PATH = ('/sbin', '/usr/sbin', '/bin', '/usr/bin')
 # ensure basic elements are in the system path
 def sanitise_path():
@@ -2493,6 +2683,9 @@ def sanitise_path():

 tgt_select = {}
 def init_select(args):
     # args = [service=nodeA,service2=nodeB service3=nodeC]
+    # --service is analogous to:
+    #     --group <service> --select <service>=<node>
+    # this is handled in doHost()
     global tgt_select
     for arg in args:
         list = string.split(arg, ',')
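init_select() flattens any number of repeated --select arguments, each a comma-separated list of service=node pairs, into the tgt_select dictionary that doHost() consults. For instance:

    import string
    # e.g. lconf --select ost1=nodeA,ost2=nodeB --select mds1=nodeC ...
    args = ["ost1=nodeA,ost2=nodeB", "mds1=nodeC"]
    tgt_select = {}
    for arg in args:
        for entry in string.split(arg, ','):
            srv, node = string.split(entry, '=')
            tgt_select[srv] = node
    # tgt_select == {'ost1': 'nodeA', 'ost2': 'nodeB', 'mds1': 'nodeC'}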
@@ -2515,6 +2708,7 @@
 lconf_options = [
     ('ldapurl',"LDAP server URL, eg. ldap://localhost", PARAM),
     ('config', "Cluster config name used for LDAP query", PARAM),
     ('select', "service=nodeA,service2=nodeB ", PARAMLIST),
+    ('service', "shorthand for --group <service> --select <service>=<node>", PARAM),
     ('node', "Load config for <nodename>", PARAM),
     ('cleanup,d', "Cleans up config. (Shutdown)"),
     ('force,f', "Forced unmounting and/or obd detach during cleanup",
                FLAG, 0),
@@ -2526,6 +2720,9 @@
                another node for failover purposes.  This will not be a
                clean shutdown.""",
                FLAG, 0),
+    ('abort_recovery',"""Used to start a service when you know recovery
+               will not succeed.  This will skip the recovery
+               timeout period."""),
     ('gdb', """Prints message after creating gdb module script
                     and sleeps for 5 seconds."""),
     ('noexec,n', """Prints the commands and steps that will be run for a
@@ -2535,18 +2732,21 @@
     ('nosetup', "Skip device setup/cleanup step."),
     ('reformat', "Reformat all devices (without question)"),
     ('mkfsoptions', "Additional options for the mk*fs command line", PARAM),
+    ('mountfsoptions', "Additional options for mount fs command line", PARAM),
+    ('clientoptions', "Additional options for Lustre", PARAM),
     ('dump',  "Dump the kernel debug log to file before portals is unloaded",
                PARAM),
     ('write_conf', "Save all the client config information on mds."),
+    ('old_conf', "Start up service even though config logs appear outdated."),
     ('record', "Write config information on mds."),
     ('record_log', "Name of config record log.", PARAM),
     ('record_device', "MDS device name that will record the config commands",
              PARAM),
     ('minlevel', "Minimum level of services to configure/cleanup",
                  INTPARAM, 0),
-    ('maxlevel', """Maximum level of services to configure/cleanup 
+    ('maxlevel', """Maximum level of services to configure/cleanup
                     Levels are approximately like:
-                            10 - netwrk
+                            10 - network
                             20 - device, ldlm
                             30 - osd, mdd
                             40 - mds, ost
                             70 - mountpoint, echo_client, osc, mdc, lov""",
                INTPARAM, 100),
@@ -2560,14 +2760,21 @@
     ('upcall', "Set both portals and lustre upcall script", PARAM),
     ('lustre_upcall', "Set lustre upcall script", PARAM),
     ('portals_upcall', "Set portals upcall script", PARAM),
+    ('group_upcall', "Set supplementary group upcall program", PARAM),
     ('lctl_dump', "Save lctl ioctls to the dumpfile argument", PARAM),
     ('ptldebug', "Set the portals debug level",  PARAM),
     ('subsystem', "Set the portals debug subsystem",  PARAM),
     ('gdb_script', "Fullname of gdb debug script", PARAM, default_gdb_script()),
     ('debug_path', "Path to save debug dumps", PARAM, default_debug_path()),
+    ('allow_unprivileged_port', "Allow connections from unprivileged ports"),
+    ('clumanager', "Generate CluManager config file for this node's cluster"),
+    ('rawprimary', "For clumanager, device of the primary quorum", PARAM, "/dev/raw/raw1"),
+    ('rawsecondary', "For clumanager, device of the secondary quorum", PARAM, "/dev/raw/raw2"),
+    ('service_scripts', "For clumanager, directory containing per-service scripts", PARAM, "/etc/lustre/services"),
+    ('make_service_scripts', "Create per-service symlinks for use with clumanager"),
     # Client recovery options
     ('recover', "Recover a device"),
-    ('group', "The group of devices to configure or cleanup", PARAM),
+    ('group,g', "The group of devices to configure or cleanup", PARAM),
     ('tgt_uuid', "The failed target (required for recovery)", PARAM),
     ('client_uuid', "The failed client (required for recovery)", PARAM),
     ('conn_uuid', "The failed connection (required for recovery)", PARAM),
@@ -2575,14 +2782,17 @@
     ('inactive', """The name of an inactive service, to be ignored during
                     mounting (currently OST-only).  Can be repeated.""",
                     PARAMLIST),
-    ]
+    ('user_xattr', """Enable user_xattr support on MDS""", FLAG, 0),
+    ('acl', """Enable ACL support on MDS""", FLAG, 0),
+    ('quota', "Enable quota support for client file system", PARAM),
+    ]

 def main():
-    global lctl, config, toplevel, CONFIG_FILE
+    global lctl, config, toplustreDB, CONFIG_FILE

     # in the upcall this is set to SIG_IGN
     signal.signal(signal.SIGCHLD, signal.SIG_DFL)
-    
+
     cl = Lustre.Options("lconf", "config.xml", lconf_options)
     try:
         config, args = cl.parse(sys.argv[1:])
@@ -2605,20 +2815,34 @@ def main():
     random.seed(seed)

     sanitise_path()
-    
+
     init_select(config.select)

     if len(args) > 0:
-        if not os.access(args[0], os.R_OK):
+        # allow config to be fetched via HTTP, but only with python2
+        if sys.version[0] != '1' and args[0].startswith('http://'):
+            import urllib2
+            try:
+                config_file = urllib2.urlopen(args[0])
+            except (urllib2.URLError, socket.error), err:
+                if hasattr(err, 'args'):
+                    err = err.args[1]
+                print "Could not access '%s': %s" %(args[0], err)
+                sys.exit(1)
+        elif not os.access(args[0], os.R_OK):
             print 'File not found or readable:', args[0]
             sys.exit(1)
+        else:
+            # regular file
+            config_file = open(args[0], 'r')

         try:
-            dom = xml.dom.minidom.parse(args[0])
+            dom = xml.dom.minidom.parse(config_file)
         except Exception:
             panic("%s does not appear to be a config file." % (args[0]))
             sys.exit(1) # make sure to die here, even in debug mode.
+        config_file.close()
         CONFIG_FILE = args[0]
-        db = Lustre.LustreDB_XML(dom.documentElement, dom.documentElement)
+        lustreDB = Lustre.LustreDB_XML(dom.documentElement, dom.documentElement)
         if not config.config:
             config.config = os.path.basename(args[0])# use full path?
             if config.config[-4:] == '.xml':
@@ -2627,7 +2851,7 @@ def main():
         if not config.config:
             panic("--ldapurl requires --config name")
         dn = "config=%s,fs=lustre" % (config.config)
-        db = Lustre.LustreDB_LDAP('', {}, base=dn, url = config.ldapurl)
+        lustreDB = Lustre.LustreDB_LDAP('', {}, base=dn, url = config.ldapurl)
     elif config.ptldebug or config.subsystem:
         sys_set_ptldebug(None)
         sys_set_subsystem(None)
@@ -2637,9 +2861,13 @@ def main():
         print 'see lconf --help for command summary'
         sys.exit(1)

-    toplevel = db
+    if config.reformat and config.cleanup:
+        panic("Options \"reformat\" and \"cleanup\" are incompatible. "+
+              "Please specify only one.")

-    ver = db.get_version()
+    toplustreDB = lustreDB
+
+    ver = lustreDB.get_version()
     if not ver:
         panic("No version found in config data, please recreate.")
     if ver != Lustre.CONFIG_VERSION:
@@ -2652,7 +2880,7 @@ def main():
     else:
         if len(host) > 0:
             node_list.append(host)
-        node_list.append('localhost')
+#        node_list.append('localhost')

     debug("configuring for host: ", node_list)

@@ -2665,13 +2893,19 @@ def main():
     if config.lctl_dump:
         lctl.use_save_file(config.lctl_dump)

+    if not (config.reformat or config.write_conf or config.cleanup):
+        doCheckMtime(lustreDB, node_list)
+
     if config.record:
         if not (config.record_device and config.record_log):
             panic("When recording, both --record_log and --record_device must be specified.")
         lctl.clear_log(config.record_device, config.record_log)
         lctl.record(config.record_device, config.record_log)

-    doHost(db, node_list)
+    if config.clumanager:
+        doClumanager(lustreDB, node_list)
+    else:
+        doHost(lustreDB, node_list)

     if config.record:
         lctl.end_record()
@@ -2685,7 +2919,10 @@ if __name__ == "__main__":
         sys.exit(1)
     except CommandError, e:
         e.dump()
-        sys.exit(e.rc)
+        rc = e.rc
+        if rc == 0:
+            rc = 1
+        sys.exit(rc)

     if first_cleanup_error:
         sys.exit(first_cleanup_error)