X-Git-Url: https://git.whamcloud.com/?a=blobdiff_plain;f=lustre%2Futils%2Flconf;h=08cd122d9332bb35d0fa4d914fd470633c41e90b;hb=4527a65cc1a46740c8edee7557a3cdd7ce035d87;hp=2f4130b5a493dc67af4e71ff9c42a10dc0e2fdc7;hpb=37113b4cd285399154f0d8a27df7d4090cc5fee4;p=fs%2Flustre-release.git diff --git a/lustre/utils/lconf b/lustre/utils/lconf index 2f4130b..08cd122 100755 --- a/lustre/utils/lconf +++ b/lustre/utils/lconf @@ -58,7 +58,7 @@ DEFAULT_PORT = 988 # Maximum number of devices to search for. # (the /dev/loop* nodes need to be created beforehand) MAX_LOOP_DEVICES = 256 -PORTALS_DIR = 'portals' +PORTALS_DIR = '../portals' # Needed to call lconf --record CONFIG_FILE = "" @@ -88,7 +88,12 @@ ptldebug_names = { "rpctrace" : (1 << 20), "vfstrace" : (1 << 21), "reada" : (1 << 22), - } + "mmap" : (1 << 23), + "config" : (1 << 24), + "console" : (1 << 25), + "quota" : (1 << 26), + "sec" : (1 << 27), +} subsystem_names = { "undefined" : (1 << 0), @@ -102,18 +107,21 @@ subsystem_names = { "rpc" : (1 << 8), "mgmt" : (1 << 9), "portals" : (1 << 10), - "socknal" : (1 << 11), - "qswnal" : (1 << 12), - "pinger" : (1 << 13), - "filter" : (1 << 14), - "ptlbd" : (1 << 15), - "echo" : (1 << 16), - "ldlm" : (1 << 17), - "lov" : (1 << 18), - "gmnal" : (1 << 19), - "ptlrouter" : (1 << 20), - "cobd" : (1 << 21), - "ibnal" : (1 << 22), + "nal" : (1 << 11), + "pinger" : (1 << 12), + "filter" : (1 << 13), + "ptlbd" : (1 << 14), + "echo" : (1 << 15), + "ldlm" : (1 << 16), + "lov" : (1 << 17), + "ptlrouter" : (1 << 18), + "cobd" : (1 << 19), + "sm" : (1 << 20), + "asobd" : (1 << 21), + "confobd" : (1 << 22), + "lmv" : (1 << 23), + "cmobd" : (1 << 24), + "sec" : (1 << 25), } @@ -127,7 +135,7 @@ def cleanup_error(rc): # debugging and error funcs def fixme(msg = "this feature"): - raise Lustre.LconfError, msg + ' not implmemented yet.' + raise Lustre.LconfError, msg + ' not implemented yet.' def panic(*args): msg = string.join(map(str,args)) @@ -213,8 +221,11 @@ class DaemonHandler: if self.running(): pid = self.read_pidfile() try: - log ("killing process", pid) - os.kill(pid, 15) + if pid != 1: + log ("killing process", pid) + os.kill(pid, 15) + else: + log("was unable to find pid of " + self.command) #time.sleep(1) # let daemon die except OSError, e: log("unable to kill", self.command, e) @@ -225,7 +236,10 @@ class DaemonHandler: pid = self.read_pidfile() if pid: try: - os.kill(pid, 0) + if pid != 1: + os.kill(pid, 0) + else: + log("was unable to find pid of " + self.command) except OSError: self.clean_pidfile() else: @@ -235,7 +249,10 @@ class DaemonHandler: def read_pidfile(self): try: fp = open(self.pidfile(), 'r') - pid = int(fp.read()) + val = fp.read() + if val == '': + val = '1' + pid = int(val) fp.close() return pid except IOError: @@ -250,21 +267,16 @@ class DaemonHandler: log(self.pidfile(), e) class AcceptorHandler(DaemonHandler): - def __init__(self, port, net_type, send_mem, recv_mem, irq_aff): + def __init__(self, port, net_type): DaemonHandler.__init__(self, "acceptor") self.port = port self.flags = '' - self.send_mem = send_mem - self.recv_mem = recv_mem - - if irq_aff: - self.flags = self.flags + ' -i' def pidfile(self): return "/var/run/%s-%d.pid" % (self.command, self.port) def command_line(self): - return string.join(map(str,('-s', self.send_mem, '-r', self.recv_mem, self.flags, self.port))) + return string.join(map(str,(self.flags, self.port))) acceptors = {} @@ -356,6 +368,7 @@ class LCTLInterface: child = popen2.Popen3(cmd_line, 1) # Capture stdout and stderr from command child.tochild.write(cmds + "\n") child.tochild.close() +# print "LCTL:", cmds # From "Python Cookbook" from O'Reilly outfile = child.fromchild @@ -400,7 +413,6 @@ class LCTLInterface: raise CommandError(self.lctl, out, rc) return rc, out - def clear_log(self, dev, log): """ clear an existing log """ cmds = """ @@ -410,6 +422,13 @@ class LCTLInterface: quit """ % (dev, log) self.run(cmds) + def root_squash(self, name, uid, nid): + cmds = """ + device $%s + root_squash %s %s + quit""" % (name, uid, nid) + self.run(cmds) + def network(self, net, nid): """ set mynid """ cmds = """ @@ -418,33 +437,58 @@ class LCTLInterface: quit """ % (net, nid) self.run(cmds) + # add an interface + def add_interface(self, net, ip, netmask = ""): + """ add an interface """ + cmds = """ + network %s + add_interface %s %s + quit """ % (net, ip, netmask) + self.run(cmds) + + # delete an interface + def del_interface(self, net, ip): + """ delete an interface """ + cmds = """ + network %s + del_interface %s + quit """ % (net, ip) + self.run(cmds) + # create a new connection def add_uuid(self, net_type, uuid, nid): cmds = "\n add_uuid %s %s %s" %(uuid, nid, net_type) self.run(cmds) - def add_autoconn(self, net_type, send_mem, recv_mem, nid, hostaddr, - port, flags): - if net_type in ('tcp',) and not config.lctl_dump: + def add_peer(self, net_type, nid, hostaddr, port): + if net_type in ('tcp','openib','ra') and not config.lctl_dump: cmds = """ network %s - send_mem %d - recv_mem %d - add_autoconn %s %s %d %s + add_peer %s %s %d quit""" % (net_type, - send_mem, - recv_mem, - nid, hostaddr, port, flags ) + nid, hostaddr, port ) + self.run(cmds) + elif net_type in ('iib',) and not config.lctl_dump: + cmds = """ + network %s + add_peer %s + quit""" % (net_type, + nid ) + self.run(cmds) + elif net_type in ('vib',) and not config.lctl_dump: + cmds = """ + network %s + add_peer %s %s + quit""" % (net_type, + nid, hostaddr ) self.run(cmds) def connect(self, srv): self.add_uuid(srv.net_type, srv.nid_uuid, srv.nid) - if srv.net_type in ('tcp',) and not config.lctl_dump: - flags = 's' - if srv.irq_affinity: - flags = flags + 'i' - self.add_autoconn(srv.net_type, srv.send_mem, srv.recv_mem, - srv.nid, srv.hostaddr, srv.port, flags) + if srv.net_type in ('tcp','openib','iib','vib','ra') and not config.lctl_dump: + if srv.hostaddr[0]: + hostaddr = string.split(srv.hostaddr[0], '/')[0] + self.add_peer(srv.net_type, srv.nid, hostaddr, srv.port) # Recover a device def recover(self, dev_name, new_conn): @@ -499,21 +543,31 @@ class LCTLInterface: self.run(cmds) - def del_autoconn(self, net_type, nid, hostaddr): + def del_peer(self, net_type, nid, hostaddr): if net_type in ('tcp',) and not config.lctl_dump: cmds = """ ignore_errors network %s - del_autoconn %s %s s + del_peer %s %s single_share quit""" % (net_type, nid, hostaddr) self.run(cmds) + elif net_type in ('openib','iib','vib','ra') and not config.lctl_dump: + cmds = """ + ignore_errors + network %s + del_peer %s single_share + quit""" % (net_type, + nid) + self.run(cmds) # disconnect one connection def disconnect(self, srv): self.del_uuid(srv.nid_uuid) - if srv.net_type in ('tcp',) and not config.lctl_dump: - self.del_autoconn(srv.net_type, srv.nid, srv.hostaddr) + if srv.net_type in ('tcp','openib','iib','vib','ra') and not config.lctl_dump: + if srv.hostaddr[0]: + hostaddr = string.split(srv.hostaddr[0], '/')[0] + self.del_peer(srv.net_type, srv.nid, hostaddr) def del_uuid(self, uuid): cmds = """ @@ -537,24 +591,51 @@ class LCTLInterface: quit""" % (type, name, uuid) self.run(cmds) - def setup(self, name, setup = ""): + def detach(self, name): + cmds = """ + cfg_device %s + detach + quit""" % (name) + self.run(cmds) + + def set_security(self, name, key, value): + cmds = """ + cfg_device %s + set_security %s %s + quit""" % (name, key, value) + self.run(cmds) + + def setup(self, name, setup = ""): cmds = """ cfg_device %s setup %s quit""" % (name, setup) self.run(cmds) + def add_conn(self, name, conn_uuid): + cmds = """ + cfg_device %s + add_conn %s + quit""" % (name, conn_uuid) + self.run(cmds) + + def start(self, name, conf_name): + cmds = """ + device $%s + start %s + quit""" % (name, conf_name) + self.run(cmds) # create a new device with lctl def newdev(self, type, name, uuid, setup = ""): - self.attach(type, name, uuid); + if type != 'mds': + self.attach(type, name, uuid); try: self.setup(name, setup) except CommandError, e: self.cleanup(name, uuid, 0) raise e - # cleanup a device def cleanup(self, name, uuid, force, failover = 0): if failover: force = 1 @@ -568,9 +649,8 @@ class LCTLInterface: self.run(cmds) # create an lov - def lov_setup(self, name, uuid, desc_uuid, mdsuuid, stripe_cnt, - stripe_sz, stripe_off, - pattern, devlist): + def lov_setup(self, name, uuid, desc_uuid, stripe_cnt, + stripe_sz, stripe_off, pattern, devlist = None): cmds = """ attach lov %s %s lov_setup %s %d %d %d %s %s @@ -578,13 +658,34 @@ class LCTLInterface: pattern, devlist) self.run(cmds) - # create an lov - def lov_setconfig(self, uuid, mdsuuid, stripe_cnt, stripe_sz, stripe_off, - pattern, devlist): + # add an OBD to a LOV + def lov_add_obd(self, name, uuid, obd_uuid, index, gen): cmds = """ - cfg_device $%s - lov_setconfig %s %d %d %d %s %s - quit""" % (mdsuuid, uuid, stripe_cnt, stripe_sz, stripe_off, pattern, devlist) + lov_modify_tgts add %s %s %s %s + quit""" % (name, obd_uuid, index, gen) + self.run(cmds) + + # create an lmv + def lmv_setup(self, name, uuid, desc_uuid, devlist): + cmds = """ + attach lmv %s %s + lmv_setup %s %s + quit""" % (name, uuid, desc_uuid, devlist) + self.run(cmds) + + # delete an OBD from a LOV + def lov_del_obd(self, name, uuid, obd_uuid, index, gen): + cmds = """ + lov_modify_tgts del %s %s %s %s + quit""" % (name, obd_uuid, index, gen) + self.run(cmds) + + # deactivate an OBD + def deactivate(self, name): + cmds = """ + device $%s + deactivate + quit""" % (name) self.run(cmds) # dump the log file @@ -632,7 +733,6 @@ class LCTLInterface: quit""" % (timeout,) self.run(cmds) - # delete mount options def set_lustre_upcall(self, upcall): cmds = """ set_lustre_upcall %s @@ -700,17 +800,6 @@ def do_find_file(base, mod): if module: return module -def find_module(src_dir, dev_dir, modname): - modbase = src_dir +'/'+ dev_dir +'/'+ modname - for modext in '.ko', '.o': - module = modbase + modext - try: - if os.access(module, os.R_OK): - return module - except OSError: - pass - return None - # is the path a block device? def is_block(path): s = () @@ -720,6 +809,19 @@ def is_block(path): return 0 return stat.S_ISBLK(s[stat.ST_MODE]) +# find the journal device from mkfs options +def jdev(opts): + if opts == None: + return '' + x=string.split(opts) + i=0 + while i < len(x) - 1: + if x[i] == '-J' and x[i+1].startswith('device='): + str=x[i+1] + return str[7:] + i=i+1 + return '' + # build fs according to type # fixme: dangerous def mkfs(dev, devsize, fstype, jsize, isize, mkfsoptions, isblock=1): @@ -735,14 +837,54 @@ def mkfs(dev, devsize, fstype, jsize, isize, mkfsoptions, isblock=1): if fstype in ('ext3', 'extN', 'ldiskfs'): # ext3 journal size is in megabytes - if jsize == 0: + # but don't set jsize if mkfsoptions indicates a separate journal device + if jsize == 0 and jdev(mkfsoptions) == '': if devsize == 0: if not is_block(dev): ret, out = runcmd("ls -l %s" %dev) devsize = int(string.split(out[0])[4]) / 1024 else: + # sfdisk works for symlink, hardlink, and realdev ret, out = runcmd("sfdisk -s %s" %dev) - devsize = int(out[0]) + if not ret: + devsize = int(out[0]) + else: + # sfdisk -s will fail for too large block device, + # then, read the size of partition from /proc/partitions + + # get the realpath of the device + # it may be the real device, such as /dev/hda7 + # or the hardlink created via mknod for a device + if 'realpath' in dir(os.path): + real_dev = os.path.realpath(dev) + else: + real_dev = dev + link_count = 0 + while os.path.islink(real_dev) and (link_count < 20): + link_count = link_count + 1 + dev_link = os.readlink(real_dev) + if os.path.isabs(dev_link): + real_dev = dev_link + else: + real_dev = os.path.join(os.path.dirname(real_dev), dev_link) + if link_count > 19: + panic("Entountered too many symbolic links resolving block device:", dev) + + # get the major and minor number of the realpath via ls + # it seems python(os.stat) does not return + # the st_rdev member of the stat structure + ret, out = runcmd("ls -l %s" %real_dev) + major = string.split(string.split(out[0])[4], ",")[0] + minor = string.split(out[0])[5] + + # get the devsize from /proc/partitions with the major and minor number + ret, out = runcmd("cat /proc/partitions") + for line in out: + if len(line) > 1: + if string.split(line)[0] == major and string.split(line)[1] == minor: + devsize = int(string.split(line)[2]) + break + if devsize > 1024 * 1024: jsize = ((devsize / 102400) * 4) if jsize > 400: @@ -752,6 +894,14 @@ def mkfs(dev, devsize, fstype, jsize, isize, mkfsoptions, isblock=1): mkfs = 'mkfs.ext2 -j -b 4096 ' if not isblock or config.force: mkfs = mkfs + ' -F ' + if jdev(mkfsoptions) != '': + jmkfs = 'mkfs.ext2 -b 4096 -O journal_dev ' + if config.force: + jmkfs = jmkfs + '-F ' + jmkfs = jmkfs + jdev(mkfsoptions) + (ret, out) = run (jmkfs) + if ret: + panic("Unable format journal device:", jdev(mkfsoptions), string.join(out)) elif fstype == 'reiserfs': # reiserfs journal size is in blocks if jsize: jopt = "--journal_size %d" %(jsize,) @@ -783,8 +933,8 @@ def loop_base(): panic ("can't access loop devices") return loop -# find loop device assigned to thefile -def find_loop(file): +# find loop device assigned to the file +def find_assigned_loop(file): loop = loop_base() for n in xrange(0, MAX_LOOP_DEVICES): dev = loop + str(n) @@ -794,49 +944,76 @@ def find_loop(file): m = re.search(r'\((.*)\)', out[0]) if m and file == m.group(1): return dev - else: - break return '' -# create file if necessary and assign the first free loop device -def init_loop(file, size, fstype, journal_size, inode_size, mkfsoptions, reformat): - dev = find_loop(file) - if dev: - print 'WARNING file:', file, 'already mapped to', dev - return dev - if reformat or not os.access(file, os.R_OK | os.W_OK): - if size < 8000: - panic("size of loopback file '%s' must be larger than 8MB, but is set to %s" % (file,size)) - (ret, out) = run("dd if=/dev/zero bs=1k count=0 seek=%d of=%s" %(size, - file)) - if ret: - panic("Unable to create backing store:", file) - mkfs(file, size, fstype, journal_size, inode_size, mkfsoptions, isblock=0) - +# find free loop device +def find_free_loop(file): loop = loop_base() + # find next free loop for n in xrange(0, MAX_LOOP_DEVICES): dev = loop + str(n) if os.access(dev, os.R_OK): (stat, out) = run('losetup', dev) if stat: - run('losetup', dev, file) return dev - else: - print "out of loop devices" - return '' - print "out of loop devices" return '' -# undo loop assignment -def clean_loop(file): - dev = find_loop(file) +# create file if necessary and assign the first free loop device +def init_loop(file, size, fstype, journal_size, inode_size, + mkfsoptions, reformat, autoformat, backfstype, backfile): + if fstype == 'smfs': + realfile = backfile + realfstype = backfstype + if is_block(backfile): + if reformat or (need_format(realfstype, backfile) and autoformat == 'yes'): + mkfs(realfile, size, realfstype, journal_size, inode_size, mkfsoptions, isblock=0) + return realfile + else: + realfile = file + realfstype = fstype + + dev = find_assigned_loop(realfile) if dev: - ret, out = run('losetup -d', dev) + print 'WARNING: file', realfile, 'already mapped to', dev + return dev + + if reformat or not os.access(realfile, os.R_OK | os.W_OK): + (ret, out) = run("dd if=/dev/zero bs=1k count=0 seek=%d of=%s" %(size, realfile)) if ret: - log('unable to clean loop device:', dev, 'for file:', file) - logall(out) + panic("Unable to create backing store:", realfile) + mkfs(realfile, size, realfstype, journal_size, inode_size, + mkfsoptions, isblock=0) + dev = find_free_loop(realfile) + if dev: + print "attach " + realfile + " <-> " + dev + run('losetup', dev, realfile) + return dev + + print "out of loop devices" + return '' + +# undo loop assignment +def clean_loop(dev, fstype, backfstype, backdev): + if fstype == 'smfs': + realfile = backdev + else: + realfile = dev + if not is_block(realfile): + dev = find_assigned_loop(realfile) + if dev: + print "detach " + dev + " <-> " + realfile + ret, out = run('losetup -d', dev) + if ret: + log('unable to clean loop device', dev, 'for file', realfile) + logall(out) + +# finilizes passed device +def clean_dev(dev, fstype, backfstype, backdev): + if fstype == 'smfs' or not is_block(dev): + clean_loop(dev, fstype, backfstype, backdev) + # determine if dev is formatted as a filesystem def need_format(fstype, dev): # FIXME don't know how to implement this @@ -844,11 +1021,13 @@ def need_format(fstype, dev): # initialize a block device if needed def block_dev(dev, size, fstype, reformat, autoformat, journal_size, - inode_size, mkfsoptions): - if config.noexec: return dev - if not is_block(dev): + inode_size, mkfsoptions, backfstype, backdev): + if config.noexec: + return dev + + if fstype == 'smfs' or not is_block(dev): dev = init_loop(dev, size, fstype, journal_size, inode_size, - mkfsoptions, reformat) + mkfsoptions, reformat, autoformat, backfstype, backdev) elif reformat or (need_format(fstype, dev) and autoformat == 'yes'): mkfs(dev, size, fstype, journal_size, inode_size, mkfsoptions, isblock=0) @@ -868,6 +1047,17 @@ def if2addr(iface): ip = string.split(addr, ':')[1] return ip +def def_mount_options(fstype, target): + """returns deafult mount options for passed fstype and target (mds, ost)""" + if fstype == 'ext3' or fstype == 'ldiskfs': + mountfsoptions = "errors=remount-ro" + if target == 'ost' and sys_get_branch() == '2.4': + mountfsoptions = "%s,asyncdel" % (mountfsoptions) + if target == 'ost' and sys_get_branch() == '2.6': + mountfsoptions = "%s,extents,mballoc" % (mountfsoptions) + return mountfsoptions + return "" + def sys_get_elan_position_file(): procfiles = ["/proc/elan/device0/position", "/proc/qsnet/elan4/device0/position", @@ -889,7 +1079,7 @@ def sys_get_local_nid(net_type, wildcard, cluster_id): def sys_get_local_address(net_type, wildcard, cluster_id): """Return the local address for the network type.""" local = "" - if net_type in ('tcp',): + if net_type in ('tcp','openib','iib','vib','ra'): if ':' in wildcard: iface, star = string.split(wildcard, ':') local = if2addr(iface) @@ -919,23 +1109,27 @@ def sys_get_local_address(net_type, wildcard, cluster_id): local = elan_id except IOError, e: log(e) + elif net_type == 'lo': + fixme("automatic local address for loopback") elif net_type == 'gm': fixme("automatic local address for GM") return local -def mod_loaded(modname): - """Check if a module is already loaded. Look in /proc/modules for it.""" +def sys_get_branch(): + """Returns kernel release""" try: - fp = open('/proc/modules') + fp = open('/proc/sys/kernel/osrelease') lines = fp.readlines() fp.close() - # please forgive my tired fingers for this one - ret = filter(lambda word, mod=modname: word == mod, - map(lambda line: string.split(line)[0], lines)) - return ret - except Exception, e: - return 0 + + for l in lines: + version = string.split(l) + a = string.split(version[0], '.') + return a[0] + '.' + a[1] + except IOError, e: + log(e) + return "" # XXX: instead of device_list, ask for $name and see what we get def is_prepared(name): @@ -955,7 +1149,7 @@ def is_prepared(name): e.dump() return 0 -def is_network_prepared(): +def net_is_prepared(): """If the any device exists, then assume that all networking has been configured""" out = lctl.device_list() @@ -974,57 +1168,163 @@ def fs_is_mounted(path): except IOError, e: log(e) return 0 - + +def kmod_find(src_dir, dev_dir, modname): + modbase = src_dir +'/'+ dev_dir +'/'+ modname + for modext in '.ko', '.o': + module = modbase + modext + try: + if os.access(module, os.R_OK): + return module + except OSError: + pass + return None + +def kmod_info(modname): + """Returns reference count for passed module name.""" + try: + fp = open('/proc/modules') + lines = fp.readlines() + fp.close() + + # please forgive my tired fingers for this one + ret = filter(lambda word, mod = modname: word[0] == mod, + map(lambda line: string.split(line), lines)) + if not ret: + return '' + return ret[0] + except Exception, e: + return 0 class kmod: + """Presents kernel module""" + def __init__(self, src_dir, dev_dir, name): + self.src_dir = src_dir + self.dev_dir = dev_dir + self.name = name + + # FIXME we ignore the failure of loading gss module, because we might + # don't need it at all. + def load(self): + """Load module""" + log ('loading module:', self.name, 'srcdir', + self.src_dir, 'devdir', self.dev_dir) + if self.src_dir: + module = kmod_find(self.src_dir, self.dev_dir, + self.name) + if not module and self.name != 'ptlrpcs_gss': + panic('module not found:', self.name) + (rc, out) = run('/sbin/insmod', module) + if rc: + if self.name == 'ptlrpcs_gss': + print "Warning: not support gss security!" + else: + raise CommandError('insmod', out, rc) + else: + (rc, out) = run('/sbin/modprobe', self.name) + if rc: + if self.name == 'ptlrpcs_gss': + print "Warning: not support gss security!" + else: + raise CommandError('modprobe', out, rc) + + def cleanup(self): + """Unload module""" + log('unloading module:', self.name) + (rc, out) = run('/sbin/rmmod', self.name) + if rc: + log('unable to unload module:', self.name + + "(" + self.refcount() + ")") + logall(out) + + def info(self): + """Returns module info if any.""" + return kmod_info(self.name) + + def loaded(self): + """Returns 1 if module is loaded. Otherwise 0 is returned.""" + if self.info(): + return 1 + else: + return 0 + + def refcount(self): + """Returns module refcount.""" + info = self.info() + if not info: + return '' + return info[2] + + def used(self): + """Returns 1 if module is used, otherwise 0 is returned.""" + info = self.info() + if not info: + return 0 + if len(info) > 3: + users = info[3] + if users and users != '(unused)' and users != '-': + return 1 + else: + return 0 + else: + return 0 + + def busy(self): + """Returns 1 if module is busy, otherwise 0 is returned.""" + if self.loaded() and (self.used() or self.refcount() != '0'): + return 1 + else: + return 0 + +class kmod_manager: """Manage kernel modules""" def __init__(self, lustre_dir, portals_dir): self.lustre_dir = lustre_dir self.portals_dir = portals_dir self.kmodule_list = [] + def find_module(self, modname): + """Find module by module name""" + for mod in self.kmodule_list: + if mod.name == modname: + return mod + return '' + def add_portals_module(self, dev_dir, modname): """Append a module to list of modules to load.""" - self.kmodule_list.append((self.portals_dir, dev_dir, modname)) + + mod = self.find_module(modname) + if not mod: + mod = kmod(self.portals_dir, dev_dir, modname) + self.kmodule_list.append(mod) def add_lustre_module(self, dev_dir, modname): """Append a module to list of modules to load.""" - self.kmodule_list.append((self.lustre_dir, dev_dir, modname)) - def load_module(self): + mod = self.find_module(modname) + if not mod: + mod = kmod(self.lustre_dir, dev_dir, modname) + self.kmodule_list.append(mod) + + def load_modules(self): """Load all the modules in the list in the order they appear.""" - for src_dir, dev_dir, mod in self.kmodule_list: - if mod_loaded(mod) and not config.noexec: + for mod in self.kmodule_list: + if mod.loaded() and not config.noexec: continue - log ('loading module:', mod, 'srcdir', src_dir, 'devdir', dev_dir) - if src_dir: - module = find_module(src_dir, dev_dir, mod) - if not module: - panic('module not found:', mod) - (rc, out) = run('/sbin/insmod', module) - if rc: - raise CommandError('insmod', out, rc) - else: - (rc, out) = run('/sbin/modprobe', mod) - if rc: - raise CommandError('modprobe', out, rc) + mod.load() - def cleanup_module(self): + def cleanup_modules(self): """Unload the modules in the list in reverse order.""" rev = self.kmodule_list rev.reverse() - for src_dir, dev_dir, mod in rev: - if not mod_loaded(mod) and not config.noexec: + for mod in rev: + if (not mod.loaded() or mod.busy()) and not config.noexec: continue # debug hack - if mod == 'portals' and config.dump: + if mod.name == 'portals' and config.dump: lctl.dump(config.dump) - log('unloading module:', mod) - (rc, out) = run('/sbin/rmmod', mod) - if rc: - log('! unable to unload module:', mod) - logall(out) - + mod.cleanup() + # ============================================================ # Classes to prepare and cleanup the various objects # @@ -1039,8 +1339,7 @@ class Module: self.uuid = self.db.getUUID() self._server = None self._connected = 0 - self.kmod = kmod(config.lustre, config.portals) - + def info(self, *args): msg = string.join(map(str,args)) print self.module_name + ":", self.name, self.uuid, msg @@ -1054,27 +1353,14 @@ class Module: log(self.module_name, "cleanup failed: ", self.name) e.dump() cleanup_error(e.rc) - - def add_portals_module(self, dev_dir, modname): - """Append a module to list of modules to load.""" - self.kmod.add_portals_module(dev_dir, modname) - - def add_lustre_module(self, dev_dir, modname): - """Append a module to list of modules to load.""" - self.kmod.add_lustre_module(dev_dir, modname) - def load_module(self): - """Load all the modules in the list in the order they appear.""" - self.kmod.load_module() - - def cleanup_module(self): - """Unload the modules in the list in reverse order.""" - if self.safe_to_clean(): - self.kmod.cleanup_module() + def add_module(self, manager): + """Adds all needed modules in the order they appear.""" + return def safe_to_clean(self): return 1 - + def safe_to_clean_modules(self): return self.safe_to_clean() @@ -1085,9 +1371,6 @@ class Network(Module): self.nid = self.db.get_val('nid', '*') self.cluster_id = self.db.get_val('clusterid', "0") self.port = self.db.get_val_int('port', 0) - self.send_mem = self.db.get_val_int('sendmem', DEFAULT_TCPBUF) - self.recv_mem = self.db.get_val_int('recvmem', DEFAULT_TCPBUF) - self.irq_affinity = self.db.get_val_int('irqaffinity', 0) if '*' in self.nid: self.nid = sys_get_local_nid(self.net_type, self.nid, self.cluster_id) @@ -1099,36 +1382,56 @@ class Network(Module): self.generic_nid = 0 self.nid_uuid = self.nid_to_uuid(self.nid) - - self.hostaddr = self.db.get_val('hostaddr', self.nid) - if '*' in self.hostaddr: - self.hostaddr = sys_get_local_address(self.net_type, self.hostaddr, self.cluster_id) - if not self.hostaddr: - panic("unable to set hostaddr for", self.net_type, self.hostaddr, self.cluster_id) - debug("hostaddr:", self.hostaddr) - - self.add_portals_module("libcfs", 'libcfs') - self.add_portals_module("portals", 'portals') - if node_needs_router(): - self.add_portals_module("router", 'kptlrouter') + self.hostaddr = self.db.get_hostaddr() + if len(self.hostaddr) == 0: + self.hostaddr.append(self.nid) + if '*' in self.hostaddr[0]: + self.hostaddr[0] = sys_get_local_address(self.net_type, self.hostaddr[0], self.cluster_id) + if not self.hostaddr[0]: + panic("unable to set hostaddr for", self.net_type, self.hostaddr[0], self.cluster_id) + debug("hostaddr:", self.hostaddr[0]) + + def add_module(self, manager): + manager.add_portals_module("libcfs", 'libcfs') + manager.add_portals_module("portals", 'portals') + + if node_needs_router(): + manager.add_portals_module("router", 'kptlrouter') if self.net_type == 'tcp': - self.add_portals_module("knals/socknal", 'ksocknal') + manager.add_portals_module("knals/socknal", 'ksocknal') if self.net_type == 'elan': - self.add_portals_module("knals/qswnal", 'kqswnal') + manager.add_portals_module("knals/qswnal", 'kqswnal') if self.net_type == 'gm': - self.add_portals_module("knals/gmnal", 'kgmnal') + manager.add_portals_module("knals/gmnal", 'kgmnal') + if self.net_type == 'openib': + manager.add_portals_module("knals/openibnal", 'kopenibnal') + if self.net_type == 'iib': + manager.add_portals_module("knals/iibnal", 'kiibnal') + if self.net_type == 'vib': + self.add_portals_module("knals/vibnal", 'kvibnal') + if self.net_type == 'lo': + manager.add_portals_module("knals/lonal", 'klonal') + if self.net_type == 'ra': + manager.add_portals_module("knals/ranal", 'kranal') def nid_to_uuid(self, nid): return "NID_%s_UUID" %(nid,) def prepare(self): - if is_network_prepared(): + if not config.record and net_is_prepared(): return self.info(self.net_type, self.nid, self.port) if not (config.record and self.generic_nid): lctl.network(self.net_type, self.nid) if self.net_type == 'tcp': sys_tweak_socknal() + for hostaddr in self.db.get_hostaddr(): + ip = string.split(hostaddr, '/')[0] + if len(string.split(hostaddr, '/')) == 2: + netmask = string.split(hostaddr, '/')[1] + else: + netmask = "" + lctl.add_interface(self.net_type, ip, netmask) if self.net_type == 'elan': sys_optimize_elan() if self.port and node_is_router(): @@ -1163,7 +1466,7 @@ class Network(Module): cleanup_error(e.rc) def safe_to_clean(self): - return not is_network_prepared() + return not net_is_prepared() def cleanup(self): self.info(self.net_type, self.nid, self.port) @@ -1171,6 +1474,13 @@ class Network(Module): stop_acceptor(self.port) if node_is_router(): self.disconnect_peer_gateways() + if self.net_type == 'tcp': + for hostaddr in self.db.get_hostaddr(): + ip = string.split(hostaddr, '/')[0] + lctl.del_interface(self.net_type, ip) + + def correct_level(self, level, op=None): + return level class RouteTable(Module): def __init__(self,db): @@ -1178,9 +1488,9 @@ class RouteTable(Module): def server_for_route(self, net_type, gw, gw_cluster_id, tgt_cluster_id, lo, hi): - # only setup connections for tcp NALs + # only setup connections for tcp, openib, and iib NALs srvdb = None - if not net_type in ('tcp',): + if not net_type in ('tcp','openib','iib','vib','ra'): return None # connect to target if route is to single node and this node is the gw @@ -1202,7 +1512,7 @@ class RouteTable(Module): return Network(srvdb) def prepare(self): - if is_network_prepared(): + if not config.record and net_is_prepared(): return self.info() for net_type, gw, gw_cluster_id, tgt_cluster_id, lo, hi in self.db.get_route_tbl(): @@ -1212,10 +1522,10 @@ class RouteTable(Module): lctl.connect(srv) def safe_to_clean(self): - return not is_network_prepared() + return not net_is_prepared() def cleanup(self): - if is_network_prepared(): + if net_is_prepared(): # the network is still being used, don't clean it up return for net_type, gw, gw_cluster_id, tgt_cluster_id, lo, hi in self.db.get_route_tbl(): @@ -1238,13 +1548,15 @@ class RouteTable(Module): class Management(Module): def __init__(self, db): Module.__init__(self, 'MGMT', db) - self.add_lustre_module('lvfs', 'lvfs') - self.add_lustre_module('obdclass', 'obdclass') - self.add_lustre_module('ptlrpc', 'ptlrpc') - self.add_lustre_module('mgmt', 'mgmt_svc') + + def add_module(self, manager): + manager.add_lustre_module('lvfs', 'lvfs') + manager.add_lustre_module('obdclass', 'obdclass') + manager.add_lustre_module('ptlrpc', 'ptlrpc') + manager.add_lustre_module('mgmt', 'mgmt_svc') def prepare(self): - if is_prepared(self.name): + if not config.record and is_prepared(self.name): return self.info() lctl.newdev("mgmt", self.name, self.uuid) @@ -1256,14 +1568,21 @@ class Management(Module): if is_prepared(self.name): Module.cleanup(self) + def correct_level(self, level, op=None): + return level + # This is only needed to load the modules; the LDLM device # is now created automatically. class LDLM(Module): def __init__(self,db): Module.__init__(self, 'LDLM', db) - self.add_lustre_module('lvfs', 'lvfs') - self.add_lustre_module('obdclass', 'obdclass') - self.add_lustre_module('ptlrpc', 'ptlrpc') + + def add_module(self, manager): + manager.add_lustre_module('lvfs', 'lvfs') + manager.add_lustre_module('obdclass', 'obdclass') + manager.add_lustre_module('sec', 'ptlrpcs') + manager.add_lustre_module('ptlrpc', 'ptlrpc') + manager.add_lustre_module('sec/gss', 'ptlrpcs_gss') def prepare(self): return @@ -1271,19 +1590,22 @@ class LDLM(Module): def cleanup(self): return + def correct_level(self, level, op=None): + return level + class LOV(Module): def __init__(self, db, uuid, fs_name, name_override = None, config_only = None): Module.__init__(self, 'LOV', db) if name_override != None: self.name = "lov_%s" % name_override - self.add_lustre_module('lov', 'lov') self.mds_uuid = self.db.get_first_ref('mds') - self.stripe_sz = self.db.get_val_int('stripesize', 65536) + self.stripe_sz = self.db.get_val_int('stripesize', 1048576) self.stripe_off = self.db.get_val_int('stripeoffset', 0) self.pattern = self.db.get_val_int('stripepattern', 0) - self.devlist = self.db.get_refs('obd') + self.devlist = self.db.get_lov_tgts('lov_tgt') self.stripe_cnt = self.db.get_val_int('stripecount', len(self.devlist)) self.osclist = [] + self.obdlist = [] self.desc_uuid = self.uuid self.uuid = generate_client_uuid(self.name) self.fs_name = fs_name @@ -1291,193 +1613,368 @@ class LOV(Module): self.config_only = 1 return self.config_only = None - mds= self.db.lookup(self.mds_uuid) + mds = self.db.lookup(self.mds_uuid) self.mds_name = mds.getName() - for obd_uuid in self.devlist: - obd = self.db.lookup(obd_uuid) + for (obd_uuid, index, gen, active) in self.devlist: + if obd_uuid == '': + continue + self.obdlist.append(obd_uuid) + obd = self.db.lookup(obd_uuid) osc = get_osc(obd, self.uuid, fs_name) if osc: - self.osclist.append(osc) + self.osclist.append((osc, index, gen, active)) else: panic('osc not found:', obd_uuid) - + def get_uuid(self): + return self.uuid + def get_name(self): + return self.name def prepare(self): - if is_prepared(self.name): + if not config.record and is_prepared(self.name): return - if self.config_only: - panic("Can't prepare config_only LOV ", self.name) - - for osc in self.osclist: + self.info(self.mds_uuid, self.stripe_cnt, self.stripe_sz, + self.stripe_off, self.pattern, self.devlist, + self.mds_name) + lctl.lov_setup(self.name, self.uuid, self.desc_uuid, self.stripe_cnt, + self.stripe_sz, self.stripe_off, self.pattern, + string.join(self.obdlist)) + for (osc, index, gen, active) in self.osclist: + target_uuid = osc.target_uuid try: # Only ignore connect failures with --force, which # isn't implemented here yet. + osc.active = active osc.prepare(ignore_connect_failure=0) except CommandError, e: print "Error preparing OSC %s\n" % osc.uuid raise e - self.info(self.mds_uuid, self.stripe_cnt, self.stripe_sz, - self.stripe_off, self.pattern, self.devlist, self.mds_name) - lctl.lov_setup(self.name, self.uuid, - self.desc_uuid, self.mds_name, self.stripe_cnt, - self.stripe_sz, self.stripe_off, self.pattern, - string.join(self.devlist)) + lctl.lov_add_obd(self.name, self.uuid, target_uuid, index, gen) def cleanup(self): + for (osc, index, gen, active) in self.osclist: + target_uuid = osc.target_uuid + osc.cleanup() if is_prepared(self.name): Module.cleanup(self) if self.config_only: panic("Can't clean up config_only LOV ", self.name) - for osc in self.osclist: - osc.cleanup() - def load_module(self): + def add_module(self, manager): if self.config_only: panic("Can't load modules for config_only LOV ", self.name) - for osc in self.osclist: - osc.load_module() + for (osc, index, gen, active) in self.osclist: + osc.add_module(manager) break - Module.load_module(self) + manager.add_lustre_module('lov', 'lov') - def cleanup_module(self): - if self.config_only: - panic("Can't cleanup modules for config_only LOV ", self.name) - Module.cleanup_module(self) - for osc in self.osclist: - osc.cleanup_module() + def correct_level(self, level, op=None): + return level + +class LMV(Module): + def __init__(self, db, uuid, fs_name, name_override = None): + Module.__init__(self, 'LMV', db) + if name_override != None: + self.name = "lmv_%s" % name_override + + self.devlist = self.db.get_lmv_tgts('lmv_tgt') + if self.devlist == None: + self.devlist = self.db.get_refs('mds') + + self.mdclist = [] + self.desc_uuid = self.uuid + self.uuid = uuid + self.fs_name = fs_name + for mds_uuid in self.devlist: + mds = self.db.lookup(mds_uuid) + if not mds: + panic("MDS not found!") + mdc = MDC(mds, self.uuid, fs_name) + if mdc: + self.mdclist.append(mdc) + else: + panic('mdc not found:', mds_uuid) + + def prepare(self): + if is_prepared(self.name): + return + + self.info(); + for mdc in self.mdclist: + try: + # Only ignore connect failures with --force, which + # isn't implemented here yet. + mdc.prepare(ignore_connect_failure=0) + except CommandError, e: + print "Error preparing LMV %s\n" % mdc.uuid + raise e + + lctl.lmv_setup(self.name, self.uuid, self.desc_uuid, + string.join(self.devlist)) + + def cleanup(self): + for mdc in self.mdclist: + mdc.cleanup() + if is_prepared(self.name): + Module.cleanup(self) + + def add_module(self, manager): + for mdc in self.mdclist: + mdc.add_module(manager) break + manager.add_lustre_module('lmv', 'lmv') -class MDSDEV(Module): - def __init__(self,db): - Module.__init__(self, 'MDSDEV', db) + def correct_level(self, level, op=None): + return level + +class CONFDEV(Module): + def __init__(self, db, name, target_uuid, uuid): + Module.__init__(self, 'CONFDEV', db) self.devpath = self.db.get_val('devpath','') + self.backdevpath = self.db.get_val('devpath','') self.size = self.db.get_val_int('devsize', 0) self.journal_size = self.db.get_val_int('journalsize', 0) self.fstype = self.db.get_val('fstype', '') - self.nspath = self.db.get_val('nspath', '') + self.backfstype = self.db.get_val('backfstype', '') self.mkfsoptions = self.db.get_val('mkfsoptions', '') - # overwrite the orignal MDSDEV name and uuid with the MDS name and uuid - target_uuid = self.db.get_first_ref('target') - mds = self.db.lookup(target_uuid) - self.name = mds.getName() - self.filesystem_uuids = mds.get_refs('filesystem') - # FIXME: if fstype not set, then determine based on kernel version - self.format = self.db.get_val('autoformat', "no") - if mds.get_val('failover', 0): - self.failover_mds = 'f' - else: - self.failover_mds = 'n' - active_uuid = get_active_target(mds) - if not active_uuid: - panic("No target device found:", target_uuid) - if active_uuid == self.uuid: - self.active = 1 + self.mountfsoptions = self.db.get_val('mountfsoptions', '') + self.target = self.db.lookup(target_uuid) + self.name = "conf_%s" % self.target.getName() + self.client_uuids = self.target.get_refs('client') + self.obdtype = self.db.get_val('obdtype', '') + + self.mds_sec = self.db.get_val('mds_sec', '') + self.oss_sec = self.db.get_val('oss_sec', '') + self.deny_sec = self.db.get_val('deny_sec', '') + + if config.mds_mds_sec: + self.mds_sec = config.mds_mds_sec + if config.mds_oss_sec: + self.oss_sec = config.mds_oss_sec + if config.mds_deny_sec: + if self.deny_sec: + self.deny_sec = "%s,%s" %(self.deny_sec, config.mds_deny_sec) + else: + self.deny_sec = config.mds_deny_sec + + if self.obdtype == None: + self.obdtype = 'dumb' + + self.conf_name = name + self.conf_uuid = uuid + self.realdev = self.devpath + + self.lmv = None + self.master = None + + lmv_uuid = self.db.get_first_ref('lmv') + if lmv_uuid != None: + self.lmv = self.db.lookup(lmv_uuid) + if self.lmv != None: + self.client_uuids = self.lmv.get_refs('client') + + if self.target.get_class() == 'mds': + if self.target.get_val('failover', 0): + self.failover_mds = 'f' + else: + self.failover_mds = 'n' + self.format = self.db.get_val('autoformat', "no") else: - self.active = 0 - if self.active and config.group and config.group != mds.get_val('group'): - self.active = 0 - - self.inode_size = self.db.get_val_int('inodesize', 0) - if self.inode_size == 0: + self.format = self.db.get_val('autoformat', "yes") + self.osdtype = self.db.get_val('osdtype') + ost = self.db.lookup(target_uuid) + if ost.get_val('failover', 0): + self.failover_ost = 'f' + else: + self.failover_ost = 'n' + + self.inode_size = self.get_inode_size() + + if self.lmv != None: + client_uuid = self.name + "_lmv_UUID" + self.master = LMV(self.lmv, client_uuid, + self.conf_name, self.conf_name) + + def get_inode_size(self): + inode_size = self.db.get_val_int('inodesize', 0) + if inode_size == 0 and self.target.get_class() == 'mds': + + # default inode size for case when neither LOV either + # LMV is accessible. + self.inode_size = 256 + # find the LOV for this MDS - lovconfig_uuid = mds.get_first_ref('lovconfig') - if not lovconfig_uuid: - panic("No LOV config found for MDS ", mds.name) - lovconfig = mds.lookup(lovconfig_uuid) - lov_uuid = lovconfig.get_first_ref('lov') - if not lov_uuid: - panic("No LOV found for lovconfig ", lovconfig.name) - lov = LOV(self.db.lookup(lov_uuid), lov_uuid, 'FS_name', config_only = 1) - - # default stripe count controls default inode_size - stripe_count = lov.stripe_cnt - if stripe_count > 77: - self.inode_size = 4096 - elif stripe_count > 35: - self.inode_size = 2048 - elif stripe_count > 13: - self.inode_size = 1024 - elif stripe_count > 3: - self.inode_size = 512 + lovconfig_uuid = self.target.get_first_ref('lovconfig') + if lovconfig_uuid or self.lmv != None: + if self.lmv != None: + lovconfig_uuid = self.lmv.get_first_ref('lovconfig') + lovconfig = self.lmv.lookup(lovconfig_uuid) + lov_uuid = lovconfig.get_first_ref('lov') + if lov_uuid == None: + panic(self.target.getName() + ": No LOV found for lovconfig ", + lovconfig.name) + else: + lovconfig = self.target.lookup(lovconfig_uuid) + lov_uuid = lovconfig.get_first_ref('lov') + if lov_uuid == None: + panic(self.target.getName() + ": No LOV found for lovconfig ", + lovconfig.name) + if self.lmv != None: + lovconfig_uuid = self.lmv.get_first_ref('lovconfig') + lovconfig = self.lmv.lookup(lovconfig_uuid) + lov_uuid = lovconfig.get_first_ref('lov') + + lov = LOV(self.db.lookup(lov_uuid), lov_uuid, self.name, + config_only = 1) + + # default stripe count controls default inode_size + if lov.stripe_cnt > 0: + stripe_count = lov.stripe_cnt + else: + stripe_count = len(lov.devlist) + if stripe_count > 77: + inode_size = 4096 + elif stripe_count > 35: + inode_size = 2048 + elif stripe_count > 13: + inode_size = 1024 + elif stripe_count > 3: + inode_size = 512 + else: + inode_size = 256 + + return inode_size + + def get_mount_options(self, blkdev): + options = def_mount_options(self.fstype, + self.target.get_class()) + + if config.mountfsoptions: + if options: + options = "%s,%s" %(options, config.mountfsoptions) else: - self.inode_size = 256 - - self.target_dev_uuid = self.uuid - self.uuid = target_uuid - # modules - self.add_lustre_module('mdc', 'mdc') - self.add_lustre_module('osc', 'osc') - self.add_lustre_module('lov', 'lov') - self.add_lustre_module('mds', 'mds') - if self.fstype == 'ldiskfs': - self.add_lustre_module('ldiskfs', 'ldiskfs') - if self.fstype: - self.add_lustre_module('lvfs', 'fsfilt_%s' % (self.fstype)) - - def load_module(self): - if self.active: - Module.load_module(self) + options = config.mountfsoptions + if self.mountfsoptions: + options = "%s,%s" %(options, self.mountfsoptions) + else: + if self.mountfsoptions: + if options: + options = "%s,%s" %(options, self.mountfsoptions) + else: + options = self.mountfsoptions + if self.fstype == 'smfs': + if options: + options = "%s,type=%s,dev=%s" %(options, self.backfstype, + blkdev) + else: + options = "type=%s,dev=%s" %(self.backfstype, + blkdev) + + if self.target.get_class() == 'mds': + if options: + options = "%s,acl,user_xattr,iopen_nopriv" %(options) + else: + options = "iopen_nopriv" + + return options + def prepare(self): if is_prepared(self.name): return - if not self.active: - debug(self.uuid, "not active") - return - if config.reformat: - # run write_conf automatically, if --reformat used - self.write_conf() - self.info(self.devpath, self.fstype, self.size, self.format) - run_acceptors() - # never reformat here - blkdev = block_dev(self.devpath, self.size, self.fstype, 0, - self.format, self.journal_size, self.inode_size, - self.mkfsoptions) - if not is_prepared('MDT'): - lctl.newdev("mdt", 'MDT', 'MDT_UUID', setup ="") - try: - lctl.newdev("mds", self.name, self.uuid, - setup ="%s %s %s" %(blkdev, self.fstype, self.name)) - except CommandError, e: - if e.rc == 2: - panic("MDS is missing the config log. Need to run " + - "lconf --write_conf.") - else: - raise e + + blkdev = block_dev(self.devpath, self.size, self.fstype, + config.reformat, self.format, self.journal_size, + self.inode_size, self.mkfsoptions, self.backfstype, + self.backdevpath) + + if self.fstype == 'smfs': + realdev = blkdev + else: + realdev = blkdev + + mountfsoptions = self.get_mount_options(blkdev) + + self.info(self.target.get_class(), realdev, mountfsoptions, + self.fstype, self.size, self.format) + + lctl.newdev("confobd", self.name, self.uuid, + setup ="%s %s %s" %(realdev, self.fstype, + mountfsoptions)) + + self.mountfsoptions = mountfsoptions + self.realdev = realdev + + def add_module(self, manager): + manager.add_lustre_module('obdclass', 'confobd') def write_conf(self): - if is_prepared(self.name): + if self.target.get_class() == 'ost': + config.record = 1 + lctl.clear_log(self.name, self.target.getName() + '-conf') + lctl.record(self.name, self.target.getName() + '-conf') + lctl.newdev(self.osdtype, self.conf_name, self.conf_uuid, + setup ="%s %s %s %s" %(self.realdev, self.fstype, + self.failover_ost, + self.mountfsoptions)) + lctl.end_record() + lctl.clear_log(self.name, 'OSS-conf') + lctl.record(self.name, 'OSS-conf') + lctl.newdev("ost", 'OSS', 'OSS_UUID', setup ="") + lctl.end_record() + config.record = 0 return - self.info(self.devpath, self.fstype, self.format) - blkdev = block_dev(self.devpath, self.size, self.fstype, - config.reformat, self.format, self.journal_size, - self.inode_size, self.mkfsoptions) - lctl.newdev("mds", self.name, self.uuid, - setup ="%s %s" %(blkdev, self.fstype)) - - # record logs for the MDS lov - for uuid in self.filesystem_uuids: - log("recording clients for filesystem:", uuid) - fs = self.db.lookup(uuid) - obd_uuid = fs.get_first_ref('obd') + + if self.target.get_class() == 'mds': + if self.master != None: + master_name = self.master.name + else: + master_name = 'dumb' + + config.record = 1 + lctl.clear_log(self.name, self.target.getName() + '-conf') + lctl.record(self.name, self.target.getName() + '-conf') + lctl.attach("mds", self.conf_name, self.conf_uuid) + if self.mds_sec: + lctl.set_security(self.conf_name, "mds_sec", self.mds_sec) + if self.oss_sec: + lctl.set_security(self.conf_name, "oss_sec", self.oss_sec) + if self.deny_sec: + for flavor in string.split(self.deny_sec, ','): + lctl.set_security(self.conf_name, "deny_sec", flavor) + lctl.newdev("mds", self.conf_name, self.conf_uuid, + setup ="%s %s %s %s %s %s" %(self.realdev, self.fstype, + self.conf_name, self.mountfsoptions, + master_name, self.obdtype)) + lctl.end_record() + config.record = 0 + + if not self.client_uuids: + return 0 + + for uuid in self.client_uuids: + log("recording client:", uuid) client_uuid = generate_client_uuid(self.name) - client = VOSC(self.db.lookup(obd_uuid), client_uuid, self.name, - self.name) + client = VOSC(self.db.lookup(uuid), client_uuid, + self.target.getName(), self.name) config.record = 1 - lctl.clear_log(self.name, self.name) - lctl.record(self.name, self.name) + lctl.clear_log(self.name, self.target.getName()) + lctl.record(self.name, self.target.getName()) client.prepare() - lctl.mount_option(self.name, client.get_name(), "") + lctl.mount_option(self.target.getName(), client.get_name(), "") lctl.end_record() config.cleanup = 1 - lctl.clear_log(self.name, self.name + '-clean') - lctl.record(self.name, self.name + '-clean') + lctl.clear_log(self.name, self.target.getName() + '-clean') + lctl.record(self.name, self.target.getName() + '-clean') client.cleanup() - lctl.del_mount_option(self.name) + lctl.del_mount_option(self.target.getName()) lctl.end_record() config.cleanup = 0 config.record = 0 + if config.record: + return + # record logs for each client if config.ldapurl: config_options = "--ldapurl " + config.ldapurl + " --config " + config.config @@ -1515,15 +2012,166 @@ class MDSDEV(Module): if config.verbose: for s in out: log("record> ", string.strip(s)) config.noexec = old_noexec - try: - lctl.cleanup(self.name, self.uuid, 0, 0) - except CommandError, e: - log(self.module_name, "cleanup failed: ", self.name) - e.dump() - cleanup_error(e.rc) - Module.cleanup(self) - clean_loop(self.devpath) + + def start(self): + try: + lctl.start(self.name, self.conf_name) + except CommandError, e: + raise e + if self.target.get_class() == 'ost': + if not is_prepared('OSS'): + try: + lctl.start(self.name, 'OSS') + except CommandError, e: + raise e + + def cleanup(self): + if is_prepared(self.name): + try: + lctl.cleanup(self.name, self.uuid, 0, 0) + clean_dev(self.devpath, self.fstype, + self.backfstype, self.backdevpath) + except CommandError, e: + log(self.module_name, "cleanup failed: ", self.name) + e.dump() + cleanup_error(e.rc) + Module.cleanup(self) + +class MDSDEV(Module): + def __init__(self,db): + Module.__init__(self, 'MDSDEV', db) + self.devpath = self.db.get_val('devpath','') + self.backdevpath = self.db.get_val('devpath','') + self.size = self.db.get_val_int('devsize', 0) + self.journal_size = self.db.get_val_int('journalsize', 0) + self.fstype = self.db.get_val('fstype', '') + self.backfstype = self.db.get_val('backfstype', '') + self.nspath = self.db.get_val('nspath', '') + self.mkfsoptions = self.db.get_val('mkfsoptions', '') + self.mountfsoptions = self.db.get_val('mountfsoptions', '') + self.obdtype = self.db.get_val('obdtype', '') + self.root_squash = self.db.get_val('root_squash', '') + self.no_root_squash = self.db.get_val('no_root_squash', '') + + target_uuid = self.db.get_first_ref('target') + self.target = self.db.lookup(target_uuid) + self.name = self.target.getName() + self.master = None + self.lmv = None + + lmv_uuid = self.db.get_first_ref('lmv') + if lmv_uuid != None: + self.lmv = self.db.lookup(lmv_uuid) + + active_uuid = get_active_target(self.target) + if not active_uuid: + panic("No target device found:", target_uuid) + if active_uuid == self.uuid: + self.active = 1 + group = self.target.get_val('group') + if config.group and config.group != group: + self.active = 0 + else: + self.active = 0 + + self.uuid = target_uuid + + # setup LMV + if self.lmv != None: + client_uuid = self.name + "_lmv_UUID" + self.master = LMV(self.lmv, client_uuid, + self.name, self.name) + + self.confobd = CONFDEV(self.db, self.name, + target_uuid, self.uuid) + + def add_module(self, manager): + if self.active: + manager.add_lustre_module('mdc', 'mdc') + manager.add_lustre_module('osc', 'osc') + manager.add_lustre_module('ost', 'ost') + manager.add_lustre_module('lov', 'lov') + manager.add_lustre_module('mds', 'mds') + + if self.fstype == 'smfs' or self.fstype == 'ldiskfs': + manager.add_lustre_module(self.fstype, self.fstype) + + if self.fstype: + manager.add_lustre_module('lvfs', 'fsfilt_%s' % (self.fstype)) + + # if fstype is smfs, then we should also take care about backing + # store fs. + if self.fstype == 'smfs': + manager.add_lustre_module(self.backfstype, self.backfstype) + manager.add_lustre_module('lvfs', 'fsfilt_%s' % (self.backfstype)) + + for option in string.split(self.mountfsoptions, ','): + if option == 'snap': + if not self.fstype == 'smfs': + panic("mountoptions has 'snap', but fstype is not smfs.") + manager.add_lustre_module('lvfs', 'fsfilt_snap_%s' % (self.fstype)) + manager.add_lustre_module('lvfs', 'fsfilt_snap_%s' % (self.backfstype)) + + # add LMV modules + if self.master != None: + self.master.add_module(manager) + # add CONFOBD modules + if self.confobd != None: + self.confobd.add_module(manager) + + def write_conf(self): + if is_prepared(self.name): + return + if not self.active: + debug(self.uuid, "not active") + return + run_acceptors() + self.confobd.prepare() + self.confobd.write_conf() + self.confobd.cleanup() + + def prepare(self): + if is_prepared(self.name): + return + if not self.active: + debug(self.uuid, "not active") + return + run_acceptors() + + self.confobd.prepare() + if config.reformat: + self.confobd.write_conf() + + # prepare LMV + if self.master != None: + self.master.prepare() + + if not config.record: + self.confobd.start() + + if not is_prepared('MDT'): + lctl.newdev("mdt", 'MDT', 'MDT_UUID', setup ="") + + if development_mode(): + procentry = "/proc/fs/lustre/mds/lsd_upcall" + upcall = os.path.abspath(os.path.dirname(sys.argv[0]) + "/lsd_upcall") + if not (os.access(procentry, os.R_OK) and os.access(upcall, os.R_OK)): + print "MDS Warning: failed to set lsd cache upcall" + else: + run("echo ", upcall, " > ", procentry) + + if config.root_squash == None: + config.root_squash = self.root_squash + if config.no_root_squash == None: + config.no_root_squash = self.no_root_squash + if config.root_squash: + if config.no_root_squash: + nsnid = config.no_root_squash + else: + nsnid = "0" + lctl.root_squash(self.name, config.root_squash, nsnid) + def msd_remaining(self): out = lctl.device_list() for s in out: @@ -1535,7 +2183,7 @@ class MDSDEV(Module): def safe_to_clean_modules(self): return not self.msd_remaining() - + def cleanup(self): if not self.active: debug(self.uuid, "not active") @@ -1550,6 +2198,9 @@ class MDSDEV(Module): e.dump() cleanup_error(e.rc) Module.cleanup(self) + # cleanup LMV + if self.master != None: + self.master.cleanup() if not self.msd_remaining() and is_prepared('MDT'): try: lctl.cleanup("MDT", "MDT_UUID", config.force, @@ -1558,18 +2209,28 @@ class MDSDEV(Module): print "cleanup failed: ", self.name e.dump() cleanup_error(e.rc) - clean_loop(self.devpath) - + + if self.confobd: + self.confobd.cleanup() + + def correct_level(self, level, op=None): + #if self.master != None: + # level = level + 2 + return level + class OSD(Module): def __init__(self, db): Module.__init__(self, 'OSD', db) self.osdtype = self.db.get_val('osdtype') self.devpath = self.db.get_val('devpath', '') + self.backdevpath = self.db.get_val('devpath', '') self.size = self.db.get_val_int('devsize', 0) self.journal_size = self.db.get_val_int('journalsize', 0) self.inode_size = self.db.get_val_int('inodesize', 0) self.mkfsoptions = self.db.get_val('mkfsoptions', '') + self.mountfsoptions = self.db.get_val('mountfsoptions', '') self.fstype = self.db.get_val('fstype', '') + self.backfstype = self.db.get_val('backfstype', '') self.nspath = self.db.get_val('nspath', '') target_uuid = self.db.get_first_ref('target') ost = self.db.lookup(target_uuid) @@ -1580,54 +2241,96 @@ class OSD(Module): else: self.failover_ost = 'n' + self.deny_sec = self.db.get_val('deny_sec', '') + + if config.ost_deny_sec: + if self.deny_sec: + self.deny_sec = "%s,%s" %(self.deny_sec, config.ost_deny_sec) + else: + self.deny_sec = config.ost_deny_sec + active_uuid = get_active_target(ost) if not active_uuid: panic("No target device found:", target_uuid) if active_uuid == self.uuid: self.active = 1 + group = ost.get_val('group') + if config.group and config.group != group: + self.active = 0 else: self.active = 0 - if self.active and config.group and config.group != ost.get_val('group'): - self.active = 0 - - self.target_dev_uuid = self.uuid + self.uuid = target_uuid - # modules - self.add_lustre_module('ost', 'ost') - # FIXME: should we default to ext3 here? - if self.fstype == 'ldiskfs': - self.add_lustre_module('ldiskfs', 'ldiskfs') + self.confobd = CONFDEV(self.db, self.name, + target_uuid, self.uuid) + + def add_module(self, manager): + if not self.active: + return + manager.add_lustre_module('ost', 'ost') + + if self.fstype == 'smfs' or self.fstype == 'ldiskfs': + manager.add_lustre_module(self.fstype, self.fstype) + if self.fstype: - self.add_lustre_module('lvfs' , 'fsfilt_%s' % (self.fstype)) - self.add_lustre_module(self.osdtype, self.osdtype) + manager.add_lustre_module('lvfs' , 'fsfilt_%s' % (self.fstype)) - def load_module(self): - if self.active: - Module.load_module(self) + if self.fstype == 'smfs': + manager.add_lustre_module(self.backfstype, self.backfstype) + manager.add_lustre_module('lvfs' , 'fsfilt_%s' % (self.backfstype)) + + for option in self.mountfsoptions: + if option == 'snap': + if not self.fstype == 'smfs': + panic("mountoptions with snap, but fstype is not smfs\n") + manager.add_lustre_module('lvfs', 'fsfilt_snap_%s' % (self.fstype)) + manager.add_lustre_module('lvfs', 'fsfilt_snap_%s' % (self.backfstype)) + + manager.add_lustre_module(self.osdtype, self.osdtype) + + # add CONFOBD modules + if self.confobd != None: + self.confobd.add_module(manager) - # need to check /proc/mounts and /etc/mtab before - # formatting anything. - # FIXME: check if device is already formatted. def prepare(self): if is_prepared(self.name): return if not self.active: debug(self.uuid, "not active") return - self.info(self.osdtype, self.devpath, self.size, self.fstype, - self.format, self.journal_size, self.inode_size) + run_acceptors() + if self.osdtype == 'obdecho': - blkdev = '' - else: - blkdev = block_dev(self.devpath, self.size, self.fstype, - config.reformat, self.format, self.journal_size, - self.inode_size, self.mkfsoptions) - lctl.newdev(self.osdtype, self.name, self.uuid, - setup ="%s %s %s" %(blkdev, self.fstype, - self.failover_ost)) - if not is_prepared('OSS'): - lctl.newdev("ost", 'OSS', 'OSS_UUID', setup ="") + self.info(self.osdtype) + lctl.newdev("obdecho", self.name, self.uuid) + if not is_prepared('OSS'): + lctl.newdev("ost", 'OSS', 'OSS_UUID', setup="") + else: + self.confobd.prepare() + if config.reformat: + self.confobd.write_conf() + if not config.record: + self.confobd.start() + + if self.deny_sec: + for flavor in string.split(self.deny_sec, ','): + lctl.set_security(self.name, "deny_sec", flavor) + + def write_conf(self): + if is_prepared(self.name): + return + if not self.active: + debug(self.uuid, "not active") + return + + run_acceptors() + if self.osdtype != 'obdecho': + self.confobd.prepare() + self.confobd.write_conf() + if not config.write_conf: + self.confobd.start() + self.confobd.cleanup() def osd_remaining(self): out = lctl.device_list() @@ -1645,6 +2348,7 @@ class OSD(Module): if not self.active: debug(self.uuid, "not active") return + if is_prepared(self.name): self.info() try: @@ -1662,32 +2366,29 @@ class OSD(Module): print "cleanup failed: ", self.name e.dump() cleanup_error(e.rc) - if not self.osdtype == 'obdecho': - clean_loop(self.devpath) -def mgmt_uuid_for_fs(mtpt_name): - if not mtpt_name: - return '' - mtpt_db = toplevel.lookup_name(mtpt_name) - fs_uuid = mtpt_db.get_first_ref('filesystem') - fs = toplevel.lookup(fs_uuid) - if not fs: - return '' - return fs.get_first_ref('mgmt') + if self.osdtype != 'obdecho': + if self.confobd: + self.confobd.cleanup() + + def correct_level(self, level, op=None): + return level # Generic client module, used by OSC and MDC class Client(Module): - def __init__(self, tgtdb, uuid, module, fs_name, self_name=None, - module_dir=None): + def __init__(self, tgtdb, uuid, module, fs_name, + self_name=None, module_dir=None): self.target_name = tgtdb.getName() self.target_uuid = tgtdb.getUUID() + self.module_dir = module_dir + self.backup_targets = [] + self.module = module self.db = tgtdb self.tgt_dev_uuid = get_active_target(tgtdb) if not self.tgt_dev_uuid: - panic("No target device found for target:", self.target_name) - - self.kmod = kmod(config.lustre, config.portals) + panic("No target device found for target(1):", self.target_name) + self._server = None self._connected = 0 @@ -1700,28 +2401,43 @@ class Client(Module): self.name = self_name self.uuid = uuid self.lookup_server(self.tgt_dev_uuid) - mgmt_uuid = mgmt_uuid_for_fs(fs_name) - if mgmt_uuid: - self.mgmt_name = mgmtcli_name_for_uuid(mgmt_uuid) - else: - self.mgmt_name = '' + self.lookup_backup_targets() self.fs_name = fs_name - if not module_dir: - module_dir = module - self.add_lustre_module(module_dir, module) + if not self.module_dir: + self.module_dir = module + + def add_module(self, manager): + manager.add_lustre_module(self.module_dir, self.module) def lookup_server(self, srv_uuid): """ Lookup a server's network information """ self._server_nets = get_ost_net(self.db, srv_uuid) if len(self._server_nets) == 0: panic ("Unable to find a server for:", srv_uuid) + + def get_name(self): + return self.name def get_servers(self): return self._server_nets + def lookup_backup_targets(self): + """ Lookup alternative network information """ + prof_list = toplustreDB.get_refs('profile') + for prof_uuid in prof_list: + prof_db = toplustreDB.lookup(prof_uuid) + if not prof_db: + panic("profile:", prof_uuid, "not found.") + for ref_class, ref_uuid in prof_db.get_all_refs(): + if ref_class in ('osd', 'mdsdev'): + devdb = toplustreDB.lookup(ref_uuid) + uuid = devdb.get_first_ref('target') + if self.target_uuid == uuid and self.tgt_dev_uuid != ref_uuid: + self.backup_targets.append(ref_uuid) + def prepare(self, ignore_connect_failure = 0): self.info(self.target_uuid) - if is_prepared(self.name): + if not config.record and is_prepared(self.name): self.cleanup() try: srv = choose_local_server(self.get_servers()) @@ -1736,6 +2452,7 @@ class Client(Module): except CommandError, e: if not ignore_connect_failure: raise e + if srv: if self.target_uuid in config.inactive and self.permits_inactive(): debug("%s inactive" % self.target_uuid) @@ -1744,8 +2461,23 @@ class Client(Module): debug("%s active" % self.target_uuid) inactive_p = "" lctl.newdev(self.module, self.name, self.uuid, - setup ="%s %s %s %s" % (self.target_uuid, srv.nid_uuid, - inactive_p, self.mgmt_name)) + setup ="%s %s %s" % (self.target_uuid, srv.nid_uuid, + inactive_p)) + for tgt_dev_uuid in self.backup_targets: + this_nets = get_ost_net(toplustreDB, tgt_dev_uuid) + if len(this_nets) == 0: + panic ("Unable to find a server for:", tgt_dev_uuid) + srv = choose_local_server(this_nets) + if srv: + lctl.connect(srv) + else: + routes = find_route(this_nets); + if len(routes) == 0: + panic("no route to", tgt_dev_uuid) + for (srv, r) in routes: + lctl.add_route_host(r[0]. srv.nid_uuid, r[1], r[3]) + if srv: + lctl.add_conn(self.name, srv.nid_uuid); def cleanup(self): if is_prepared(self.name): @@ -1762,6 +2494,25 @@ class Client(Module): e.dump() cleanup_error(e.rc) + for tgt_dev_uuid in self.backup_targets: + this_net = get_ost_net(toplustreDB, tgt_dev_uuid) + srv = choose_local_server(this_net) + if srv: + lctl.disconnect(srv) + else: + for (srv, r) in find_route(this_net): + lctl.del_route_host(r[0]. srv.nid_uuid, r[1], r[3]) + + def correct_level(self, level, op=None): + return level + + def deactivate(self): + try: + lctl.deactivate(self.name) + except CommandError, e: + log(self.module_name, "deactivate failed: ", self.name) + e.dump() + cleanup_error(e.rc) class MDC(Client): def __init__(self, db, uuid, fs_name): @@ -1777,66 +2528,234 @@ class OSC(Client): def permits_inactive(self): return 1 -def mgmtcli_name_for_uuid(uuid): - return 'MGMTCLI_%s' % uuid - -class ManagementClient(Client): - def __init__(self, db, uuid): - Client.__init__(self, db, uuid, 'mgmt_cli', '', - self_name = mgmtcli_name_for_uuid(db.getUUID()), - module_dir = 'mgmt') +class CMOBD(Module): + def __init__(self, db): + Module.__init__(self, 'CMOBD', db) + self.name = self.db.getName(); + self.uuid = generate_client_uuid(self.name) + self.master_uuid = self.db.get_first_ref('masterobd') + self.cache_uuid = self.db.get_first_ref('cacheobd') + + master_obd = self.db.lookup(self.master_uuid) + if not master_obd: + panic('master obd not found:', self.master_uuid) + + cache_obd = self.db.lookup(self.cache_uuid) + if not cache_obd: + panic('cache obd not found:', self.cache_uuid) + + self.master = None + self.cache = None + master_class = master_obd.get_class() + cache_class = cache_obd.get_class() + + if master_class == 'ost' or master_class == 'lov': + client_uuid = "%s_lov_master_UUID" % (self.name) + self.master = LOV(master_obd, client_uuid, self.name); + elif master_class == 'mds': + self.master = get_mdc(db, self.name, self.master_uuid) + elif master_class == 'lmv': + #tmp fix: cobd and cmobd will use same uuid, so use const name here + client_uuid = "%s_lmv_master_UUID" % "master" + self.master = LMV(master_obd, client_uuid, self.name); + else: + panic("unknown master obd class '%s'" %(master_class)) + + if cache_class == 'ost' or cache_class == 'lov': + client_uuid = "%s_lov_cache_UUID" % (self.name) + self.cache = LOV(cache_obd, client_uuid, self.name); + elif cache_class == 'mds': + self.cache = get_mdc(db, self.name, self.cache_uuid) + elif cache_class == 'lmv': + client_uuid = "%s_lmv_cache_UUID" % (self.name) + self.cache = LMV(cache_obd, client_uuid, self.name); + else: + panic("unknown cache obd class '%s'" %(cache_class)) + + def prepare(self): + self.master.prepare() + if not config.record and is_prepared(self.name): + return + self.info(self.master_uuid, self.cache_uuid) + lctl.newdev("cmobd", self.name, self.uuid, + setup ="%s %s" %(self.master.uuid, + self.cache.uuid)) + + def get_uuid(self): + return self.uuid + + def get_name(self): + return self.name + + def get_master_name(self): + return self.master.name + + def get_cache_name(self): + return self.cache.name + + def cleanup(self): + if is_prepared(self.name): + Module.cleanup(self) + if self.master: + self.master.cleanup() + + def add_module(self, manager): + manager.add_lustre_module('smfs', 'smfs') + manager.add_lustre_module('cmobd', 'cmobd') + self.master.add_module(manager) + + def correct_level(self, level, op=None): + return level + class COBD(Module): - def __init__(self, db): + def __init__(self, db, uuid, name): Module.__init__(self, 'COBD', db) - self.real_uuid = self.db.get_first_ref('realobd') + self.name = self.db.getName(); + self.uuid = generate_client_uuid(self.name) + self.master_uuid = self.db.get_first_ref('masterobd') self.cache_uuid = self.db.get_first_ref('cacheobd') - self.add_lustre_module('cobd' , 'cobd') - # need to check /proc/mounts and /etc/mtab before - # formatting anything. - # FIXME: check if device is already formatted. + master_obd = self.db.lookup(self.master_uuid) + if not master_obd: + panic('master obd not found:', self.master_uuid) + + cache_obd = self.db.lookup(self.cache_uuid) + if not cache_obd: + panic('cache obd not found:', self.cache_uuid) + + self.master = None + self.cache = None + + master_class = master_obd.get_class() + cache_class = cache_obd.get_class() + + if master_class == 'ost' or master_class == 'lov': + client_uuid = "%s_lov_master_UUID" % (self.name) + self.master = LOV(master_obd, client_uuid, name); + elif master_class == 'mds': + self.master = get_mdc(db, name, self.master_uuid) + elif master_class == 'lmv': + #tmp fix: cobd and cmobd will use same uuid, so use const name here + client_uuid = "%s_lmv_master_UUID" % "master" + self.master = LMV(master_obd, client_uuid, self.name); + else: + panic("unknown master obd class '%s'" %(master_class)) + + if cache_class == 'ost' or cache_class == 'lov': + client_uuid = "%s_lov_cache_UUID" % (self.name) + self.cache = LOV(cache_obd, client_uuid, name); + elif cache_class == 'mds': + self.cache = get_mdc(db, name, self.cache_uuid) + elif cache_class == 'lmv': + client_uuid = "%s_lmv_cache_UUID" % "cache" + self.cache = LMV(cache_obd, client_uuid, self.name); + else: + panic("unknown cache obd class '%s'" %(cache_class)) + + def get_uuid(self): + return self.uuid + + def get_name(self): + return self.name + + def get_master_name(self): + return self.master.name + + def get_cache_name(self): + return self.cache.name + def prepare(self): - if is_prepared(self.name): + if not config.record and is_prepared(self.name): return - self.info(self.real_uuid, self.cache_uuid) + self.master.prepare() + self.cache.prepare() + self.info(self.master_uuid, self.cache_uuid) lctl.newdev("cobd", self.name, self.uuid, - setup ="%s %s" %(self.real_uuid, self.cache_uuid)) + setup ="%s %s" %(self.master.name, + self.cache.name)) + def cleanup(self): + if is_prepared(self.name): + Module.cleanup(self) + self.master.cleanup() + self.cache.cleanup() + + def add_module(self, manager): + manager.add_lustre_module('cobd', 'cobd') + self.master.add_module(manager) # virtual interface for OSC and LOV class VOSC(Module): - def __init__(self, db, uuid, fs_name, name_override = None): + def __init__(self, db, client_uuid, name, name_override = None): Module.__init__(self, 'VOSC', db) if db.get_class() == 'lov': - self.osc = LOV(db, uuid, fs_name, name_override) + self.osc = LOV(db, client_uuid, name, name_override) + self.type = 'lov' + elif db.get_class() == 'cobd': + self.osc = COBD(db, client_uuid, name) + self.type = 'cobd' else: - self.osc = get_osc(db, uuid, fs_name) + self.osc = OSC(db, client_uuid, name) + self.type = 'osc' + def get_uuid(self): - return self.osc.uuid + return self.osc.get_uuid() + def get_name(self): - return self.osc.name + return self.osc.get_name() + def prepare(self): self.osc.prepare() + def cleanup(self): self.osc.cleanup() - def load_module(self): - self.osc.load_module() - def cleanup_module(self): - self.osc.cleanup_module() + + def add_module(self, manager): + self.osc.add_module(manager) + + def correct_level(self, level, op=None): + return self.osc.correct_level(level, op) + +# virtual interface for MDC and LMV +class VMDC(Module): + def __init__(self, db, client_uuid, name, name_override = None): + Module.__init__(self, 'VMDC', db) + if db.get_class() == 'lmv': + self.mdc = LMV(db, client_uuid, name, name_override) + elif db.get_class() == 'cobd': + self.mdc = COBD(db, client_uuid, name) + else: + self.mdc = MDC(db, client_uuid, name) + + def get_uuid(self): + return self.mdc.uuid + def get_name(self): + return self.mdc.name + + def prepare(self): + self.mdc.prepare() + + def cleanup(self): + self.mdc.cleanup() + + def add_module(self, manager): + self.mdc.add_module(manager) + + def correct_level(self, level, op=None): + return self.mdc.correct_level(level, op) class ECHO_CLIENT(Module): def __init__(self,db): Module.__init__(self, 'ECHO_CLIENT', db) - self.add_lustre_module('obdecho', 'obdecho') self.obd_uuid = self.db.get_first_ref('obd') obd = self.db.lookup(self.obd_uuid) self.uuid = generate_client_uuid(self.name) self.osc = VOSC(obd, self.uuid, self.name) def prepare(self): - if is_prepared(self.name): + if not config.record and is_prepared(self.name): return run_acceptors() self.osc.prepare() # XXX This is so cheating. -p @@ -1850,14 +2769,12 @@ class ECHO_CLIENT(Module): Module.cleanup(self) self.osc.cleanup() - def load_module(self): - self.osc.load_module() - Module.load_module(self) - - def cleanup_module(self): - Module.cleanup_module(self) - self.osc.cleanup_module() + def add_module(self, manager): + self.osc.add_module(manager) + manager.add_lustre_module('obdecho', 'obdecho') + def correct_level(self, level, op=None): + return level def generate_client_uuid(name): client_uuid = '%05x_%.19s_%05x%05x' % (int(random.random() * 1048576), @@ -1866,50 +2783,70 @@ def generate_client_uuid(name): int(random.random() * 1048576)) return client_uuid[:36] - class Mountpoint(Module): def __init__(self,db): Module.__init__(self, 'MTPT', db) self.path = self.db.get_val('path') + self.clientoptions = self.db.get_val('clientoptions', '') self.fs_uuid = self.db.get_first_ref('filesystem') fs = self.db.lookup(self.fs_uuid) - self.mds_uuid = fs.get_first_ref('mds') + self.mds_uuid = fs.get_first_ref('lmv') + if not self.mds_uuid: + self.mds_uuid = fs.get_first_ref('mds') self.obd_uuid = fs.get_first_ref('obd') - self.mgmt_uuid = fs.get_first_ref('mgmt') - obd = self.db.lookup(self.obd_uuid) client_uuid = generate_client_uuid(self.name) - self.vosc = VOSC(obd, client_uuid, self.name) - self.mdc = get_mdc(db, client_uuid, self.name, self.mds_uuid) - - self.add_lustre_module('mdc', 'mdc') - self.add_lustre_module('llite', 'llite') - if self.mgmt_uuid: - self.mgmtcli = ManagementClient(db.lookup(self.mgmt_uuid), - client_uuid) - else: - self.mgmtcli = None + self.oss_sec = self.db.get_val('oss_sec','null') + self.mds_sec = self.db.get_val('mds_sec','null') + if config.mds_sec: + self.mds_sec = config.mds_sec + if config.oss_sec: + self.oss_sec = config.oss_sec + + ost = self.db.lookup(self.obd_uuid) + if not ost: + panic("no ost: ", self.obd_uuid) + + mds = self.db.lookup(self.mds_uuid) + if not mds: + panic("no mds: ", self.mds_uuid) + + self.vosc = VOSC(ost, client_uuid, self.name, self.name) + self.vmdc = VMDC(mds, client_uuid, self.name, self.name) + def prepare(self): - if fs_is_mounted(self.path): + if not config.record and fs_is_mounted(self.path): log(self.path, "already mounted.") return run_acceptors() - if self.mgmtcli: - self.mgmtcli.prepare() - self.vosc.prepare() - self.mdc.prepare() - mdc_name = self.mdc.name + + self.vosc.prepare() + self.vmdc.prepare() self.info(self.path, self.mds_uuid, self.obd_uuid) if config.record or config.lctl_dump: - lctl.mount_option(local_node_name, self.vosc.get_name(), mdc_name) + lctl.mount_option(local_node_name, self.vosc.get_name(), + self.vmdc.get_name()) return - cmd = "mount -t lustre_lite -o osc=%s,mdc=%s %s %s" % \ - (self.vosc.get_name(), mdc_name, config.config, self.path) + + if config.clientoptions: + if self.clientoptions: + self.clientoptions = self.clientoptions + ',' + config.clientoptions + else: + self.clientoptions = config.clientoptions + if self.clientoptions: + self.clientoptions = ',' + self.clientoptions + # Linux kernel will deal with async and not pass it to ll_fill_super, + # so replace it with Lustre async + self.clientoptions = string.replace(self.clientoptions, "async", "lasync") + + cmd = "mount -t lustre_lite -o osc=%s,mdc=%s,mds_sec=%s,oss_sec=%s%s %s %s" % \ + (self.vosc.get_name(), self.vmdc.get_name(), self.mds_sec, + self.oss_sec, self.clientoptions, config.config, self.path) run("mkdir", self.path) ret, val = run(cmd) if ret: - self.mdc.cleanup() + self.vmdc.cleanup() self.vosc.cleanup() panic("mount failed:", self.path, ":", string.join(val)) @@ -1930,23 +2867,16 @@ class Mountpoint(Module): if fs_is_mounted(self.path): panic("fs is still mounted:", self.path) - self.mdc.cleanup() + self.vmdc.cleanup() self.vosc.cleanup() - if self.mgmtcli: - self.mgmtcli.cleanup() - - def load_module(self): - if self.mgmtcli: - self.mgmtcli.load_module() - self.vosc.load_module() - Module.load_module(self) - def cleanup_module(self): - Module.cleanup_module(self) - self.vosc.cleanup_module() - if self.mgmtcli: - self.mgmtcli.cleanup_module() + def add_module(self, manager): + self.vosc.add_module(manager) + self.vmdc.add_module(manager) + manager.add_lustre_module('llite', 'llite') + def correct_level(self, level, op=None): + return level # ============================================================ # misc query functions @@ -1960,13 +2890,12 @@ def get_ost_net(self, osd_uuid): node = self.lookup(node_uuid) if not node: panic("unable to find node for osd_uuid:", osd_uuid, - " node_ref:", node_uuid) + " node_ref:", node_uuid_) for net_uuid in node.get_networks(): db = node.lookup(net_uuid) srv_list.append(Network(db)) return srv_list - # the order of iniitailization is based on level. def getServiceLevel(self): type = self.get_class() @@ -1977,19 +2906,21 @@ def getServiceLevel(self): ret = 6 elif type in ('ldlm',): ret = 20 - elif type in ('mgmt',): - ret = 25 elif type in ('osd', 'cobd'): ret = 30 elif type in ('mdsdev',): ret = 40 + elif type in ('lmv',): + ret = 45 elif type in ('mountpoint', 'echoclient'): - ret = 70 + ret = 60 + elif type in ('cmobd',): + ret = 70 else: panic("Unknown type: ", type) if ret < config.minlevel or ret > config.maxlevel: - ret = 0 + ret = 0 return ret # @@ -1997,7 +2928,7 @@ def getServiceLevel(self): # [(level, db_object),] def getServices(self): list = [] - for ref_class, ref_uuid in self.get_all_refs(): + for ref_class, ref_uuid in self.get_all_refs(): servdb = self.lookup(ref_uuid) if servdb: level = getServiceLevel(servdb) @@ -2011,7 +2942,7 @@ def getServices(self): ############################################################ -# MDC UUID hack - +# MDC UUID hack - # FIXME: clean this mess up! # # OSC is no longer in the xml, so we have to fake it. @@ -2020,11 +2951,11 @@ def get_osc(ost_db, uuid, fs_name): osc = OSC(ost_db, uuid, fs_name) return osc -def get_mdc(db, uuid, fs_name, mds_uuid): +def get_mdc(db, fs_name, mds_uuid): mds_db = db.lookup(mds_uuid); if not mds_db: - panic("no mds:", mds_uuid) - mdc = MDC(mds_db, uuid, fs_name) + error("no mds:", mds_uuid) + mdc = MDC(mds_db, mds_uuid, fs_name) return mdc ############################################################ @@ -2041,11 +2972,8 @@ def find_local_clusters(node_db): debug("add_local", netuuid) local_clusters.append((srv.net_type, srv.cluster_id, srv.nid)) if srv.port > 0: - if acceptors.has_key(srv.port): - panic("duplicate port:", srv.port) - acceptors[srv.port] = AcceptorHandler(srv.port, srv.net_type, - srv.send_mem, srv.recv_mem, - srv.irq_affinity) + if not acceptors.has_key(srv.port): + acceptors[srv.port] = AcceptorHandler(srv.port, srv.net_type) # This node is a gateway. is_router = 0 @@ -2157,15 +3085,17 @@ def newService(db): elif type == 'osd': n = OSD(db) elif type == 'cobd': - n = COBD(db) + n = COBD(db, "YOU_SHOULD_NEVER_SEE_THIS_UUID") + elif type == 'cmobd': + n = CMOBD(db) elif type == 'mdsdev': n = MDSDEV(db) elif type == 'mountpoint': n = Mountpoint(db) elif type == 'echoclient': n = ECHO_CLIENT(db) - elif type == 'mgmt': - n = Management(db) + elif type == 'lmv': + n = LMV(db) else: panic ("unknown service type:", type) return n @@ -2182,49 +3112,213 @@ def for_each_profile(db, prof_list, operation): for prof_uuid in prof_list: prof_db = db.lookup(prof_uuid) if not prof_db: - panic("profile:", profile, "not found.") + panic("profile:", prof_uuid, "not found.") services = getServices(prof_db) operation(services) - + +def magic_get_osc(db, rec, lov): + if lov: + lov_uuid = lov.get_uuid() + lov_name = lov.osc.fs_name + else: + lov_uuid = rec.getAttribute('lov_uuidref') + # FIXME: better way to find the mountpoint? + filesystems = db.root_node.getElementsByTagName('filesystem') + fsuuid = None + for fs in filesystems: + ref = fs.getElementsByTagName('obd_ref') + if ref[0].getAttribute('uuidref') == lov_uuid: + fsuuid = fs.getAttribute('uuid') + break + + if not fsuuid: + panic("malformed xml: lov uuid '" + lov_uuid + "' referenced in 'add' record is not used by any filesystems.") + + mtpts = db.root_node.getElementsByTagName('mountpoint') + lov_name = None + for fs in mtpts: + ref = fs.getElementsByTagName('filesystem_ref') + if ref[0].getAttribute('uuidref') == fsuuid: + lov_name = fs.getAttribute('name') + break + + if not lov_name: + panic("malformed xml: 'add' record references lov uuid '" + lov_uuid + "', which references filesystem uuid '" + fsuuid + "', which does not reference a mountpoint.") + + print "lov_uuid: " + lov_uuid + "; lov_name: " + lov_name + + ost_uuid = rec.getAttribute('ost_uuidref') + obd = db.lookup(ost_uuid) + + if not obd: + panic("malformed xml: 'add' record references ost uuid '" + ost_uuid + "' which cannot be found.") + + osc = get_osc(obd, lov_uuid, lov_name) + if not osc: + panic('osc not found:', obd_uuid) + return osc + +# write logs for update records. sadly, logs of all types -- and updates in +# particular -- are something of an afterthought. lconf needs rewritten with +# these as core concepts. so this is a pretty big hack. +def process_update_record(db, update, lov): + for rec in update.childNodes: + if rec.nodeType != rec.ELEMENT_NODE: + continue + + log("found "+rec.nodeName+" record in update version " + + str(update.getAttribute('version'))) + + lov_uuid = rec.getAttribute('lov_uuidref') + ost_uuid = rec.getAttribute('ost_uuidref') + index = rec.getAttribute('index') + gen = rec.getAttribute('generation') + + if not lov_uuid or not ost_uuid or not index or not gen: + panic("malformed xml: 'update' record requires lov_uuid, ost_uuid, index, and generation.") + + if not lov: + tmplov = db.lookup(lov_uuid) + if not tmplov: + panic("malformed xml: 'delete' record contains lov UUID '" + lov_uuid + "', which cannot be located.") + lov_name = tmplov.getName() + else: + lov_name = lov.osc.name + + # ------------------------------------------------------------- add + if rec.nodeName == 'add': + if config.cleanup: + lctl.lov_del_obd(lov_name, lov_uuid, ost_uuid, index, gen) + continue + + osc = magic_get_osc(db, rec, lov) + + try: + # Only ignore connect failures with --force, which + # isn't implemented here yet. + osc.prepare(ignore_connect_failure=0) + except CommandError, e: + print "Error preparing OSC %s\n" % osc.uuid + raise e + + lctl.lov_add_obd(lov_name, lov_uuid, ost_uuid, index, gen) + + # ------------------------------------------------------ deactivate + elif rec.nodeName == 'deactivate': + if config.cleanup: + continue + + osc = magic_get_osc(db, rec, lov) + + try: + osc.deactivate() + except CommandError, e: + print "Error deactivating OSC %s\n" % osc.uuid + raise e + + # ---------------------------------------------------------- delete + elif rec.nodeName == 'delete': + if config.cleanup: + continue + + osc = magic_get_osc(db, rec, lov) + + try: + config.cleanup = 1 + osc.cleanup() + config.cleanup = 0 + except CommandError, e: + print "Error cleaning up OSC %s\n" % osc.uuid + raise e + + lctl.lov_del_obd(lov_name, lov_uuid, ost_uuid, index, gen) + +def process_updates(db, log_device, log_name, lov = None): + updates = db.root_node.getElementsByTagName('update') + for u in updates: + if not u.childNodes: + log("ignoring empty update record (version " + + str(u.getAttribute('version')) + ")") + continue + + version = u.getAttribute('version') + real_name = "%s-%s" % (log_name, version) + lctl.clear_log(log_device, real_name) + lctl.record(log_device, real_name) + + process_update_record(db, u, lov) + + lctl.end_record() + def doWriteconf(services): - if config.nosetup: - return + #if config.nosetup: + # return for s in services: - if s[1].get_class() == 'mdsdev': + if s[1].get_class() == 'mdsdev' or s[1].get_class() == 'osd': n = newService(s[1]) n.write_conf() + n.cleanup() def doSetup(services): if config.nosetup: return + slist = [] for s in services: n = newService(s[1]) - n.prepare() - -def doModules(services): + n.level = s[0] + slist.append((n.level, n)) + nlist = [] + for n in slist: + nl = n[1].correct_level(n[0]) + nlist.append((nl, n[1])) + nlist.sort() + for n in nlist: + n[1].prepare() + +def doLoadModules(services): if config.nomod: return + + # adding all needed modules from all services for s in services: n = newService(s[1]) - n.load_module() + n.add_module(mod_manager) + + # loading all registered modules + mod_manager.load_modules() -def doCleanup(services): - if config.nosetup: +def doUnloadModules(services): + if config.nomod: return - services.reverse() + + # adding all needed modules from all services for s in services: n = newService(s[1]) - if n.safe_to_clean(): - n.cleanup() + if n.safe_to_clean_modules(): + n.add_module(mod_manager) + + # unloading all registered modules + mod_manager.cleanup_modules() -def doUnloadModules(services): - if config.nomod: +def doCleanup(services): + if config.nosetup: return - services.reverse() + slist = [] + for s in services: n = newService(s[1]) - if n.safe_to_clean_modules(): - n.cleanup_module() + n.level = s[0] + slist.append((n.level, n)) + nlist = [] + for n in slist: + nl = n[1].correct_level(n[0]) + nlist.append((nl, n[1])) + nlist.sort() + nlist.reverse() + + for n in nlist: + if n[1].safe_to_clean(): + n[1].cleanup() # # Load profile for @@ -2255,10 +3349,11 @@ def doHost(lustreDB, hosts): prof_list = node_db.get_refs('profile') if config.write_conf: - for_each_profile(node_db, prof_list, doModules) + for_each_profile(node_db, prof_list, doLoadModules) sys_make_devices() for_each_profile(node_db, prof_list, doWriteconf) for_each_profile(node_db, prof_list, doUnloadModules) + lustreDB.close() elif config.recover: if not (config.tgt_uuid and config.client_uuid and config.conn_uuid): @@ -2283,6 +3378,7 @@ def doHost(lustreDB, hosts): for_each_profile(node_db, prof_list, doCleanup) for_each_profile(node_db, prof_list, doUnloadModules) + lustreDB.close() else: # ugly hack, only need to run lctl commands for --dump @@ -2296,7 +3392,7 @@ def doHost(lustreDB, hosts): sys_set_netmem_max('/proc/sys/net/core/rmem_max', MAXTCPBUF) sys_set_netmem_max('/proc/sys/net/core/wmem_max', MAXTCPBUF) - for_each_profile(node_db, prof_list, doModules) + for_each_profile(node_db, prof_list, doLoadModules) sys_set_debug_path() sys_set_ptldebug(ptldebug) @@ -2313,22 +3409,24 @@ def doHost(lustreDB, hosts): sys_set_portals_upcall(portals_upcall) for_each_profile(node_db, prof_list, doSetup) + lustreDB.close() -def doRecovery(db, lctl, tgt_uuid, client_uuid, nid_uuid): - tgt = db.lookup(tgt_uuid) +def doRecovery(lustreDB, lctl, tgt_uuid, client_uuid, nid_uuid): + tgt = lustreDB.lookup(tgt_uuid) if not tgt: raise Lustre.LconfError("doRecovery: "+ tgt_uuid +" not found.") new_uuid = get_active_target(tgt) if not new_uuid: raise Lustre.LconfError("doRecovery: no active target found for: " + tgt_uuid) - net = choose_local_server(get_ost_net(db, new_uuid)) + net = choose_local_server(get_ost_net(lustreDB, new_uuid)) if not net: raise Lustre.LconfError("Unable to find a connection to:" + new_uuid) log("Reconnecting", tgt_uuid, " to ", net.nid_uuid); try: - oldnet = get_server_by_nid_uuid(db, nid_uuid) + oldnet = get_server_by_nid_uuid(lustreDB, nid_uuid) + lustreDB.close() if oldnet: lctl.disconnect(oldnet) except CommandError, e: @@ -2404,6 +3502,23 @@ def sys_set_timeout(timeout): lctl.set_timeout(timeout) def sys_tweak_socknal (): + # reserve at least 8MB, or we run out of RAM in skb_alloc under read + if sys_get_branch() == '2.6': + fp = open('/proc/meminfo') + lines = fp.readlines() + fp.close() + memtotal = 131072 + for l in lines: + a = string.split(l) + if a[0] == 'MemTotal:': + memtotal = a[1] + debug("memtotal" + memtotal) + if int(memtotal) < 262144: + minfree = int(memtotal) / 16 + else: + minfree = 32768 + debug("+ minfree ", minfree) + sysctl("vm/min_free_kbytes", minfree) if config.single_socket: sysctl("socknal/typed", 0) @@ -2412,8 +3527,8 @@ def sys_optimize_elan (): "/proc/qsnet/elan3/config/eventint_punt_loops", "/proc/qsnet/elan4/config/elan4_mainint_punt_loops"] for p in procfiles: - if os.access(p, os.R_OK): - run ("echo 0 > " + p) + if os.access(p, os.W_OK): + run ("echo 1 > " + p) def sys_set_ptldebug(ptldebug): if config.ptldebug: @@ -2421,7 +3536,7 @@ def sys_set_ptldebug(ptldebug): if ptldebug: try: val = eval(ptldebug, ptldebug_names) - val = "0x%x" % (val) + val = "0x%x" % (val & 0xffffffffL) sysctl('portals/debug', val) except NameError, e: panic(str(e)) @@ -2432,7 +3547,7 @@ def sys_set_subsystem(subsystem): if subsystem: try: val = eval(subsystem, subsystem_names) - val = "0x%x" % (val) + val = "0x%x" % (val & 0xffffffffL) sysctl('portals/subsystem_debug', val) except NameError, e: panic(str(e)) @@ -2450,14 +3565,12 @@ def sys_set_netmem_max(path, max): fp.write('%d\n' %(max)) fp.close() - def sys_make_devices(): if not os.access('/dev/portals', os.R_OK): run('mknod /dev/portals c 10 240') if not os.access('/dev/obd', os.R_OK): run('mknod /dev/obd c 10 241') - # Add dir to the global PATH, if not already there. def add_to_path(new_dir): syspath = string.split(os.environ['PATH'], ':') @@ -2479,7 +3592,6 @@ def default_gdb_script(): else: return script - DEFAULT_PATH = ('/sbin', '/usr/sbin', '/bin', '/usr/bin') # ensure basic elements are in the system path def sanitise_path(): @@ -2513,6 +3625,13 @@ lconf_options = [ ('config', "Cluster config name used for LDAP query", PARAM), ('select', "service=nodeA,service2=nodeB ", PARAMLIST), ('node', "Load config for ", PARAM), + ('sec',"security flavor between this client with mds", PARAM), + ('mds_sec',"security flavor between this client with mds", PARAM), + ('oss_sec',"security flavor between this client with ost", PARAM), + ('mds_mds_sec',"security flavor between this mds with other mds", PARAM), + ('mds_oss_sec',"security flavor between this mds with ost", PARAM), + ('mds_deny_sec', "security flavor denied by this mds", PARAM), + ('ost_deny_sec', "security flavor denied by this ost", PARAM), ('cleanup,d', "Cleans up config. (Shutdown)"), ('force,f', "Forced unmounting and/or obd detach during cleanup", FLAG, 0), @@ -2532,6 +3651,8 @@ lconf_options = [ ('nosetup', "Skip device setup/cleanup step."), ('reformat', "Reformat all devices (without question)"), ('mkfsoptions', "Additional options for the mk*fs command line", PARAM), + ('mountfsoptions', "Additional options for mount fs command line", PARAM), + ('clientoptions', "Additional options for Lustre", PARAM), ('dump', "Dump the kernel debug log to file before portals is unloaded", PARAM), ('write_conf', "Save all the client config information on mds."), @@ -2539,6 +3660,10 @@ lconf_options = [ ('record_log', "Name of config record log.", PARAM), ('record_device', "MDS device name that will record the config commands", PARAM), + ('root_squash', "MDS squash root to appointed uid", + PARAM), + ('no_root_squash', "Don't squash root for appointed nid", + PARAM), ('minlevel', "Minimum level of services to configure/cleanup", INTPARAM, 0), ('maxlevel', """Maximum level of services to configure/cleanup @@ -2575,7 +3700,7 @@ lconf_options = [ ] def main(): - global lctl, config, toplevel, CONFIG_FILE + global lctl, config, toplustreDB, CONFIG_FILE, mod_manager # in the upcall this is set to SIG_IGN signal.signal(signal.SIGCHLD, signal.SIG_DFL) @@ -2627,8 +3752,9 @@ def main(): except Exception: panic("%s does not appear to be a config file." % (args[0])) sys.exit(1) # make sure to die here, even in debug mode. + config_file.close() CONFIG_FILE = args[0] - db = Lustre.LustreDB_XML(dom.documentElement, dom.documentElement) + lustreDB = Lustre.LustreDB_XML(dom.documentElement, dom.documentElement) if not config.config: config.config = os.path.basename(args[0])# use full path? if config.config[-4:] == '.xml': @@ -2637,7 +3763,7 @@ def main(): if not config.config: panic("--ldapurl requires --config name") dn = "config=%s,fs=lustre" % (config.config) - db = Lustre.LustreDB_LDAP('', {}, base=dn, url = config.ldapurl) + lustreDB = Lustre.LustreDB_LDAP('', {}, base=dn, url = config.ldapurl) elif config.ptldebug or config.subsystem: sys_set_ptldebug(None) sys_set_subsystem(None) @@ -2647,9 +3773,9 @@ def main(): print 'see lconf --help for command summary' sys.exit(1) - toplevel = db + toplustreDB = lustreDB - ver = db.get_version() + ver = lustreDB.get_version() if not ver: panic("No version found in config data, please recreate.") if ver != Lustre.CONFIG_VERSION: @@ -2681,10 +3807,17 @@ def main(): lctl.clear_log(config.record_device, config.record_log) lctl.record(config.record_device, config.record_log) - doHost(db, node_list) + # init module manager + mod_manager = kmod_manager(config.lustre, config.portals) - if config.record: - lctl.end_record() + doHost(lustreDB, node_list) + + if not config.record: + return + + lctl.end_record() + + process_updates(lustreDB, config.record_device, config.record_log) if __name__ == "__main__": try: