From 0fe4c7a39a958f3ead56aee670b79eb3762b0450 Mon Sep 17 00:00:00 2001 From: adilger Date: Tue, 15 Mar 2005 10:56:52 +0000 Subject: [PATCH] Branch: b1_4 Set the mkfs defaults for the MDS to 1 inode/4k, and for the OST 1/16k. We could probably be more aggressive on the OSTs, but this should be safe for any reasonable usage of Lustre. r=phil --- lustre/ChangeLog | 1 + lustre/tests/conf-sanity.sh | 8 +-- lustre/tests/local-large-inode.sh | 3 - lustre/tests/local.sh | 4 +- lustre/tests/lov.sh | 4 +- lustre/tests/recovery-cleanup.sh | 4 +- lustre/tests/sanity.sh | 2 +- lustre/utils/lconf | 132 ++++++++++++++++++-------------------- 8 files changed, 78 insertions(+), 80 deletions(-) delete mode 100755 lustre/tests/local-large-inode.sh diff --git a/lustre/ChangeLog b/lustre/ChangeLog index 3b98693..c5cf281 100644 --- a/lustre/ChangeLog +++ b/lustre/ChangeLog @@ -52,6 +52,7 @@ tbd Cluster File Systems, Inc. - don't hold the pinger_sem in ptlrpc_pinger_sending_on_import - change obd_increase_kms to obd_adjust_kms (up or down) (5654) - lconf, lmc search both /usr/lib and /usr/lib64 for Python libs (5800) + - by default create 1 inode per 4kB space on MDS, per 16kB on OSTs 2004-11-23 Cluster File Systems, Inc. * version 1.4.0 diff --git a/lustre/tests/conf-sanity.sh b/lustre/tests/conf-sanity.sh index f302eab..aa67da2 100644 --- a/lustre/tests/conf-sanity.sh +++ b/lustre/tests/conf-sanity.sh @@ -650,21 +650,21 @@ test_16() { EXPECTEDLOGSMODE=`debugfs -R "stat LOGS" $MDSDEV 2> /dev/null | awk '/Mode: /{print $6}'` EXPECTEDPENDINGMODE=`debugfs -R "stat PENDING" $MDSDEV 2> /dev/null | awk '/Mode: /{print $6}'` - if [ $EXPECTEDOBJECTSMODE = "0777" ]; then + if [ "$EXPECTEDOBJECTSMODE" = "0777" ]; then echo "Success:Lustre change the mode of OBJECTS correctly" else echo "Error: Lustre does not change the mode of OBJECTS properly" return 1 fi - if [ $EXPECTEDLOGSMODE = "0777" ]; then + if [ "$EXPECTEDLOGSMODE" = "0777" ]; then echo "Success:Lustre change the mode of LOGS correctly" else echo "Error: Lustre does not change the mode of LOGS properly" return 1 fi - if [ $EXPECTEDPENDINGMODE = "0777" ]; then + if [ "$EXPECTEDPENDINGMODE" = "0777" ]; then echo "Success:Lustre change the mode of PENDING correctly" else echo "Error: Lustre does not change the mode of PENDING properly" @@ -712,7 +712,7 @@ test_18() { echo "check journal size..." FOUNDJOURNALSIZE=`debugfs -R "stat <8>" $MDSDEV | awk '/Size: / { print $6; exit;}'` - if [ $FOUNDJOURNALSIZE = "79691776" ]; then + if [ "$FOUNDJOURNALSIZE" = "79691776" ]; then echo "Success:lconf creates large journals" else echo "Error:lconf not create large journals correctly" diff --git a/lustre/tests/local-large-inode.sh b/lustre/tests/local-large-inode.sh deleted file mode 100755 index 7e401b0..0000000 --- a/lustre/tests/local-large-inode.sh +++ /dev/null @@ -1,3 +0,0 @@ -#!/bin/sh -#set -vx -JSIZE=32 MDSISIZE=256 sh `dirname $0`/local.sh `basename $0 .sh`.xml diff --git a/lustre/tests/local.sh b/lustre/tests/local.sh index d231c41..d97d094 100755 --- a/lustre/tests/local.sh +++ b/lustre/tests/local.sh @@ -38,7 +38,9 @@ ${LMC} --add mds --nspath /mnt/mds_ns --node localhost --mds mds1 --fstype $FST # configure ost ${LMC} -m $config --add lov --lov lov1 --mds mds1 --stripe_sz $STRIPE_BYTES --stripe_cnt $STRIPES_PER_OBJ --stripe_pattern 0 || exit 20 -${LMC} --add ost --nspath /mnt/ost_ns --node localhost --lov lov1 --fstype $FSTYPE --dev $OSTDEV --size $OSTSIZE $JARG || exit 30 +# only specify "--mkfsoptions='-i 8192'" here because test fs is so small, +# on a real fs this is not needed unless all files tiny with many stripes +${LMC} --add ost --nspath /mnt/ost_ns --node localhost --lov lov1 --fstype $FSTYPE --dev $OSTDEV --size $OSTSIZE --mkfsoptions="-i 8192" $JARG || exit 30 # create client config ${LMC} --add mtpt --node localhost --path $MOUNT --mds mds1 --lov lov1 || exit 40 diff --git a/lustre/tests/lov.sh b/lustre/tests/lov.sh index f9fceb0..709de03 100755 --- a/lustre/tests/lov.sh +++ b/lustre/tests/lov.sh @@ -48,7 +48,9 @@ for num in `seq $OSTCOUNT`; do OST=ost$num DEVPTR=OSTDEV$num eval $DEVPTR=${!DEVPTR:=$TMP/$OST-`hostname`} - ${LMC} --add ost --node localhost --lov lov1 --ost $OST --fstype $FSTYPE --dev ${!DEVPTR} --size $OSTSIZE $JARG || exit 30 + # only specify "--mkfsoptions='-i 8192'" here because test fs is so small, + # on a real fs this is not needed unless all files tiny with many stripes + ${LMC} --add ost --node localhost --lov lov1 --ost $OST --fstype $FSTYPE --dev ${!DEVPTR} --size $OSTSIZE --mkfsoptions="-i 8192" $JARG || exit 30 done diff --git a/lustre/tests/recovery-cleanup.sh b/lustre/tests/recovery-cleanup.sh index c783fe0..c0d8a51 100755 --- a/lustre/tests/recovery-cleanup.sh +++ b/lustre/tests/recovery-cleanup.sh @@ -59,8 +59,8 @@ make_config() { --stripe_cnt 0 --stripe_pattern 0 || exit 6 lmc -m $CONFIG --add ost --nspath /mnt/ost_ns --node $OSTNODE \ --lov lov1 --dev $OSTDEV --size $OSTSIZE --fstype $FSTYPE || exit 7 - lmc -m $CONFIG --add mtpt --node $CLIENT --path $MOUNTPT --mds mds1 \ - --lov lov1 || exit 8 + lmc -m $CONFIG --add mtpt --node $CLIENT --path $MOUNTPT \ + --mds mds1 --lov lov1 || exit 8 } start_mds() { diff --git a/lustre/tests/sanity.sh b/lustre/tests/sanity.sh index 916bda2..bfaff19 100644 --- a/lustre/tests/sanity.sh +++ b/lustre/tests/sanity.sh @@ -180,7 +180,7 @@ fi DIR=${DIR:-$MOUNT} [ -z "`echo $DIR | grep $MOUNT`" ] && echo "$DIR not in $MOUNT" && exit 99 -LOVNAME=`cat /proc/fs/lustre/llite/fs0/lov/common_name` +LOVNAME=`cat /proc/fs/lustre/llite/*/lov/common_name | tail -n 1` OSTCOUNT=`cat /proc/fs/lustre/lov/$LOVNAME/numobd` STRIPECOUNT=`cat /proc/fs/lustre/lov/$LOVNAME/stripecount` STRIPESIZE=`cat /proc/fs/lustre/lov/$LOVNAME/stripesize` diff --git a/lustre/utils/lconf b/lustre/utils/lconf index b818deb..a823c2f 100755 --- a/lustre/utils/lconf +++ b/lustre/utils/lconf @@ -23,7 +23,7 @@ # lconf is the main driver script for starting and stopping # lustre filesystem services. # -# Based in part on the XML obdctl modifications done by Brian Behlendorf +# Based in part on the XML obdctl modifications done by Brian Behlendorf import sys, getopt, types import string, os, stat, popen2, socket, time, random, fcntl, select @@ -59,10 +59,10 @@ MAX_LOOP_DEVICES = 256 PORTALS_DIR = '../portals' # Needed to call lconf --record -CONFIG_FILE = "" +CONFIG_FILE = "" # Please keep these in sync with the values in portals/kp30.h -ptldebug_names = { +ptldebug_names = { "trace" : (1 << 0), "inode" : (1 << 1), "super" : (1 << 2), @@ -126,7 +126,7 @@ def cleanup_error(rc): if not first_cleanup_error: first_cleanup_error = rc -# ============================================================ +# ============================================================ # debugging and error funcs def fixme(msg = "this feature"): @@ -157,7 +157,7 @@ def debug(*args): def my_int(s): import types if type(s) is types.IntType: - return s + return s try: if (s[0:2] == '0x') or (s[0:1] == '0'): return eval(s, {}, {}) @@ -270,7 +270,7 @@ class DaemonHandler: return 0 except IOError: return 0 - + def clean_pidfile(self): """ Remove a stale pidfile """ log("removing stale pidfile:", self.pidfile()) @@ -278,7 +278,7 @@ class DaemonHandler: os.unlink(self.pidfile()) except OSError, e: log(self.pidfile(), e) - + class AcceptorHandler(DaemonHandler): def __init__(self, port, net_type): DaemonHandler.__init__(self, "acceptor") @@ -291,7 +291,7 @@ class AcceptorHandler(DaemonHandler): def command_line(self): return string.join(map(str,(self.flags, self.port))) - + acceptors = {} # start the acceptors @@ -311,14 +311,14 @@ def run_one_acceptor(port): if daemon.net_type == 'tcp' and not daemon.running(): daemon.start() else: - panic("run_one_acceptor: No acceptor defined for port:", port) - + panic("run_one_acceptor: No acceptor defined for port:", port) + def stop_acceptor(port): if acceptors.has_key(port): daemon = acceptors[port] if daemon.net_type == 'tcp' and daemon.running(): daemon.stop() - + # ============================================================ # handle lctl interface @@ -343,7 +343,7 @@ class LCTLInterface: def use_save_file(self, file): self.save_file = file - + def record(self, dev_name, logname): log("Recording log", logname, "on", dev_name) self.record_device = dev_name @@ -375,7 +375,7 @@ class LCTLInterface: device $%s record %s %s""" % (self.record_device, self.record_log, cmds) - + debug("+", cmd_line, cmds) if config.noexec: return (0, []) @@ -426,7 +426,6 @@ class LCTLInterface: raise CommandError(self.lctl, out, rc) return rc, out - def clear_log(self, dev, log): """ clear an existing log """ cmds = """ @@ -489,7 +488,7 @@ class LCTLInterface: quit""" % (net_type, nid, hostaddr ) self.run(cmds) - + def connect(self, srv): self.add_uuid(srv.net_type, srv.nid_uuid, srv.nid) if srv.net_type in ('tcp','openib','iib','vib','ra') and not config.lctl_dump: @@ -503,7 +502,7 @@ class LCTLInterface: device $%s recover %s""" %(dev_name, new_conn) self.run(cmds) - + # add a route to a range def add_route(self, net, gw, lo, hi): cmds = """ @@ -516,7 +515,7 @@ class LCTLInterface: except CommandError, e: log ("ignore: ") e.dump() - + def del_route(self, net, gw, lo, hi): cmds = """ ignore_errors @@ -567,7 +566,7 @@ class LCTLInterface: quit""" % (net_type, nid) self.run(cmds) - + # disconnect one connection def disconnect(self, srv): self.del_uuid(srv.nid_uuid) @@ -597,14 +596,14 @@ class LCTLInterface: attach %s %s %s quit""" % (type, name, uuid) self.run(cmds) - + def setup(self, name, setup = ""): cmds = """ cfg_device %s setup %s quit""" % (name, setup) self.run(cmds) - + # create a new device with lctl def newdev(self, type, name, uuid, setup = ""): @@ -614,7 +613,7 @@ class LCTLInterface: except CommandError, e: self.cleanup(name, uuid, 0) raise e - + # cleanup a device def cleanup(self, name, uuid, force, failover = 0): @@ -765,7 +764,7 @@ def find_module(src_dir, dev_dir, modname): modbase = src_dir +'/'+ dev_dir +'/'+ modname for modext in '.ko', '.o': module = modbase + modext - try: + try: if os.access(module, os.R_OK): return module except OSError: @@ -846,7 +845,7 @@ def mkfs(dev, devsize, fstype, jsize, isize, mkfsoptions, isblock=1): if devsize > 1024 * 1024: jsize = ((devsize / 102400) * 4) if jsize > 400: - jsize = 400 + jsize = 400 if jsize: jopt = "-J size=%d" %(jsize,) if isize: iopt = "-I %d" %(isize,) mkfs = 'mkfs.ext2 -j -b 4096 ' @@ -882,7 +881,7 @@ def loop_base(): if not os.access(loop + str(0), os.R_OK): panic("can't access loop devices") return loop - + # find loop device assigned to thefile def find_loop(file): loop = loop_base() @@ -939,7 +938,7 @@ def clean_loop(file): # determine if dev is formatted as a filesystem def need_format(fstype, dev): - # FIXME don't know how to implement this + # FIXME don't know how to implement this return 0 # initialize a block device if needed @@ -956,7 +955,6 @@ def block_dev(dev, size, fstype, reformat, autoformat, journal_size, # panic("device:", dev, # "not prepared, and autoformat is not set.\n", # "Rerun with --reformat option to format ALL filesystems") - return dev def if2addr(iface): @@ -994,7 +992,7 @@ def sys_get_local_nid(net_type, wildcard, cluster_id): else: local = sys_get_local_address(net_type, wildcard, cluster_id) return local - + def sys_get_local_address(net_type, wildcard, cluster_id): """Return the local address for the network type.""" local = "" @@ -1022,7 +1020,7 @@ def sys_get_local_address(net_type, wildcard, cluster_id): elan_id = a[1] break try: - nid = my_int(cluster_id) + my_int(elan_id) + nid = my_int(cluster_id) + my_int(elan_id) local = "%d" % (nid) except ValueError, e: local = elan_id @@ -1050,7 +1048,7 @@ def sys_get_branch(): fp = open('/proc/sys/kernel/osrelease') lines = fp.readlines() fp.close() - + for l in lines: version = string.split(l) a = string.split(version[0], '.') @@ -1109,7 +1107,6 @@ def fs_is_mounted(path): except IOError, e: log(e) return 0 - class kmod: """Manage kernel modules""" @@ -1176,7 +1173,7 @@ class Module: self._server = None self._connected = 0 self.kmod = kmod(config.lustre, config.portals) - + def info(self, *args): msg = string.join(map(str,args)) print self.module_name + ":", self.name, self.uuid, msg @@ -1190,7 +1187,7 @@ class Module: log(self.module_name, "cleanup failed: ", self.name) e.dump() cleanup_error(e.rc) - + def add_portals_module(self, dev_dir, modname): """Append a module to list of modules to load.""" self.kmod.add_portals_module(dev_dir, modname) @@ -1202,7 +1199,7 @@ class Module: def load_module(self): """Load all the modules in the list in the order they appear.""" self.kmod.load_module() - + def cleanup_module(self): """Unload the modules in the list in reverse order.""" if self.safe_to_clean(): @@ -1210,10 +1207,10 @@ class Module: def safe_to_clean(self): return 1 - + def safe_to_clean_modules(self): return self.safe_to_clean() - + class Network(Module): def __init__(self,db): Module.__init__(self, 'NETWORK', db) @@ -1364,7 +1361,7 @@ class RouteTable(Module): return None return Network(srvdb) - + def prepare(self): if is_network_prepared(): return @@ -1464,13 +1461,13 @@ class LOV(Module): self.osclist.append(osc) else: panic('osc not found:', obd_uuid) - + def prepare(self): if is_prepared(self.name): return if self.config_only: panic("Can't prepare config_only LOV ", self.name) - + for osc in self.osclist: try: # Only ignore connect failures with --force, which @@ -1518,7 +1515,7 @@ class MDSDEV(Module): self.journal_size = self.db.get_val_int('journalsize', 0) self.fstype = self.db.get_val('fstype', '') self.nspath = self.db.get_val('nspath', '') - self.mkfsoptions = self.db.get_val('mkfsoptions', '') + self.mkfsoptions = '-i 4096 ' + self.db.get_val('mkfsoptions', '') self.mountfsoptions = self.db.get_val('mountfsoptions', '') # overwrite the orignal MDSDEV name and uuid with the MDS name and uuid target_uuid = self.db.get_first_ref('target') @@ -1585,7 +1582,7 @@ class MDSDEV(Module): def load_module(self): if self.active: Module.load_module(self) - + def prepare(self): if is_prepared(self.name): return @@ -1603,9 +1600,9 @@ class MDSDEV(Module): self.mkfsoptions) if not is_prepared('MDT'): lctl.newdev("mdt", 'MDT', 'MDT_UUID', setup ="") - try: + try: mountfsoptions = def_mount_options(self.fstype, 'mds') - + if config.mountfsoptions: if mountfsoptions: mountfsoptions = mountfsoptions + ',' + config.mountfsoptions @@ -1640,7 +1637,7 @@ class MDSDEV(Module): self.inode_size, self.mkfsoptions) lctl.newdev("mds", self.name, self.uuid, setup ="%s %s" %(blkdev, self.fstype)) - + # record logs for the MDS lov for uuid in self.filesystem_uuids: log("recording clients for filesystem:", uuid) @@ -1772,7 +1769,7 @@ class OSD(Module): self.size = self.db.get_val_int('devsize', 0) self.journal_size = self.db.get_val_int('journalsize', 0) self.inode_size = self.db.get_val_int('inodesize', 0) - self.mkfsoptions = self.db.get_val('mkfsoptions', '') + self.mkfsoptions = '-i 16384 ' + self.db.get_val('mkfsoptions', '') self.mountfsoptions = self.db.get_val('mountfsoptions', '') self.fstype = self.db.get_val('fstype', '') self.nspath = self.db.get_val('nspath', '') @@ -1794,7 +1791,7 @@ class OSD(Module): self.active = 0 if self.active and config.group and config.group != ost.get_val('group'): self.active = 0 - + self.target_dev_uuid = self.uuid self.uuid = target_uuid # modules @@ -1830,7 +1827,7 @@ class OSD(Module): self.inode_size, self.mkfsoptions) mountfsoptions = def_mount_options(self.fstype, 'ost') - + if config.mountfsoptions: if mountfsoptions: mountfsoptions = mountfsoptions + ',' + config.mountfsoptions @@ -2010,7 +2007,7 @@ class ManagementClient(Client): Client.__init__(self, db, uuid, 'mgmt_cli', '', self_name = mgmtcli_name_for_uuid(db.getUUID()), module_dir = 'mgmt') - + class COBD(Module): def __init__(self, db): Module.__init__(self, 'COBD', db) @@ -2166,7 +2163,7 @@ class Mountpoint(Module): run("mkdir", self.path) ret, val = run(cmd) if ret: - self.mdc.cleanup() + self.mdc.cleanup() self.vosc.cleanup() panic("mount failed:", self.path, ":", string.join(val)) @@ -2224,7 +2221,7 @@ def get_ost_net(self, osd_uuid): return srv_list -# the order of iniitailization is based on level. +# the order of iniitailization is based on level. def getServiceLevel(self): type = self.get_class() ret=0; @@ -2246,7 +2243,7 @@ def getServiceLevel(self): panic("Unknown type: ", type) if ret < config.minlevel or ret > config.maxlevel: - ret = 0 + ret = 0 return ret # @@ -2254,7 +2251,7 @@ def getServiceLevel(self): # [(level, db_object),] def getServices(self): list = [] - for ref_class, ref_uuid in self.get_all_refs(): + for ref_class, ref_uuid in self.get_all_refs(): servdb = self.lookup(ref_uuid) if servdb: level = getServiceLevel(servdb) @@ -2268,7 +2265,7 @@ def getServices(self): ############################################################ -# MDC UUID hack - +# MDC UUID hack - # FIXME: clean this mess up! # # OSC is no longer in the xml, so we have to fake it. @@ -2375,7 +2372,7 @@ def find_route(srv_list): if (r[3] <= to and to <= r[4]) and cluster_id == r[2]: result.append((srv, r)) return result - + def get_active_target(db): target_uuid = db.getUUID() target_name = db.getName() @@ -2391,7 +2388,7 @@ def get_server_by_nid_uuid(db, nid_uuid): net = Network(n) if net.nid_uuid == nid_uuid: return net - + ############################################################ # lconf level logic @@ -2426,7 +2423,7 @@ def newService(db): # # Prepare the system to run lustre using a particular profile -# in a the configuration. +# in a the configuration. # * load & the modules # * setup networking for the current node # * make sure partitions are in place and prepared @@ -2439,7 +2436,7 @@ def for_each_profile(db, prof_list, operation): panic("profile:", prof_uuid, "not found.") services = getServices(prof_db) operation(services) - + def doWriteconf(services): if config.nosetup: return @@ -2454,7 +2451,7 @@ def doSetup(services): for s in services: n = newService(s[1]) n.prepare() - + def doModules(services): if config.nomod: return @@ -2481,7 +2478,7 @@ def doUnloadModules(services): n.cleanup_module() # -# Load profile for +# Load profile for def doHost(lustreDB, hosts): global is_router, local_node_name node_db = None @@ -2499,7 +2496,7 @@ def doHost(lustreDB, hosts): timeout = node_db.get_val_int('timeout', 0) ptldebug = node_db.get_val('ptldebug', '') subsystem = node_db.get_val('subsystem', '') - + find_local_clusters(node_db) if not is_router: find_local_routes(lustreDB) @@ -2609,7 +2606,7 @@ def setupModulePath(cmd, portals_dir = PORTALS_DIR): base = os.path.dirname(cmd) if development_mode(): if not config.lustre: - debug('using objdir module paths') + debug('using objdir module paths') config.lustre = (os.path.join(base, "..")) # normalize the portals dir, using command line arg if set if config.portals: @@ -2619,7 +2616,7 @@ def setupModulePath(cmd, portals_dir = PORTALS_DIR): debug('config.portals', config.portals) elif config.lustre and config.portals: # production mode - # if --lustre and --portals, normalize portals + # if --lustre and --portals, normalize portals # can ignore POTRALS_DIR here, since it is probly useless here config.portals = os.path.join(config.lustre, config.portals) debug('config.portals B', config.portals) @@ -2722,8 +2719,7 @@ def sys_set_netmem_max(path, max): fp = open(path, 'w') fp.write('%d\n' %(max)) fp.close() - - + def sys_make_devices(): if not os.access('/dev/portals', os.R_OK): run('mknod /dev/portals c 10 240') @@ -2737,7 +2733,7 @@ def add_to_path(new_dir): if new_dir in syspath: return os.environ['PATH'] = os.environ['PATH'] + ':' + new_dir - + def default_debug_path(): path = '/tmp/lustre-log' if os.path.isdir('/r'): @@ -2815,7 +2811,7 @@ lconf_options = [ PARAM), ('minlevel', "Minimum level of services to configure/cleanup", INTPARAM, 0), - ('maxlevel', """Maximum level of services to configure/cleanup + ('maxlevel', """Maximum level of services to configure/cleanup Levels are aproximatly like: 10 - netwrk 20 - device, ldlm @@ -2846,14 +2842,14 @@ lconf_options = [ ('inactive', """The name of an inactive service, to be ignored during mounting (currently OST-only). Can be repeated.""", PARAMLIST), - ] + ] def main(): global lctl, config, toplustreDB, CONFIG_FILE # in the upcall this is set to SIG_IGN signal.signal(signal.SIGCHLD, signal.SIG_DFL) - + cl = Lustre.Options("lconf", "config.xml", lconf_options) try: config, args = cl.parse(sys.argv[1:]) @@ -2876,7 +2872,7 @@ def main(): random.seed(seed) sanitise_path() - + init_select(config.select) if len(args) > 0: -- 1.8.3.1