From ca6dd9a48e7b3ebf5df8a78c8ed65f7676cebf4d Mon Sep 17 00:00:00 2001 From: bobijam Date: Thu, 13 Dec 2007 04:18:02 +0000 Subject: [PATCH] Branch HEAD b=12211 i=green, adilger Description: make lustre randomly fail allocating memory Details : Make lustre randomly fail allocating memory for testing purposes. --- lustre/ChangeLog | 4 +++ lustre/include/linux/lvfs.h | 4 +++ lustre/include/obd_support.h | 48 +++++++++++++++++++++-------- lustre/lvfs/Makefile.in | 2 +- lustre/lvfs/autoMakefile.am | 2 +- lustre/lvfs/lvfs_lib.c | 60 ++++++++++++++++++++++++++++++++++++ lustre/lvfs/lvfs_linux.c | 1 - lustre/{obdclass => lvfs}/prng.c | 0 lustre/obdclass/Makefile.in | 2 +- lustre/obdclass/autoMakefile.am | 4 +-- lustre/obdclass/class_obd.c | 4 --- lustre/obdclass/linux/linux-sysctl.c | 47 ++++++++++++++++++++++++++++ 12 files changed, 156 insertions(+), 22 deletions(-) create mode 100644 lustre/lvfs/lvfs_lib.c rename lustre/{obdclass => lvfs}/prng.c (100%) diff --git a/lustre/ChangeLog b/lustre/ChangeLog index 1683f34..3573954 100644 --- a/lustre/ChangeLog +++ b/lustre/ChangeLog @@ -12,6 +12,10 @@ * Recommended e2fsprogs version: 1.40.2-cfs4 * Note that reiserfs quotas are disabled on SLES 10 in this kernel. +Severity : enhancement +Bugzilla : 12211 +Description: make lustre randomly fail allocating memory +Details : Make lustre randomly fail allocating memory for testing purposes. 
Severity : enhancement Bugzilla : 12702 diff --git a/lustre/include/linux/lvfs.h b/lustre/include/linux/lvfs.h index 7c2f6ae..1cc27c4 100644 --- a/lustre/include/linux/lvfs.h +++ b/lustre/include/linux/lvfs.h @@ -38,6 +38,10 @@ struct group_info { /* unused */ }; #define LLOG_LVFS +/* lvfs_lib.c */ +int obd_alloc_fail(const void *ptr, const char *name, const char *type, + size_t size, const char *file, int line); + /* simple.c */ struct lvfs_ucred { diff --git a/lustre/include/obd_support.h b/lustre/include/obd_support.h index 469ded9..befcde8 100644 --- a/lustre/include/obd_support.h +++ b/lustre/include/obd_support.h @@ -24,6 +24,7 @@ #define _OBD_SUPPORT #include +#include #include /* global variables */ @@ -49,6 +50,7 @@ extern unsigned int obd_max_dirty_pages; extern atomic_t obd_dirty_pages; extern cfs_waitq_t obd_race_waitq; extern int obd_race_state; +extern unsigned int obd_alloc_fail_rate; /* Timeout definitions */ #define LDLM_TIMEOUT_DEFAULT 20 @@ -222,6 +224,8 @@ extern int obd_race_state; #define OBD_FAIL_LPROC_REMOVE 0xb00 +#define OBD_FAIL_GENERAL_ALLOC 0xc00 + #define OBD_FAIL_SEQ 0x1000 #define OBD_FAIL_SEQ_QUERY_NET 0x1001 @@ -564,6 +568,16 @@ __put_mem_track(void *ptr, int size, #endif /* !OBD_DEBUG_MEMUSAGE */ +#ifdef RANDOM_FAIL_ALLOC +#define HAS_FAIL_ALLOC_FLAG OBD_FAIL_CHECK(OBD_FAIL_GENERAL_ALLOC) +#else +#define HAS_FAIL_ALLOC_FLAG 0 +#endif + +#define OBD_ALLOC_FAIL_BITS 24 +#define OBD_ALLOC_FAIL_MASK ((1 << OBD_ALLOC_FAIL_BITS) - 1) +#define OBD_ALLOC_FAIL_MULT (OBD_ALLOC_FAIL_MASK / 100) + #if defined(LUSTRE_UTILS) /* this version is for utils only */ #define OBD_ALLOC_GFP(ptr, size, gfp_mask) \ do { \ @@ -578,15 +592,20 @@ do { \ } \ } while (0) #else /* this version is for the kernel and liblustre */ +#define OBD_FREE_RTN0(ptr) /* free ptr if it is set, reset it to NULL, evaluate to 0 */ \ +({ \ + if ((ptr) != NULL) cfs_free(ptr); \ + (ptr) = NULL; \ + 0; \ +}) #define OBD_ALLOC_GFP(ptr, size, gfp_mask) \ do { \ (ptr) = cfs_alloc(size, (gfp_mask)); \ - if (unlikely((ptr) == NULL)) { \ - CERROR("kmalloc of '" #ptr 
"' (%d bytes) failed\n", \ - (int)(size)); \ - CERROR(LPU64" total bytes allocated by Lustre, %d by LNET\n", \ - obd_memory_sum(), atomic_read(&libcfs_kmemory)); \ - } else { \ + if (likely(((ptr) != NULL && /* a NULL ptr falls through to obd_alloc_fail() so real failures are still logged */ \ + (!HAS_FAIL_ALLOC_FLAG || obd_alloc_fail_rate == 0)) || \ + !obd_alloc_fail(ptr, #ptr, "km", size, \ + __FILE__, __LINE__) || \ + OBD_FREE_RTN0(ptr))){ \ memset(ptr, 0, size); \ OBD_ALLOC_POST(ptr, size, "kmalloced"); \ } \ @@ -686,16 +705,21 @@ do { \ /* we memset() the slab object to 0 when allocation succeeds, so DO NOT * HAVE A CTOR THAT DOES ANYTHING. its work will be cleared here. we'd * love to assert on that, but slab.c keeps kmem_cache_s all to itself. */ +#define OBD_SLAB_FREE_RTN0(ptr, slab) /* return ptr to slab if it is set, reset it to NULL, evaluate to 0 */ \ +({ \ + if ((ptr) != NULL) cfs_mem_cache_free((slab), (ptr)); \ + (ptr) = NULL; \ + 0; \ +}) #define OBD_SLAB_ALLOC(ptr, slab, type, size) \ do { \ LASSERT(!in_interrupt()); \ (ptr) = cfs_mem_cache_alloc(slab, (type)); \ - if (unlikely((ptr) == NULL)) { \ - CERROR("slab-alloc of '"#ptr"' (%d bytes) failed\n", \ - (int)(size)); \ - CERROR(LPU64" total bytes allocated by Lustre, %d by LNET\n", \ - obd_memory_sum(), atomic_read(&libcfs_kmemory)); \ - } else { \ + if (likely(((ptr) != NULL && /* a NULL ptr falls through to obd_alloc_fail() so real failures are still logged */ \ + (!HAS_FAIL_ALLOC_FLAG || obd_alloc_fail_rate == 0)) || \ + !obd_alloc_fail(ptr, #ptr, "slab-", size, \ + __FILE__, __LINE__) || \ + OBD_SLAB_FREE_RTN0(ptr, slab))) { \ memset(ptr, 0, size); \ OBD_ALLOC_POST(ptr, size, "slab-alloced"); \ } \ diff --git a/lustre/lvfs/Makefile.in b/lustre/lvfs/Makefile.in index afa2511..f103b62 100644 --- a/lustre/lvfs/Makefile.in +++ b/lustre/lvfs/Makefile.in @@ -2,7 +2,7 @@ MODULES := lvfs @SERVER_TRUE@MODULES += fsfilt_@BACKINGFS@ @QUOTA_TRUE@MODULES += quotafmt_test -lvfs-objs := lvfs_common.o lvfs_linux.o fsfilt.o upcall_cache.o +lvfs-objs := lvfs_common.o lvfs_linux.o fsfilt.o upcall_cache.o prng.o lvfs_lib.o @QUOTA_TRUE@lvfs-objs += lustre_quota_fmt.o @QUOTA_TRUE@quotafmt-objs := quotafmt_test.o diff --git a/lustre/lvfs/autoMakefile.am 
b/lustre/lvfs/autoMakefile.am index e923452..658e540 100644 --- a/lustre/lvfs/autoMakefile.am +++ b/lustre/lvfs/autoMakefile.am @@ -4,7 +4,7 @@ # See the file COPYING in this distribution if LIBLUSTRE noinst_LIBRARIES = liblvfs.a -liblvfs_a_SOURCES = lvfs_userfs.c +liblvfs_a_SOURCES = lvfs_userfs.c prng.c lvfs_lib.c liblvfs_a_CFLAGS = $(LLCFLAGS) liblvfs_a_CPPFLAGS = $(LLCPPFLAGS) diff --git a/lustre/lvfs/lvfs_lib.c b/lustre/lvfs/lvfs_lib.c new file mode 100644 index 0000000..8ea2133 --- /dev/null +++ b/lustre/lvfs/lvfs_lib.c @@ -0,0 +1,60 @@ +/* -*- mode: c; c-basic-offset: 8; indent-tabs-mode: nil; -*- + * vim:expandtab:shiftwidth=8:tabstop=8: + * + * lustre/lvfs/lvfs_lib.c + * Lustre filesystem abstraction routines + * + * Copyright (C) 2007 Cluster File Systems, Inc. + * Author: Andreas Dilger + * + * This file is part of Lustre, http://www.lustre.org. + * + * Lustre is free software; you can redistribute it and/or + * modify it under the terms of version 2 of the GNU General Public + * License as published by the Free Software Foundation. + * + * Lustre is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with Lustre; if not, write to the Free Software + * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA. + */ +#ifdef __KERNEL__ +#include +#include +#else +#include +#endif +#include + +unsigned int obd_fail_val = 0; +unsigned int obd_fail_loc = 0; +unsigned int obd_alloc_fail_rate = 0; + +int obd_alloc_fail(const void *ptr, const char *name, const char *type, + size_t size, const char *file, int line) /* returns 1 (after logging) when ptr is NULL or a failure is randomly forced, 0 otherwise */ +{ + if (ptr == NULL || + (ll_rand() & OBD_ALLOC_FAIL_MASK) < obd_alloc_fail_rate) { /* compare the low OBD_ALLOC_FAIL_BITS random bits against the configured rate */ + CERROR("%s%salloc of %s ("LPU64" bytes) failed at %s:%d\n", + ptr ? 
"force " :"", type, name, (__u64)size, file, + line); + CERROR(LPU64" total bytes and "LPU64" total pages " + "("LPU64" bytes) allocated by Lustre, " + "%d total bytes by LNET\n", + obd_memory_sum(), + obd_pages_sum(), /* page count, matches "total pages" */ + obd_pages_sum() << CFS_PAGE_SHIFT, /* the same pages expressed in bytes */ + atomic_read(&libcfs_kmemory)); + return 1; + } + return 0; +} +EXPORT_SYMBOL(obd_alloc_fail); + +EXPORT_SYMBOL(obd_fail_loc); +EXPORT_SYMBOL(obd_alloc_fail_rate); +EXPORT_SYMBOL(obd_fail_val); diff --git a/lustre/lvfs/lvfs_linux.c b/lustre/lvfs/lvfs_linux.c index 9bdcf35..e342eeb 100644 --- a/lustre/lvfs/lvfs_linux.c +++ b/lustre/lvfs/lvfs_linux.c @@ -39,7 +39,6 @@ #include #include #include -#include #include #include #include diff --git a/lustre/obdclass/prng.c b/lustre/lvfs/prng.c similarity index 100% rename from lustre/obdclass/prng.c rename to lustre/lvfs/prng.c diff --git a/lustre/obdclass/Makefile.in b/lustre/obdclass/Makefile.in index 30b7361..11f93a0 100644 --- a/lustre/obdclass/Makefile.in +++ b/lustre/obdclass/Makefile.in @@ -23,7 +23,7 @@ obdclass-all-objs := llog.o llog_cat.o llog_lvfs.o llog_obd.o llog_swab.o obdclass-all-objs += class_obd.o class_hash.o obdclass-all-objs += debug.o genops.o uuid.o llog_ioctl.o obdclass-all-objs += lprocfs_status.o lustre_handles.o lustre_peer.o -obdclass-all-objs += statfs_pack.o obdo.o obd_config.o obd_mount.o prng.o mea.o +obdclass-all-objs += statfs_pack.o obdo.o obd_config.o obd_mount.o mea.o obdclass-all-objs += lu_object.o dt_object.o hash.o capa.o lu_time.o obdclass-objs := $(obdclass-linux-objs) $(obdclass-all-objs) diff --git a/lustre/obdclass/autoMakefile.am b/lustre/obdclass/autoMakefile.am index 7a6fdce..21886e6 100644 --- a/lustre/obdclass/autoMakefile.am +++ b/lustre/obdclass/autoMakefile.am @@ -11,7 +11,7 @@ liblustreclass_a_SOURCES = class_obd.c debug.c genops.c statfs_pack.c mea.c uuid liblustreclass_a_SOURCES += lustre_handles.c lustre_peer.c lprocfs_status.c class_hash.c liblustreclass_a_SOURCES += obdo.c obd_config.c llog.c llog_obd.c 
llog_cat.c liblustreclass_a_SOURCES += llog_lvfs.c llog_swab.c capa.c -liblustreclass_a_SOURCES += prng.c #llog_ioctl.c rbtree.c +liblustreclass_a_SOURCES += #llog_ioctl.c rbtree.c liblustreclass_a_CPPFLAGS = $(LLCPPFLAGS) liblustreclass_a_CFLAGS = $(LLCFLAGS) @@ -32,7 +32,7 @@ obdclass_SOURCES := \ darwin/darwin-module.c darwin/darwin-sysctl.c \ class_obd.c genops.c lprocfs_status.c \ lustre_handles.c lustre_peer.c obd_config.c \ - obdo.c debug.c llog_ioctl.c uuid.c prng.c \ + obdo.c debug.c llog_ioctl.c uuid.c \ llog_swab.c llog_obd.c llog.c llog_cat.c llog_lvfs.c \ mea.c lu_object.c dt_object.c hash.c diff --git a/lustre/obdclass/class_obd.c b/lustre/obdclass/class_obd.c index 2e87394..243063d 100644 --- a/lustre/obdclass/class_obd.c +++ b/lustre/obdclass/class_obd.c @@ -60,8 +60,6 @@ __u64 obd_pages; #endif /* The following are visible and mutable through /proc/sys/lustre/. */ -unsigned int obd_fail_loc; -unsigned int obd_fail_val; unsigned int obd_debug_peer_on_timeout; unsigned int obd_dump_on_timeout; unsigned int obd_dump_on_eviction; @@ -379,8 +377,6 @@ void *obd_psdev = NULL; #endif EXPORT_SYMBOL(obd_devs); -EXPORT_SYMBOL(obd_fail_loc); -EXPORT_SYMBOL(obd_fail_val); EXPORT_SYMBOL(obd_print_fail_loc); EXPORT_SYMBOL(obd_race_waitq); EXPORT_SYMBOL(obd_race_state); diff --git a/lustre/obdclass/linux/linux-sysctl.c b/lustre/obdclass/linux/linux-sysctl.c index 05b4baf..b5d950e 100644 --- a/lustre/obdclass/linux/linux-sysctl.c +++ b/lustre/obdclass/linux/linux-sysctl.c @@ -42,6 +42,7 @@ #define DEBUG_SUBSYSTEM S_CLASS #include +#include cfs_sysctl_table_header_t *obd_table_header = NULL; @@ -60,6 +61,7 @@ enum { OBD_LDLM_TIMEOUT, /* LDLM timeout for ASTs before client eviction */ OBD_DUMP_ON_EVICTION, /* dump kernel debug log upon eviction */ OBD_DEBUG_PEER_ON_TIMEOUT, /* dump peer debug when RPC times out */ + OBD_ALLOC_FAIL_RATE, /* memory allocation random failure rate */ }; int LL_PROC_PROTO(proc_fail_loc) @@ -179,6 +181,41 @@ int 
LL_PROC_PROTO(proc_pages_max) return 0; } +#ifdef RANDOM_FAIL_ALLOC /* sysctl handler for the "alloc_fail_rate" entry of obd_table */ +int LL_PROC_PROTO(proc_alloc_fail_rate) +{ +#if LINUX_VERSION_CODE < KERNEL_VERSION(2,6,8) + loff_t *ppos = &filp->f_pos; +#endif + int rc = 0; + + if (!table->data || !table->maxlen || !*lenp || (*ppos && !write)) { /* nothing to do, or a read continuing past offset 0 */ + *lenp = 0; + return 0; + } + if (write) { + rc = lprocfs_write_frac_helper(buffer, *lenp, + (unsigned int*)table->data, + OBD_ALLOC_FAIL_MULT); /* value is stored scaled by OBD_ALLOC_FAIL_MULT */ + } else { + char buf[21]; + int len; + + len = lprocfs_read_frac_helper(buf, 21, + *(unsigned int*)table->data, + OBD_ALLOC_FAIL_MULT); + if (len > *lenp) + len = *lenp; + buf[len] = '\0'; /* NOTE(review): assumes the helper returns a length in 0..20 - confirm */ + if (copy_to_user(buffer, buf, len)) + return -EFAULT; + *lenp = len; + } + *ppos += *lenp; + return rc; +} +#endif + static cfs_sysctl_table_t obd_table[] = { { .ctl_name = OBD_FAIL_LOC, @@ -268,6 +305,16 @@ static cfs_sysctl_table_t obd_table[] = { .mode = 0644, .proc_handler = &proc_set_timeout }, +#ifdef RANDOM_FAIL_ALLOC /* must be the same guard as proc_alloc_fail_rate, otherwise the entry is never built with its handler */ + { + .ctl_name = OBD_ALLOC_FAIL_RATE, + .procname = "alloc_fail_rate", + .data = &obd_alloc_fail_rate, + .maxlen = sizeof(int), + .mode = 0644, + .proc_handler = &proc_alloc_fail_rate + }, +#endif { 0 } }; -- 1.8.3.1