From: ccooper Date: Wed, 15 Sep 2004 20:34:38 +0000 (+0000) Subject: r=adilger,nic X-Git-Tag: v1_8_0_110~486^5~165 X-Git-Url: https://git.whamcloud.com/gitweb?a=commitdiff_plain;h=197d5c0b2456c13b8b16e484216a6d9ed07bb164;p=fs%2Flustre-release.git r=adilger,nic - landing b1_2_smallfix --- diff --git a/lnet/libcfs/debug.c b/lnet/libcfs/debug.c index 01fbe35..0b55543 100644 --- a/lnet/libcfs/debug.c +++ b/lnet/libcfs/debug.c @@ -352,9 +352,7 @@ out: char *portals_debug_dumpstack(void) { - char *buf = stack_backtrace; - buf[0] = '\0'; - return buf; + return "dump_stack\n"; } #endif /* __arch_um__ */ diff --git a/lustre/ChangeLog b/lustre/ChangeLog index 22dd2fb..3e22a14 100644 --- a/lustre/ChangeLog +++ b/lustre/ChangeLog @@ -1,3 +1,17 @@ +tbd Cluster File Systems, Inc. + * version 1.2.6 + * bug fixes + - avoid crash during MDS cleanup with OST shut down (2775) + - fix loi_list_lock/oig_lock inversion on interrupted IO (4136) + - don't use bad inodes on the MDS (3744) + - dynamic object preallocation to improve recovery speed (4236) + - don't hold spinlock over lock dumping or change debug flags (4401) + - don't zero obd_dev when it is force cleaned (3651) + - print grants to console if they go negative (4431) + - "lctl deactivate" will stop automatic recovery attempts (3406) + - look for existing locks in ldlm_handle_enqueue() (3764) + - don't resolve lock handle twice in recovery avoiding race (4401) + 2004-08-24 Cluster File Systems, Inc. 
* version 1.2.5 * bug fixes @@ -19,6 +33,7 @@ - replace some LBUG about llog ops with error handling (3841) - don't match INVALID dentries from d_lookup and spin (3784) - hold dcache_lock while marking dentries INVALID and hashing (4255) + - fix invalid assertion in ptlrpc_set_wait (3880) * miscellania - add libwrap support for the TCP acceptor (3996) - add /proc/sys/portals/routes for non-root route listing (3994) diff --git a/lustre/configure.in b/lustre/configure.in index 2a70dd6..f3b2eb5 100644 --- a/lustre/configure.in +++ b/lustre/configure.in @@ -5,7 +5,7 @@ AC_INIT AC_CANONICAL_SYSTEM -AM_INIT_AUTOMAKE(lustre, 1.2.5) +AM_INIT_AUTOMAKE(lustre, 1.2.5.4) # AM_MAINTAINER_MODE # Four main targets: lustre kernel modules, utilities, tests, and liblustre diff --git a/lustre/include/linux/lustre_dlm.h b/lustre/include/linux/lustre_dlm.h index 9c2aa53..36740b9 100644 --- a/lustre/include/linux/lustre_dlm.h +++ b/lustre/include/linux/lustre_dlm.h @@ -171,6 +171,7 @@ struct ldlm_namespace { struct list_head ns_unused_list; /* all root resources in ns */ int ns_nr_unused; unsigned int ns_max_unused; + unsigned long ns_next_dump; /* next dump time */ spinlock_t ns_counter_lock; __u64 ns_locks; @@ -508,9 +509,9 @@ void ldlm_resource_add_lock(struct ldlm_resource *res, struct list_head *head, struct ldlm_lock *lock); void ldlm_resource_unlink_lock(struct ldlm_lock *lock); void ldlm_res2desc(struct ldlm_resource *res, struct ldlm_resource_desc *desc); -void ldlm_dump_all_namespaces(void); -void ldlm_namespace_dump(struct ldlm_namespace *); -void ldlm_resource_dump(struct ldlm_resource *); +void ldlm_dump_all_namespaces(int level); +void ldlm_namespace_dump(int level, struct ldlm_namespace *); +void ldlm_resource_dump(int level, struct ldlm_resource *); int ldlm_lock_change_resource(struct ldlm_namespace *, struct ldlm_lock *, struct ldlm_res_id); diff --git a/lustre/include/linux/lustre_export.h b/lustre/include/linux/lustre_export.h index 52b5c7a..7e96ad1 100644 --- 
a/lustre/include/linux/lustre_export.h +++ b/lustre/include/linux/lustre_export.h @@ -29,9 +29,7 @@ struct osc_creator { struct obd_device *oscc_obd; obd_id oscc_last_id;//last available pre-created object obd_id oscc_next_id;// what object id to give out next - int oscc_initial_create_count; int oscc_grow_count; - int oscc_kick_barrier; struct osc_created *oscc_osccd; struct obdo oscc_oa; int oscc_flags; @@ -53,9 +51,9 @@ struct filter_export_data { struct filter_client_data *fed_fcd; loff_t fed_lr_off; int fed_lr_idx; - unsigned long fed_dirty; /* in bytes */ - unsigned long fed_grant; /* in bytes */ - unsigned long fed_pending; /* bytes just being written */ + long fed_dirty; /* in bytes */ + long fed_grant; /* in bytes */ + long fed_pending; /* bytes just being written */ }; struct obd_export { diff --git a/lustre/include/linux/lustre_fsfilt.h b/lustre/include/linux/lustre_fsfilt.h index b9beff5..e39729e 100644 --- a/lustre/include/linux/lustre_fsfilt.h +++ b/lustre/include/linux/lustre_fsfilt.h @@ -93,6 +93,16 @@ extern void fsfilt_put_ops(struct fsfilt_operations *fs_ops); #define FSFILT_OP_LINK 9 #define FSFILT_OP_CANCEL_UNLINK 10 +#define fsfilt_check_slow(start, timeout, msg) \ +do { \ + if (time_before(jiffies, start + 15 * HZ)) \ + break; \ + else if (time_before(jiffies, start + timeout / 2 * HZ)) \ + CWARN("slow %s %lus\n", msg, (jiffies - start) / HZ); \ + else \ + CERROR("slow %s %lus\n", msg, (jiffies - start) / HZ); \ +} while (0) + static inline void *fsfilt_start_log(struct obd_device *obd, struct inode *inode, int op, struct obd_trans_info *oti, int logs) @@ -111,8 +121,7 @@ static inline void *fsfilt_start_log(struct obd_device *obd, LBUG(); } } - if (time_after(jiffies, now + 15 * HZ)) - CERROR("long journal start time %lus\n", (jiffies - now) / HZ); + fsfilt_check_slow(now, obd_timeout, "journal start"); return handle; } @@ -143,8 +152,7 @@ static inline void *fsfilt_brw_start_log(struct obd_device *obd, LBUG(); } } - if (time_after(jiffies, 
now + 15 * HZ)) - CERROR("long journal start time %lus\n", (jiffies - now) / HZ); + fsfilt_check_slow(now, obd_timeout, "journal start"); return handle; } @@ -164,8 +172,7 @@ static inline int fsfilt_commit(struct obd_device *obd, struct inode *inode, int rc = obd->obd_fsops->fs_commit(inode, handle, force_sync); CDEBUG(D_INFO, "committing handle %p\n", handle); - if (time_after(jiffies, now + 15 * HZ)) - CERROR("long journal start time %lus\n", (jiffies - now) / HZ); + fsfilt_check_slow(now, obd_timeout, "journal start"); return rc; } @@ -178,8 +185,7 @@ static inline int fsfilt_commit_async(struct obd_device *obd, int rc = obd->obd_fsops->fs_commit_async(inode, handle, wait_handle); CDEBUG(D_INFO, "committing handle %p (async)\n", *wait_handle); - if (time_after(jiffies, now + 15 * HZ)) - CERROR("long journal start time %lus\n", (jiffies - now) / HZ); + fsfilt_check_slow(now, obd_timeout, "journal start"); return rc; } @@ -190,8 +196,7 @@ static inline int fsfilt_commit_wait(struct obd_device *obd, unsigned long now = jiffies; int rc = obd->obd_fsops->fs_commit_wait(inode, handle); CDEBUG(D_INFO, "waiting for completion %p\n", handle); - if (time_after(jiffies, now + 15 * HZ)) - CERROR("long journal start time %lus\n", (jiffies - now) / HZ); + fsfilt_check_slow(now, obd_timeout, "journal start"); return rc; } @@ -201,8 +206,7 @@ static inline int fsfilt_setattr(struct obd_device *obd, struct dentry *dentry, unsigned long now = jiffies; int rc; rc = obd->obd_fsops->fs_setattr(dentry, handle, iattr, do_trunc); - if (time_after(jiffies, now + 15 * HZ)) - CERROR("long setattr time %lus\n", (jiffies - now) / HZ); + fsfilt_check_slow(now, obd_timeout, "setattr"); return rc; } diff --git a/lustre/include/linux/lustre_idl.h b/lustre/include/linux/lustre_idl.h index e2f74a5..53a172c 100644 --- a/lustre/include/linux/lustre_idl.h +++ b/lustre/include/linux/lustre_idl.h @@ -281,7 +281,7 @@ struct obdo { obd_count o_nlink; /* brw: checksum */ obd_count o_generation; obd_flag 
o_valid; /* hot fields in this obdo */ - obd_count o_misc; + obd_count o_misc; /* brw: o_dropped */ __u32 o_easize; /* epoch in ost writes */ char o_inline[OBD_INLINESZ]; /* fid in ost writes */ }; @@ -410,6 +410,9 @@ extern void lustre_swab_obd_statfs (struct obd_statfs *os); #define OBD_OBJECT_EOF 0xffffffffffffffffULL +#define OST_MIN_PRECREATE 32 +#define OST_MAX_PRECREATE 20000 + struct obd_ioobj { obd_id ioo_id; obd_gr ioo_gr; diff --git a/lustre/include/linux/lustre_import.h b/lustre/include/linux/lustre_import.h index 74be113..ecae76c 100644 --- a/lustre/include/linux/lustre_import.h +++ b/lustre/include/linux/lustre_import.h @@ -83,7 +83,8 @@ struct obd_import { int imp_invalid:1, imp_replayable:1, imp_dlm_fake:1, imp_server_timeout:1, imp_initial_recov:1, imp_force_verify:1, - imp_pingable:1, imp_resend_replay:1; + imp_pingable:1, imp_resend_replay:1, + imp_deactive:1; __u32 imp_connect_op; }; diff --git a/lustre/include/linux/lvfs.h b/lustre/include/linux/lvfs.h index 61f0cfc..5065397 100644 --- a/lustre/include/linux/lvfs.h +++ b/lustre/include/linux/lvfs.h @@ -1,9 +1,32 @@ +/* -*- mode: c; c-basic-offset: 8; indent-tabs-mode: nil; -*- + * vim:expandtab:shiftwidth=8:tabstop=8: + * + * Copyright (C) 2001 Cluster File Systems, Inc. + * + * This file is part of Lustre, http://www.lustre.org. + * + * Lustre is free software; you can redistribute it and/or + * modify it under the terms of version 2 of the GNU General Public + * License as published by the Free Software Foundation. + * + * Lustre is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with Lustre; if not, write to the Free Software + * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA. 
+ * + * lustre VFS/process permission interface + */ + #ifndef __LVFS_H__ #define __LVFS_H__ #include -#define LL_FID_NAMELEN (16 + 1 + 8 + 1) +#define LL_FID_NAMELEN (16 + 1 + 8 + 1) #if defined __KERNEL__ #include @@ -88,6 +111,15 @@ static inline struct dentry *ll_lookup_one_len(const char *fid_name, dchild = lookup_one_len(fid_name, dparent, fid_namelen); up(&dparent->d_inode->i_sem); + if (IS_ERR(dchild) || dchild->d_inode == NULL) + return dchild; + + if (is_bad_inode(dchild->d_inode)) { + CERROR("bad inode returned %lu/%u\n", + dchild->d_inode->i_ino, dchild->d_inode->i_generation); + dput(dchild); + dchild = ERR_PTR(-ENOENT); + } return dchild; } diff --git a/lustre/include/linux/obd.h b/lustre/include/linux/obd.h index c3eb266..d8869b1 100644 --- a/lustre/include/linux/obd.h +++ b/lustre/include/linux/obd.h @@ -148,6 +148,7 @@ struct oig_callback_context { * callees of this method are encouraged to abort their state * in the oig. This may be called multiple times. */ void (*occ_interrupted)(struct oig_callback_context *occ); + int interrupted; }; /* if we find more consumers this could be generalized */ @@ -214,7 +215,7 @@ struct mds_server_data; #define OSC_MAX_RIF_DEFAULT 8 #define OSC_MAX_RIF_MAX 64 -#define OSC_MAX_DIRTY_DEFAULT 8 +#define OSC_MAX_DIRTY_DEFAULT 32 #define OSC_MAX_DIRTY_MB_MAX 512 /* totally arbitrary */ struct mdc_rpc_lock; diff --git a/lustre/include/linux/obd_class.h b/lustre/include/linux/obd_class.h index f75d9ea..d70264c 100644 --- a/lustre/include/linux/obd_class.h +++ b/lustre/include/linux/obd_class.h @@ -156,102 +156,23 @@ void obdo_cpy_md(struct obdo *dst, struct obdo *src, obd_flag valid); int obdo_cmp_md(struct obdo *dst, struct obdo *src, obd_flag compare); void obdo_to_ioobj(struct obdo *oa, struct obd_ioobj *ioobj); -static inline int obd_check_conn(struct lustre_handle *conn) -{ - struct obd_device *obd; - if (!conn) { - CERROR("NULL conn\n"); - RETURN(-ENOTCONN); - } - - obd = class_conn2obd(conn); - if (!obd) { - 
CERROR("NULL obd\n"); - RETURN(-ENODEV); - } - - if (!obd->obd_attached) { - CERROR("obd %d not attached\n", obd->obd_minor); - RETURN(-ENODEV); - } - - if (!obd->obd_set_up) { - CERROR("obd %d not setup\n", obd->obd_minor); - RETURN(-ENODEV); - } - - if (!obd->obd_type) { - CERROR("obd %d not typed\n", obd->obd_minor); - RETURN(-ENODEV); - } - - if (!obd->obd_type->typ_ops) { - CERROR("obd_check_conn: obd %d no operations\n", - obd->obd_minor); - RETURN(-EOPNOTSUPP); - } - return 0; -} - - #define OBT(dev) (dev)->obd_type #define OBP(dev, op) (dev)->obd_type->typ_ops->o_ ## op #define CTXTP(ctxt, op) (ctxt)->loc_logops->lop_##op -/* Ensure obd_setup: used for disconnect which might be called while - an obd is stopping. */ -#define OBD_CHECK_SETUP(conn, exp) \ -do { \ - if (!(conn)) { \ - CERROR("NULL connection\n"); \ - RETURN(-EINVAL); \ - } \ - \ - exp = class_conn2export(conn); \ - if (!(exp)) { \ - CERROR("No export for conn "LPX64"\n", (conn)->cookie); \ - RETURN(-EINVAL); \ - } \ - \ - if (!(exp)->exp_obd->obd_set_up) { \ - CERROR("Device %d not setup\n", \ - (exp)->exp_obd->obd_minor); \ - class_export_put(exp); \ - RETURN(-EINVAL); \ - } \ -} while (0) - -/* Ensure obd_setup and !obd_stopping. 
*/ -#define OBD_CHECK_ACTIVE(conn, exp) \ -do { \ - if (!(conn)) { \ - CERROR("NULL connection\n"); \ - RETURN(-EINVAL); \ - } \ - \ - exp = class_conn2export(conn); \ - if (!(exp)) { \ - CERROR("No export for conn "LPX64"\n", (conn)->cookie); \ - RETURN(-EINVAL); \ - } \ - \ - if (!(exp)->exp_obd->obd_set_up || (exp)->exp_obd->obd_stopping) { \ - CERROR("Device %d not setup\n", \ - (exp)->exp_obd->obd_minor); \ - class_export_put(exp); \ - RETURN(-EINVAL); \ - } \ -} while (0) - /* Ensure obd_setup: used for cleanup which must be called while obd is stopping */ -#define OBD_CHECK_DEV_STOPPING(obd) \ +#define OBD_CHECK_DEV(obd) \ do { \ if (!(obd)) { \ CERROR("NULL device\n"); \ RETURN(-ENODEV); \ } \ - \ +} while (0) + +#define OBD_CHECK_DEV_STOPPING(obd) \ +do { \ + OBD_CHECK_DEV(obd); \ if (!(obd)->obd_set_up) { \ CERROR("Device %d not setup\n", \ (obd)->obd_minor); \ @@ -268,11 +189,7 @@ do { \ /* ensure obd_setup and !obd_stopping */ #define OBD_CHECK_DEV_ACTIVE(obd) \ do { \ - if (!(obd)) { \ - CERROR("NULL device\n"); \ - RETURN(-ENODEV); \ - } \ - \ + OBD_CHECK_DEV(obd); \ if (!(obd)->obd_set_up || (obd)->obd_stopping) { \ CERROR("Device %d not setup\n", \ (obd)->obd_minor); \ @@ -982,6 +899,11 @@ static inline void obd_import_event(struct obd_device *obd, struct obd_import *imp, enum obd_import_event event) { + if (!obd) { + CERROR("NULL device\n"); + EXIT; + return; + } if (obd->obd_set_up && OBP(obd, import_event)) { OBD_COUNTER_INCREMENT(obd, import_event); OBP(obd, import_event)(obd, imp, event); @@ -992,6 +914,7 @@ static inline int obd_notify(struct obd_device *obd, struct obd_device *watched, int active) { + OBD_CHECK_DEV(obd); if (!obd->obd_set_up) { CERROR("obd %s not set up\n", obd->obd_name); return -EINVAL; @@ -1010,6 +933,7 @@ static inline int obd_register_observer(struct obd_device *obd, struct obd_device *observer) { ENTRY; + OBD_CHECK_DEV(obd); if (obd->obd_observer && observer) RETURN(-EALREADY); obd->obd_observer = observer; diff --git 
a/lustre/kernel_patches/patches/blkdev_tunables-2.4.21-chaos.patch b/lustre/kernel_patches/patches/blkdev_tunables-2.4.21-chaos.patch new file mode 100644 index 0000000..2a834ac --- /dev/null +++ b/lustre/kernel_patches/patches/blkdev_tunables-2.4.21-chaos.patch @@ -0,0 +1,34 @@ +--- ./drivers/addon/qla2200/qla2x00.h 2004-07-26 12:52:08.000000000 +0100 ++++ ./drivers/addon/qla2200/qla2x00.h 2004-07-26 12:58:42.000000000 +0100 +@@ -3208,7 +3208,7 @@ void qla2x00_setup(char *s); + /* Kernel version specific template additions */ + + /* Number of segments 1 - 65535 */ +-#define SG_SEGMENTS 32 /* Cmd entry + 6 continuations */ ++#define SG_SEGMENTS 512 /* Cmd entry + 6 continuations */ + + /* + * Scsi_Host_template (see hosts.h) +@@ -3222,7 +3222,7 @@ void qla2x00_setup(char *s); + * + */ + #if LINUX_VERSION_CODE >= KERNEL_VERSION(2,4,8) +-#define TEMPLATE_MAX_SECTORS max_sectors: 512, ++#define TEMPLATE_MAX_SECTORS max_sectors: 2048, + #else + #define TEMPLATE_MAX_SECTORS + #endif +--- ./include/linux/blkdev.h 2004-07-26 12:53:11.000000000 +0100 ++++ ./include/linux/blkdev.h 2004-07-26 13:12:42.000000000 +0100 +@@ -255,9 +255,9 @@ extern int * max_segments[MAX_BLKDEV]; + + extern char * blkdev_varyio[MAX_BLKDEV]; + +-#define MAX_SEGMENTS 128 ++#define MAX_SEGMENTS 256 + #define MAX_SECTORS 255 +-#define MAX_SUPERBH 32768 /* must fit info ->b_size right now */ ++#define MAX_SUPERBH (1<<20) + + /* + * bh abuse :/ diff --git a/lustre/kernel_patches/patches/ext3-ialloc-2.4.21-chaos.patch b/lustre/kernel_patches/patches/ext3-ialloc-2.4.21-chaos.patch new file mode 100644 index 0000000..dd50ad0 --- /dev/null +++ b/lustre/kernel_patches/patches/ext3-ialloc-2.4.21-chaos.patch @@ -0,0 +1,236 @@ +--- ./fs/ext3/ialloc.c.orig 2004-08-02 13:26:38.000000000 -0700 ++++ ./fs/ext3/ialloc.c 2004-09-03 14:57:14.000000000 -0700 +@@ -328,21 +328,143 @@ + * directories already is chosen. 
+ * + * For other inodes, search forward from the parent directory's block +- * group to find a free inode. ++ * group to find a free inode in a group with some free blocks. + */ ++static int find_group_dir(struct super_block *sb, const struct inode *parent, ++ struct ext3_group_desc **best_desc, ++ struct buffer_head **best_bh) ++{ ++ struct ext3_sb_info *sbi = EXT3_SB(sb); ++ int ngroups = sbi->s_groups_count; ++ int avefreei; ++ struct ext3_group_desc *desc; ++ struct buffer_head *bh; ++ int group, best_group = -1, ndir_best = 999999999; ++ ++ *best_desc = NULL; ++ *best_bh = NULL; ++ ++ avefreei = le32_to_cpu(sbi->s_es->s_free_inodes_count) / ++ sbi->s_groups_count / ngroups; ++ ++ for (group = 0; group < ngroups; group++) { ++ desc = ext3_get_group_desc(sb, group, &bh); ++ if (!desc || !desc->bg_free_inodes_count) ++ continue; ++ if (le16_to_cpu(desc->bg_free_inodes_count) < avefreei) ++ continue; ++ if (le16_to_cpu(desc->bg_used_dirs_count) > ndir_best) ++ continue; ++ if (!*best_desc || ++ (le16_to_cpu(desc->bg_free_blocks_count) > ++ le16_to_cpu((*best_desc)->bg_free_blocks_count))) { ++ *best_bh = bh; ++ *best_desc = desc; ++ best_group = group; ++ ndir_best = le16_to_cpu(desc->bg_used_dirs_count); ++ } ++ } ++ ++ return best_group; ++} ++ ++static int find_group_other(struct super_block *sb, const struct inode *parent, ++ struct ext3_group_desc **best_desc, ++ struct buffer_head **best_bh) ++{ ++ struct ext3_sb_info *sbi = EXT3_SB(sb); ++ int parent_group = EXT3_I(parent)->i_block_group; ++ int ngroups = sbi->s_groups_count; ++ int avefreeb; ++ struct ext3_group_desc *desc; ++ struct buffer_head *bh; ++ int group, i, best_group = -1; ++ ++ *best_desc = NULL; ++ *best_bh = NULL; ++ ++ /* ++ * Try to place the inode in its parent directory ++ */ ++ group = parent_group; ++ desc = ext3_get_group_desc (sb, group, &bh); ++ if (desc && le16_to_cpu(desc->bg_free_inodes_count) && ++ le16_to_cpu(desc->bg_free_blocks_count)) { ++ *best_desc = desc; ++ *best_bh = 
bh; ++ return group; ++ } ++ ++ /* ++ * We're going to place this inode in a different blockgroup from its ++ * parent. We want to cause files in a common directory to all land in ++ * the same blockgroup if it has space. But we want files which are ++ * in a different directory which shares a blockgroup with our parent ++ * to land in a different blockgroup. ++ * ++ * So add our directory's i_ino into the starting point for the hash. ++ */ ++ group = (group + parent->i_ino) % ngroups; ++ ++ avefreeb = le32_to_cpu(sbi->s_es->s_free_blocks_count) / ++ sbi->s_groups_count / ngroups; ++ ++ /* ++ * Use a quadratic hash to find a group with a free inode and some free ++ * blocks. ++ */ ++ for (i = 1; i < ngroups; i <<= 1) { ++ group += i; ++ if (group >= ngroups) ++ group -= ngroups; ++ desc = ext3_get_group_desc(sb, group, &bh); ++ if (!desc || !desc->bg_free_inodes_count) ++ continue; ++ if (le16_to_cpu(desc->bg_free_blocks_count) > avefreeb) { ++ *best_bh = bh; ++ *best_desc = desc; ++ return group; ++ } ++ } ++ ++ /* ++ * That failed: try linear search for a group with free inodes and ++ * preferably free blocks, returning as soon as we find a good one. 
++ */ ++ group = sbi->s_last_group; ++ for (i = 0; i < ngroups; i++) { ++ if (++group >= ngroups) ++ group = 0; ++ desc = ext3_get_group_desc(sb, group, &bh); ++ if (!desc || !desc->bg_free_inodes_count) ++ continue; ++ if (!*best_desc || ++ (le16_to_cpu(desc->bg_free_blocks_count) > ++ le16_to_cpu((*best_desc)->bg_free_blocks_count))) { ++ *best_bh = bh; ++ *best_desc = desc; ++ best_group = group; ++ if (le16_to_cpu(desc->bg_free_blocks_count) >= avefreeb) ++ break; ++ } ++ } ++ sbi->s_last_group = best_group; ++ ++ return best_group; ++} ++ + struct inode * ext3_new_inode(handle_t *handle, const struct inode * dir, + int mode, unsigned long goal) + { + struct super_block * sb; + struct buffer_head * bh; + struct buffer_head * bh2; +- int i, j, avefreei; ++ int i, j; + struct inode * inode; + int bitmap_nr; + struct ext3_inode_info *ei; + struct ext3_sb_info *sbi; + struct ext3_group_desc * gdp; +- struct ext3_group_desc * tmp; + struct ext3_super_block * es; + struct ext3_iloc iloc; + int err = 0; +@@ -396,72 +518,10 @@ + } + + repeat: +- gdp = NULL; +- i = 0; +- +- if (S_ISDIR(mode)) { +- avefreei = le32_to_cpu(es->s_free_inodes_count) / +- sbi->s_groups_count; +- if (!gdp) { +- for (j = 0; j < sbi->s_groups_count; j++) { +- struct buffer_head *temp_buffer; +- tmp = ext3_get_group_desc (sb, j, &temp_buffer); +- if (tmp && +- le16_to_cpu(tmp->bg_free_inodes_count) && +- le16_to_cpu(tmp->bg_free_inodes_count) >= +- avefreei) { +- if (!gdp || (le16_to_cpu(tmp->bg_free_blocks_count) > +- le16_to_cpu(gdp->bg_free_blocks_count))) { +- i = j; +- gdp = tmp; +- bh2 = temp_buffer; +- } +- } +- } +- } +- } else { +- /* +- * Try to place the inode in its parent directory +- */ +- i = EXT3_I(dir)->i_block_group; +- tmp = ext3_get_group_desc (sb, i, &bh2); +- if (tmp && le16_to_cpu(tmp->bg_free_inodes_count)) +- gdp = tmp; +- else +- { +- /* +- * Use a quadratic hash to find a group with a +- * free inode +- */ +- for (j = 1; j < sbi->s_groups_count; j <<= 1) { +- i += j; +- 
if (i >= sbi->s_groups_count) +- i -= sbi->s_groups_count; +- tmp = ext3_get_group_desc (sb, i, &bh2); +- if (tmp && +- le16_to_cpu(tmp->bg_free_inodes_count)) { +- gdp = tmp; +- break; +- } +- } +- } +- if (!gdp) { +- /* +- * That failed: try linear search for a free inode +- */ +- i = EXT3_I(dir)->i_block_group + 1; +- for (j = 2; j < sbi->s_groups_count; j++) { +- if (++i >= sbi->s_groups_count) +- i = 0; +- tmp = ext3_get_group_desc (sb, i, &bh2); +- if (tmp && +- le16_to_cpu(tmp->bg_free_inodes_count)) { +- gdp = tmp; +- break; +- } +- } +- } +- } ++ if (S_ISDIR(mode)) ++ i = find_group_dir(sb, dir, &gdp, &bh2); ++ else ++ i = find_group_other(sb, dir, &gdp, &bh2); + + err = -ENOSPC; + if (!gdp) +--- linux/include/linux/ext3_fs_sb.h.orig 2004-08-26 13:28:53.000000000 -0600 ++++ linux/include/linux/ext3_fs_sb.h 2004-08-31 11:04:27.000000000 -0600 +@@ -45,6 +45,7 @@ struct ext3_sb_info { + unsigned long s_gdb_count; /* Number of group descriptor blocks */ + unsigned long s_desc_per_block; /* Number of group descriptors per block */ + unsigned long s_groups_count; /* Number of groups in the fs */ ++ unsigned long s_last_group; /* Last group used for inode allocation */ + struct buffer_head * s_sbh; /* Buffer containing the super block */ + struct ext3_super_block * s_es; /* Pointer to the super block in the buffer */ + struct buffer_head ** s_group_desc; diff --git a/lustre/kernel_patches/patches/ext3-ialloc-2.4.24.patch b/lustre/kernel_patches/patches/ext3-ialloc-2.4.24.patch new file mode 100644 index 0000000..ac27901 --- /dev/null +++ b/lustre/kernel_patches/patches/ext3-ialloc-2.4.24.patch @@ -0,0 +1,239 @@ +Index: lum/fs/ext3/ialloc.c +=================================================================== +--- lum.orig/fs/ext3/ialloc.c 2004-08-26 13:14:35.000000000 -0600 ++++ lum/fs/ext3/ialloc.c 2004-08-31 15:00:35.000000000 -0600 +@@ -327,8 +327,131 @@ int ext3_itable_block_used(struct super_ + * directories already is chosen. 
+ * + * For other inodes, search forward from the parent directory's block +- * group to find a free inode. ++ * group to find a free inode in a group with some free blocks. + */ ++static int find_group_dir(struct super_block *sb, const struct inode *parent, ++ struct ext3_group_desc **best_desc, ++ struct buffer_head **best_bh) ++{ ++ struct ext3_sb_info *sbi = EXT3_SB(sb); ++ int ngroups = sbi->s_groups_count; ++ int avefreei; ++ struct ext3_group_desc *desc; ++ struct buffer_head *bh; ++ int group, best_group = -1, ndir_best = 999999999; ++ ++ *best_desc = NULL; ++ *best_bh = NULL; ++ ++ avefreei = le32_to_cpu(sbi->s_es->s_free_inodes_count) / ++ sbi->s_groups_count / ngroups; ++ ++ for (group = 0; group < ngroups; group++) { ++ desc = ext3_get_group_desc(sb, group, &bh); ++ if (!desc || !desc->bg_free_inodes_count) ++ continue; ++ if (le16_to_cpu(desc->bg_free_inodes_count) < avefreei) ++ continue; ++ if (le16_to_cpu(desc->bg_used_dirs_count) > ndir_best) ++ continue; ++ if (!*best_desc || ++ (le16_to_cpu(desc->bg_free_blocks_count) > ++ le16_to_cpu((*best_desc)->bg_free_blocks_count))) { ++ *best_bh = bh; ++ *best_desc = desc; ++ best_group = group; ++ ndir_best = le16_to_cpu(desc->bg_used_dirs_count); ++ } ++ } ++ ++ return best_group; ++} ++ ++static int find_group_other(struct super_block *sb, const struct inode *parent, ++ struct ext3_group_desc **best_desc, ++ struct buffer_head **best_bh) ++{ ++ struct ext3_sb_info *sbi = EXT3_SB(sb); ++ int parent_group = EXT3_I(parent)->i_block_group; ++ int ngroups = sbi->s_groups_count; ++ int avefreeb; ++ struct ext3_group_desc *desc; ++ struct buffer_head *bh; ++ int group, i, best_group = -1; ++ ++ *best_desc = NULL; ++ *best_bh = NULL; ++ ++ /* ++ * Try to place the inode in its parent directory ++ */ ++ group = parent_group; ++ desc = ext3_get_group_desc (sb, group, &bh); ++ if (desc && le16_to_cpu(desc->bg_free_inodes_count) && ++ le16_to_cpu(desc->bg_free_blocks_count)) { ++ *best_desc = desc; ++ *best_bh = 
bh; ++ return group; ++ } ++ ++ /* ++ * We're going to place this inode in a different blockgroup from its ++ * parent. We want to cause files in a common directory to all land in ++ * the same blockgroup if it has space. But we want files which are ++ * in a different directory which shares a blockgroup with our parent ++ * to land in a different blockgroup. ++ * ++ * So add our directory's i_ino into the starting point for the hash. ++ */ ++ group = (group + parent->i_ino) % ngroups; ++ ++ avefreeb = le32_to_cpu(sbi->s_es->s_free_blocks_count) / ++ sbi->s_groups_count / ngroups; ++ ++ /* ++ * Use a quadratic hash to find a group with a free inode and some free ++ * blocks. ++ */ ++ for (i = 1; i < ngroups; i <<= 1) { ++ group += i; ++ if (group >= ngroups) ++ group -= ngroups; ++ desc = ext3_get_group_desc(sb, group, &bh); ++ if (!desc || !desc->bg_free_inodes_count) ++ continue; ++ if (le16_to_cpu(desc->bg_free_blocks_count) > avefreeb) { ++ *best_bh = bh; ++ *best_desc = desc; ++ return group; ++ } ++ } ++ ++ /* ++ * That failed: try linear search for a group with free inodes and ++ * preferably free blocks, returning as soon as we find a good one. 
++ */ ++ group = sbi->s_last_group; ++ for (i = 0; i < ngroups; i++) { ++ if (++group >= ngroups) ++ group = 0; ++ desc = ext3_get_group_desc(sb, group, &bh); ++ if (!desc || !desc->bg_free_inodes_count) ++ continue; ++ if (!*best_desc || ++ (le16_to_cpu(desc->bg_free_blocks_count) > ++ le16_to_cpu((*best_desc)->bg_free_blocks_count))) { ++ *best_bh = bh; ++ *best_desc = desc; ++ best_group = group; ++ if (le16_to_cpu(desc->bg_free_blocks_count) >= avefreeb) ++ break; ++ } ++ } ++ sbi->s_last_group = best_group; ++ ++ return best_group; ++} ++ + struct inode * ext3_new_inode (handle_t *handle, + const struct inode * dir, int mode, + unsigned long goal) +@@ -336,11 +459,10 @@ struct inode * ext3_new_inode (handle_t + struct super_block * sb; + struct buffer_head * bh; + struct buffer_head * bh2; +- int i, j, avefreei; ++ int i, j; + struct inode * inode; + int bitmap_nr; + struct ext3_group_desc * gdp; +- struct ext3_group_desc * tmp; + struct ext3_super_block * es; + struct ext3_iloc iloc; + int err = 0; +@@ -392,72 +514,10 @@ struct inode * ext3_new_inode (handle_t + } + + repeat: +- gdp = NULL; +- i = 0; +- +- if (S_ISDIR(mode)) { +- avefreei = le32_to_cpu(es->s_free_inodes_count) / +- sb->u.ext3_sb.s_groups_count; +- if (!gdp) { +- for (j = 0; j < sb->u.ext3_sb.s_groups_count; j++) { +- struct buffer_head *temp_buffer; +- tmp = ext3_get_group_desc (sb, j, &temp_buffer); +- if (tmp && +- le16_to_cpu(tmp->bg_free_inodes_count) && +- le16_to_cpu(tmp->bg_free_inodes_count) >= +- avefreei) { +- if (!gdp || (le16_to_cpu(tmp->bg_free_blocks_count) > +- le16_to_cpu(gdp->bg_free_blocks_count))) { +- i = j; +- gdp = tmp; +- bh2 = temp_buffer; +- } +- } +- } +- } +- } else { +- /* +- * Try to place the inode in its parent directory +- */ +- i = dir->u.ext3_i.i_block_group; +- tmp = ext3_get_group_desc (sb, i, &bh2); +- if (tmp && le16_to_cpu(tmp->bg_free_inodes_count)) +- gdp = tmp; +- else +- { +- /* +- * Use a quadratic hash to find a group with a +- * free inode +- */ 
+- for (j = 1; j < sb->u.ext3_sb.s_groups_count; j <<= 1) { +- i += j; +- if (i >= sb->u.ext3_sb.s_groups_count) +- i -= sb->u.ext3_sb.s_groups_count; +- tmp = ext3_get_group_desc (sb, i, &bh2); +- if (tmp && +- le16_to_cpu(tmp->bg_free_inodes_count)) { +- gdp = tmp; +- break; +- } +- } +- } +- if (!gdp) { +- /* +- * That failed: try linear search for a free inode +- */ +- i = dir->u.ext3_i.i_block_group + 1; +- for (j = 2; j < sb->u.ext3_sb.s_groups_count; j++) { +- if (++i >= sb->u.ext3_sb.s_groups_count) +- i = 0; +- tmp = ext3_get_group_desc (sb, i, &bh2); +- if (tmp && +- le16_to_cpu(tmp->bg_free_inodes_count)) { +- gdp = tmp; +- break; +- } +- } +- } +- } ++ if (S_ISDIR(mode)) ++ i = find_group_dir(sb, dir, &gdp, &bh2); ++ else ++ i = find_group_other(sb, dir, &gdp, &bh2); + + err = -ENOSPC; + if (!gdp) +Index: lum/include/linux/ext3_fs_sb.h +=================================================================== +--- lum.orig/include/linux/ext3_fs_sb.h 2004-08-26 13:28:53.000000000 -0600 ++++ lum/include/linux/ext3_fs_sb.h 2004-08-31 11:04:27.000000000 -0600 +@@ -45,6 +45,7 @@ struct ext3_sb_info { + unsigned long s_gdb_count; /* Number of group descriptor blocks */ + unsigned long s_desc_per_block; /* Number of group descriptors per block */ + unsigned long s_groups_count; /* Number of groups in the fs */ ++ unsigned long s_last_group; /* Last group used for inode allocation */ + struct buffer_head * s_sbh; /* Buffer containing the super block */ + struct ext3_super_block * s_es; /* Pointer to the super block in the buffer */ + struct buffer_head ** s_group_desc; diff --git a/lustre/kernel_patches/patches/ext3-nlinks-2.4.21-chaos.patch b/lustre/kernel_patches/patches/ext3-nlinks-2.4.21-chaos.patch new file mode 100644 index 0000000..debe4b3 --- /dev/null +++ b/lustre/kernel_patches/patches/ext3-nlinks-2.4.21-chaos.patch @@ -0,0 +1,174 @@ +Index: 69chaos/fs/ext3/namei.c +=================================================================== +--- 
69chaos.orig/fs/ext3/namei.c 2004-08-24 23:56:04.000000000 -0700 ++++ 69chaos/fs/ext3/namei.c 2004-08-24 23:57:20.000000000 -0700 +@@ -1542,11 +1542,16 @@ + static inline void ext3_inc_count(handle_t *handle, struct inode *inode) + { + inode->i_nlink++; ++ if (is_dx(inode) && inode->i_nlink > 1) { ++ if (inode->i_nlink >= 65000) /* limit is 16-bit i_links_count */ ++ inode->i_nlink = 1; ++ } + } + + static inline void ext3_dec_count(handle_t *handle, struct inode *inode) + { +- inode->i_nlink--; ++ if (!S_ISDIR(inode->i_mode) || inode->i_nlink > 2) ++ inode->i_nlink--; + } + + static int ext3_add_nondir(handle_t *handle, +@@ -1650,7 +1655,7 @@ + struct ext3_dir_entry_2 * de; + int err; + +- if (dir->i_nlink >= EXT3_LINK_MAX) ++ if (EXT3_DIR_LINK_MAXED(dir)) + return -EMLINK; + + handle = ext3_journal_start(dir, EXT3_DATA_TRANS_BLOCKS + +@@ -1672,7 +1677,7 @@ + inode->i_size = EXT3_I(inode)->i_disksize = inode->i_sb->s_blocksize; + dir_block = ext3_bread (handle, inode, 0, 1, &err); + if (!dir_block) { +- inode->i_nlink--; /* is this nlink == 0? */ ++ ext3_dec_count(handle, inode); /* is this nlink == 0? */ + ext3_mark_inode_dirty(handle, inode); + iput (inode); + goto out_stop; +@@ -1704,7 +1709,7 @@ + iput (inode); + goto out_stop; + } +- dir->i_nlink++; ++ ext3_inc_count(handle, dir); + ext3_update_dx_flag(dir); + ext3_mark_inode_dirty(handle, dir); + d_instantiate(dentry, inode); +@@ -1765,10 +1770,11 @@ + } + de = (struct ext3_dir_entry_2 *) bh->b_data; + } +- if (!ext3_check_dir_entry ("empty_dir", inode, de, bh, +- offset)) { +- brelse (bh); +- return 1; ++ if (!ext3_check_dir_entry("empty_dir", inode, de, bh, offset)) { ++ /* On error skip the de and offset to the next block. 
*/ ++ de = (void *)(bh->b_data + sb->s_blocksize); ++ offset = (offset | (sb->s_blocksize - 1)) + 1; ++ continue; + } + if (le32_to_cpu(de->inode)) { + brelse (bh); +@@ -1960,14 +1966,14 @@ + retval = ext3_delete_entry(handle, dir, de, bh); + if (retval) + goto end_rmdir; +- if (inode->i_nlink != 2) +- ext3_warning (inode->i_sb, "ext3_rmdir", +- "empty directory has nlink!=2 (%d)", +- inode->i_nlink); ++ if (!EXT3_DIR_LINK_EMPTY(inode)) ++ ext3_warning(inode->i_sb, __FUNCTION__, ++ "empty directory has too many links (%d)", ++ inode->i_nlink); + inode->i_version = ++event; + inode->i_nlink = 0; + ext3_orphan_add(handle, inode); +- dir->i_nlink--; ++ ext3_dec_count(handle, dir); + inode->i_ctime = dir->i_ctime = dir->i_mtime = CURRENT_TIME; + ext3_mark_inode_dirty(handle, inode); + ext3_update_dx_flag(dir); +@@ -2019,7 +2025,7 @@ + dir->i_ctime = dir->i_mtime = CURRENT_TIME; + ext3_update_dx_flag(dir); + ext3_mark_inode_dirty(handle, dir); +- inode->i_nlink--; ++ ext3_dec_count(handle, inode); + if (!inode->i_nlink) + ext3_orphan_add(handle, inode); + inode->i_ctime = dir->i_ctime; +@@ -2111,9 +2117,8 @@ + if (S_ISDIR(inode->i_mode)) + return -EPERM; + +- if (inode->i_nlink >= EXT3_LINK_MAX) { ++ if (EXT3_DIR_LINK_MAXED(inode)) + return -EMLINK; +- } + + handle = ext3_journal_start(dir, EXT3_DATA_TRANS_BLOCKS + + EXT3_INDEX_EXTRA_TRANS_BLOCKS); +@@ -2197,8 +2202,8 @@ + if (le32_to_cpu(PARENT_INO(dir_bh->b_data)) != old_dir->i_ino) + goto end_rename; + retval = -EMLINK; +- if (!new_inode && new_dir!=old_dir && +- new_dir->i_nlink >= EXT3_LINK_MAX) ++ if (!new_inode && new_dir != old_dir && ++ EXT3_DIR_LINK_MAXED(new_dir)) + goto end_rename; + } + if (!new_bh) { +@@ -2256,7 +2261,7 @@ + } + + if (new_inode) { +- new_inode->i_nlink--; ++ ext3_dec_count(handle, new_inode); + new_inode->i_ctime = CURRENT_TIME; + } + old_dir->i_ctime = old_dir->i_mtime = CURRENT_TIME; +@@ -2267,11 +2272,11 @@ + PARENT_INO(dir_bh->b_data) = le32_to_cpu(new_dir->i_ino); + 
BUFFER_TRACE(dir_bh, "call ext3_journal_dirty_metadata"); + ext3_journal_dirty_metadata(handle, dir_bh); +- old_dir->i_nlink--; ++ ext3_dec_count(handle, old_dir); + if (new_inode) { +- new_inode->i_nlink--; ++ ext3_dec_count(handle, new_inode); + } else { +- new_dir->i_nlink++; ++ ext3_inc_count(handle, new_dir); + ext3_update_dx_flag(new_dir); + ext3_mark_inode_dirty(handle, new_dir); + } +Index: 69chaos/include/linux/ext3_fs.h +=================================================================== +--- 69chaos.orig/include/linux/ext3_fs.h 2004-08-24 23:55:45.000000000 -0700 ++++ 69chaos/include/linux/ext3_fs.h 2004-08-24 23:56:47.000000000 -0700 +@@ -44,7 +44,7 @@ + /* + * Always enable hashed directories + */ +-#define CONFIG_EXT3_INDEX ++#define CONFIG_EXT3_INDEX 1 + + /* + * Debug code +@@ -582,14 +582,15 @@ + */ + + #ifdef CONFIG_EXT3_INDEX +- #define is_dx(dir) (EXT3_HAS_COMPAT_FEATURE(dir->i_sb, \ +- EXT3_FEATURE_COMPAT_DIR_INDEX) && \ ++#define is_dx(dir) (EXT3_HAS_COMPAT_FEATURE(dir->i_sb, \ ++ EXT3_FEATURE_COMPAT_DIR_INDEX) && \ + (EXT3_I(dir)->i_flags & EXT3_INDEX_FL)) +-#define EXT3_DIR_LINK_MAX(dir) (!is_dx(dir) && (dir)->i_nlink >= EXT3_LINK_MAX) +-#define EXT3_DIR_LINK_EMPTY(dir) ((dir)->i_nlink == 2 || (dir)->i_nlink == 1) ++#define EXT3_DIR_LINK_MAXED(dir) (!is_dx(dir) && (dir)->i_nlink >=EXT3_LINK_MAX) ++#define EXT3_DIR_LINK_EMPTY(dir) ((dir)->i_nlink == 2 || \ ++ (is_dx(dir) && (dir)->i_nlink == 1)) + #else + #define is_dx(dir) 0 +-#define EXT3_DIR_LINK_MAX(dir) ((dir)->i_nlink >= EXT3_LINK_MAX) ++#define EXT3_DIR_LINK_MAXED(dir) ((dir)->i_nlink >= EXT3_LINK_MAX) + #define EXT3_DIR_LINK_EMPTY(dir) ((dir)->i_nlink == 2) + #endif + diff --git a/lustre/kernel_patches/patches/ext3-o_direct-1-2.4.18-chaos.patch b/lustre/kernel_patches/patches/ext3-o_direct-1-2.4.18-chaos.patch index 932ae84..759aaa7 100644 --- a/lustre/kernel_patches/patches/ext3-o_direct-1-2.4.18-chaos.patch +++ b/lustre/kernel_patches/patches/ext3-o_direct-1-2.4.18-chaos.patch @@ 
-112,7 +112,7 @@ + handle_t *handle = NULL; + int ret; + int orphan = 0; -+ loff_t offset = blocknr << inode->i_blkbits; /* ugh */ ++ loff_t offset = (loff_t)blocknr << inode->i_blkbits; /* ugh */ + ssize_t count = iobuf->length; /* ditto */ + + if (rw == WRITE) { diff --git a/lustre/kernel_patches/patches/ext3-o_direct-1.2.4.20-rh.patch b/lustre/kernel_patches/patches/ext3-o_direct-1.2.4.20-rh.patch index 1caa289..3aff4c5 100644 --- a/lustre/kernel_patches/patches/ext3-o_direct-1.2.4.20-rh.patch +++ b/lustre/kernel_patches/patches/ext3-o_direct-1.2.4.20-rh.patch @@ -113,7 +113,7 @@ Index: linux-2.4.20-rh/fs/ext3/inode.c + handle_t *handle = NULL; + int ret; + int orphan = 0; -+ loff_t offset = blocknr << inode->i_blkbits; /* ugh */ ++ loff_t offset = (loff_t)blocknr << inode->i_blkbits; /* ugh */ + ssize_t count = iobuf->length; /* ditto */ + + if (rw == WRITE) { diff --git a/lustre/kernel_patches/patches/ext3-o_direct-1.patch b/lustre/kernel_patches/patches/ext3-o_direct-1.patch index ac71640..e3285cf 100644 --- a/lustre/kernel_patches/patches/ext3-o_direct-1.patch +++ b/lustre/kernel_patches/patches/ext3-o_direct-1.patch @@ -111,7 +111,7 @@ + handle_t *handle = NULL; + int ret; + int orphan = 0; -+ loff_t offset = blocknr << inode->i_blkbits; /* ugh */ ++ loff_t offset = (loff_t)blocknr << inode->i_blkbits; /* ugh */ + ssize_t count = iobuf->length; /* ditto */ + + if (rw == WRITE) { diff --git a/lustre/kernel_patches/patches/lustre_version.patch b/lustre/kernel_patches/patches/lustre_version.patch index 5c1887d..6df5d04 100644 --- a/lustre/kernel_patches/patches/lustre_version.patch +++ b/lustre/kernel_patches/patches/lustre_version.patch @@ -1,4 +1,6 @@ +Version 40: >32000 subdirectories support for ext3 (b=3244) Version 39: add EXPORT_SYMBOL(smp_num_siblings) to vanilla-2.4.24 (b=3966) + eebperf (unfragmented 1MB write/read support) Version 38: drop dentry ref in ext3_add_link from open_connect_dentry (b=3266) Version 37: fix htree rename-within-same-dir 
(b=3417), endianness (b=2447) Version 36: don't dput dentry after error (b=2350), zero page->private (3119) diff --git a/lustre/kernel_patches/series/bproc-2.4.20-hp-pnnl b/lustre/kernel_patches/series/bproc-2.4.20-hp-pnnl deleted file mode 100644 index 41437b8..0000000 --- a/lustre/kernel_patches/series/bproc-2.4.20-hp-pnnl +++ /dev/null @@ -1,37 +0,0 @@ -dev_read_only_hp_2.4.20.patch -exports_2.4.20-rh-hp.patch -kmem_cache_validate_hp.patch -lustre_version.patch -vfs_intent-2.4.20-hp.patch -invalidate_show.patch -export-truncate.patch -iod-stock-24-exports_hp.patch -ext-2.4-patch-1.patch -ext-2.4-patch-2.patch -ext-2.4-patch-3.patch -ext-2.4-patch-4.patch -linux-2.4.20-xattr-0.8.54-hp.patch -ext3-2.4.20-fixes.patch -ext3-2.4-ino_t.patch -ext3-largefile.patch -ext3-truncate_blocks.patch -ext3-use-after-free.patch -ext3-orphan_lock.patch -ext3-delete_thread-2.4.20.patch -ext3-noread-2.4.20.patch -extN-wantedi.patch -ext3-san-2.4.20.patch -ext3-map_inode_page.patch -ext3-error-export.patch -iopen-2.4.20.patch -tcp-zero-copy.patch -socket-exports-vanilla.patch -add_page_private.patch -removepage-2.4.20.patch -jbd-ctx_switch.patch -jbd-flushtime.patch -jbd-get_write_access.patch -nfs_export_kernel-2.4.20-hp.patch -ext3-ea-in-inode-2.4.20.patch -bproc-patch-2.4.20 -ext3-truncate-buffer-head.patch diff --git a/lustre/kernel_patches/series/chaos-2.4.18 b/lustre/kernel_patches/series/chaos-2.4.18 deleted file mode 100644 index c69f42f..0000000 --- a/lustre/kernel_patches/series/chaos-2.4.18 +++ /dev/null @@ -1,40 +0,0 @@ -dev_read_only.patch -exports.patch -lustre_version.patch -vfs_intent-2.4.18-18-chaos65.patch -invalidate_show.patch -iod-rmap-exports.patch -export-truncate.patch -htree-ext3-2.4.18.patch -linux-2.4.18ea-0.8.26.patch -ext3-2.4-ino_t.patch -ext3-2.4.18-ino_sb_macro.patch -ext3-orphan_lock.patch -ext3-delete_thread-2.4.18.patch -extN-misc-fixup.patch -extN-noread.patch -extN-wantedi.patch -ext3-san-2.4.20.patch -extN-2.4.18-ino_sb_fixup.patch 
-ext3-map_inode_page_2.4.18.patch -ext3-error-export.patch -iopen-2.4.18.patch -jbd-dont-account-blocks-twice.patch -jbd-commit-tricks.patch -ext3-o_direct-1-2.4.18-chaos.patch -ext3-no-write-super-chaos.patch -add_page_private.patch -ext3-extents-2.4.18-chaos.patch -ext3-extents-oflag-2.4.18-chaos.patch -ext3-raw-lookup.patch -nfs_export_kernel-2.4.18.patch -ext3-ea-in-inode-2.4.18-chaos.patch -listman-2.4.18.patch -ext3-trusted_ea-2.4.18.patch -gfp_memalloc-2.4.18-chaos.patch -ext3-xattr-ptr-arith-fix.patch -kernel_text_address-2.4.18-chaos.patch -procfs-ndynamic-2.4.patch -ext3-truncate-buffer-head.patch -inode-max-readahead-2.4.24.patch -dcache_refcount_debug.patch diff --git a/lustre/kernel_patches/series/chaos-2.4.18-pdirops b/lustre/kernel_patches/series/chaos-2.4.18-pdirops deleted file mode 100644 index c180a5b..0000000 --- a/lustre/kernel_patches/series/chaos-2.4.18-pdirops +++ /dev/null @@ -1,35 +0,0 @@ -dev_read_only.patch -exports.patch -kmem_cache_validate.patch -lustre_version.patch -vfs_intent-2.4.18-18-chaos65.patch -invalidate_show.patch -iod-rmap-exports.patch -export-truncate.patch -ext3-compat-2.4.18-chaos.patch -ext3-htree.patch -linux-2.4.18ea-0.8.26-2.patch -ext3-2.4-ino_t.patch -ext3-2.4.18-ino_sb_macro-2.patch -ext3-orphan_lock.patch -ext3-delete_thread-2.4.18-2.patch -extN-misc-fixup.patch -extN-noread.patch -extN-wantedi.patch -ext3-san-2.4.20.patch -extN-2.4.18-ino_sb_fixup.patch -ext3-map_inode_page_2.4.18.patch -ext3-error-export.patch -iopen-2.4.18-2.patch -jbd-dont-account-blocks-twice.patch -jbd-commit-tricks.patch -ext3-o_direct-1-2.4.18-chaos.patch -ext3-no-write-super-chaos.patch -add_page_private.patch -dynamic-locks-2.4.18-chaos.patch -vfs-pdirops-2.4.18-chaos.patch -ext3-pdirops-2.4.18-chaos.patch -ext3-extents-2.4.18-chaos-pdirops.patch -nfs_export_kernel-2.4.18.patch -ext3-raw-lookup-pdirops.patch -ext3-truncate-buffer-head.patch diff --git a/lustre/kernel_patches/series/chaos-2.4.21 
b/lustre/kernel_patches/series/chaos-2.4.21 index 15aa199..acd145c 100644 --- a/lustre/kernel_patches/series/chaos-2.4.21 +++ b/lustre/kernel_patches/series/chaos-2.4.21 @@ -36,3 +36,5 @@ pagecache-lock-2.4.21-chaos.patch ext3-truncate-buffer-head.patch inode-max-readahead-2.4.24.patch dcache_refcount_debug.patch +blkdev_tunables-2.4.21-chaos.patch +ext3-nlinks-2.4.21-chaos.patch diff --git a/lustre/kernel_patches/series/rhel-2.4.21 b/lustre/kernel_patches/series/rhel-2.4.21 new file mode 100644 index 0000000..eebd98c --- /dev/null +++ b/lustre/kernel_patches/series/rhel-2.4.21 @@ -0,0 +1,40 @@ +configurable-x86-stack-2.4.21-chaos.patch +dev_read_only_2.4.21-chaos.patch +exports_2.4.19-suse.patch +lustre_version.patch +vfs_intent-2.4.21-rhel.patch +invalidate_show-2.4.20-rh.patch +iod-rmap-exports-2.4.21-chaos.patch +export-truncate.patch +ext3-htree-2.4.21-chaos.patch +linux-2.4.21-xattr-0.8.54-chaos.patch +ext3-ino_sb_macro-2.4.21-chaos.patch +ext3-orphan_lock-2.4.22-rh.patch +ext3-delete_thread-2.4.21-chaos.patch +extN-misc-fixup.patch +ext3-noread-2.4.21-chaos.patch +extN-wantedi-2.4.21-chaos.patch +ext3-san-2.4.20.patch +extN-2.4.18-ino_sb_fixup.patch +ext3-map_inode_page_2.4.18.patch +ext3-error-export.patch +iopen-2.4.21-chaos.patch +tcp-zero-copy-2.4.21-chaos.patch +jbd-dont-account-blocks-twice.patch +jbd-commit-tricks.patch +ext3-o_direct-2.4.21-chaos.patch +ext3-no-write-super-chaos.patch +add_page_private.patch +ext3-raw-lookup.patch +nfs_export_kernel-2.4.21-chaos.patch +ext3-ea-in-inode-2.4.21-chaos.patch +listman-2.4.21-chaos.patch +gfp_memalloc-2.4.21-chaos.patch +ext3-xattr-ptr-arith-fix.patch +kernel_text_address-2.4.18-chaos.patch +pagecache-lock-2.4.21-chaos.patch +ext3-truncate-buffer-head.patch +inode-max-readahead-2.4.24.patch +dcache_refcount_debug.patch +blkdev_tunables-2.4.21-chaos.patch +ext3-nlinks-2.4.21-chaos.patch diff --git a/lustre/kernel_patches/series/snapfs-2.4.20 b/lustre/kernel_patches/series/snapfs-2.4.20 deleted file mode 
100644 index ce46da4..0000000 --- a/lustre/kernel_patches/series/snapfs-2.4.20 +++ /dev/null @@ -1,57 +0,0 @@ -configurable-x86-stack-2.4.20.patch -uml-patch-2.4.20-6.patch -uml-2.4.20-do_mmap_pgoff-fix.patch -uml-2.4.20-fixes-1.patch -uml_get_kmem_end_export.patch -dev_read_only_2.4.20.patch -exports_2.4.20.patch -lustre_version.patch -vfs_intent-2.4.20-vanilla.patch -invalidate_show.patch -export-truncate.patch -iod-stock-24-exports.patch -uml_check_get_page.patch -uml_no_panic.patch -ext-2.4-patch-1.patch -ext-2.4-patch-2.patch -ext-2.4-patch-3.patch -ext-2.4-patch-4.patch -linux-2.4.20-xattr-0.8.54.patch -ext3-2.4.20-fixes.patch -ext3-2.4-ino_t.patch -ext3-largefile.patch -ext3-truncate_blocks.patch -ext3-unmount_sync.patch -ext3-use-after-free.patch -ext3-orphan_lock.patch -ext3-noread-2.4.20.patch -ext3-delete_thread-2.4.20.patch -extN-wantedi.patch -ext3-san-2.4.20.patch -ext3-map_inode_page.patch -ext3-error-export.patch -iopen-2.4.20.patch -tcp-zero-copy.patch -jbd-dont-account-blocks-twice.patch -jbd-commit-tricks.patch -ext3-no-write-super.patch -add_page_private.patch -socket-exports-vanilla.patch -removepage-2.4.20.patch -jbd-ctx_switch.patch -jbd-flushtime.patch -jbd-get_write_access.patch -nfs_export_kernel-2.4.20.patch -ext3-raw-lookup.patch -ext3-ea-in-inode-2.4.20.patch -listman-2.4.20.patch -ext3-trusted_ea-2.4.20.patch -ext3-extents-2.4.20.patch -ext3-extents-in-ea-2.4.20.patch -ext3-extents-in-ea-ioctl-2.4.20.patch -ext3-record-extents-ea.patch -kernel_text_address-2.4.20-vanilla.patch -ext3-xattr-ptr-arith-fix.patch -gfp_memalloc-2.4.22.patch -loop_device_get_info.patch -ext3-snapfs-2.4.20.patch diff --git a/lustre/kernel_patches/series/vanilla-2.4.19-pre1 b/lustre/kernel_patches/series/vanilla-2.4.19-pre1 deleted file mode 100644 index 19c6974..0000000 --- a/lustre/kernel_patches/series/vanilla-2.4.19-pre1 +++ /dev/null @@ -1,48 +0,0 @@ -configurable-x86-stack-2.4.19-pre1.patch -dev_read_only_2.4.20.patch -exports_2.4.19-pre1.patch 
-lustre_version.patch -vfs_intent-2.4.19-pre1.patch -invalidate_show-2.4.19-pre1.patch -export-truncate.patch -iod-stock-24-exports.patch -ext3-htree-2.4.19-pre1.patch -linux-2.4.19-pre1-xattr-0.8.54.patch -ext3-2.4.20-fixes.patch -ext3-2.4-ino_t.patch -ext3-largefile.patch -ext3-truncate_blocks.patch -ext3-unmount_sync.patch -ext3-use-after-free-2.4.19-pre1.patch -ext3-orphan_lock.patch -ext3-noread-2.4.20.patch -ext3-delete_thread-2.4.20.patch -extN-wantedi.patch -ext3-san-2.4.20.patch -ext3-map_inode_page.patch -ext3-error-export.patch -iopen-2.4.20.patch -tcp-zero-copy-2.4.19-pre1.patch -jbd-dont-account-blocks-twice.patch -jbd-commit-tricks.patch -ext3-no-write-super.patch -add_page_private-2.4.19-pre1.patch -socket-exports-vanilla.patch -removepage-2.4.20.patch -jbd-ctx_switch.patch -jbd-flushtime-2.4.19-suse.patch -jbd-get_write_access.patch -nfs_export_kernel-2.4.19-pre1.patch -ext3-raw-lookup.patch -ext3-ea-in-inode-2.4.20.patch -listman-2.4.20.patch -ext3-trusted_ea-2.4.20.patch -kernel_text_address-2.4.19-pre1.patch -jbd-2.4.19-pre1-jcberr.patch -seq-private-2.4.19-pre1.patch -kdev-2.4.19-pre1.patch -resched-2.4.19-pre1.patch -ext3-xattr-ptr-arith-fix.patch -gfp_memalloc-2.4.22.patch -vmalloc_to_page-2.4.19-pre1.patch -ext3-truncate-buffer-head.patch diff --git a/lustre/kernel_patches/series/vanilla-2.4.20 b/lustre/kernel_patches/series/vanilla-2.4.20 deleted file mode 100644 index d11bec0..0000000 --- a/lustre/kernel_patches/series/vanilla-2.4.20 +++ /dev/null @@ -1,54 +0,0 @@ -configurable-x86-stack-2.4.20.patch -uml-patch-2.4.20-6.patch -uml-2.4.20-do_mmap_pgoff-fix.patch -uml-2.4.20-fixes-1.patch -uml_get_kmem_end_export.patch -dev_read_only_2.4.20.patch -exports_2.4.20.patch -lustre_version.patch -vfs_intent-2.4.20-vanilla.patch -invalidate_show.patch -export-truncate.patch -iod-stock-24-exports.patch -uml_check_get_page.patch -uml_no_panic.patch -ext-2.4-patch-1.patch -ext-2.4-patch-2.patch -ext-2.4-patch-3.patch -ext-2.4-patch-4.patch 
-linux-2.4.20-xattr-0.8.54.patch -ext3-2.4.20-fixes.patch -ext3-2.4-ino_t.patch -ext3-largefile.patch -ext3-truncate_blocks.patch -ext3-unmount_sync.patch -ext3-use-after-free.patch -ext3-orphan_lock.patch -ext3-noread-2.4.20.patch -ext3-delete_thread-2.4.20.patch -extN-wantedi.patch -ext3-san-2.4.20.patch -ext3-map_inode_page.patch -ext3-error-export.patch -iopen-2.4.20.patch -tcp-zero-copy.patch -jbd-dont-account-blocks-twice.patch -jbd-commit-tricks.patch -ext3-no-write-super.patch -add_page_private.patch -socket-exports-vanilla.patch -removepage-2.4.20.patch -jbd-ctx_switch.patch -jbd-flushtime.patch -jbd-get_write_access.patch -nfs_export_kernel-2.4.20.patch -ext3-raw-lookup.patch -ext3-ea-in-inode-2.4.20.patch -listman-2.4.20.patch -ext3-trusted_ea-2.4.20.patch -kernel_text_address-2.4.20-vanilla.patch -ext3-xattr-ptr-arith-fix.patch -gfp_memalloc-2.4.22.patch -procfs-ndynamic-2.4.patch -linux-2.4.20-filemap.patch -ext3-truncate-buffer-head.patch diff --git a/lustre/kernel_patches/series/vanilla-2.4.22 b/lustre/kernel_patches/series/vanilla-2.4.22 deleted file mode 100644 index 3d5bb6d..0000000 --- a/lustre/kernel_patches/series/vanilla-2.4.22 +++ /dev/null @@ -1,35 +0,0 @@ -configurable-x86-stack-2.4.20.patch -dev_read_only_2.4.20-rh.patch -exports_2.4.20-rh-hp.patch -lustre_version.patch -vfs_intent-2.4.20-vanilla.patch -invalidate_show.patch -export-truncate.patch -iod-stock-exports-2.4.22.patch -ext3-htree-2.4.22-rh.patch -linux-2.4.22-xattr-0.8.54.patch -ext3-orphan_lock-2.4.22-rh.patch -ext3-noread-2.4.20.patch -ext3-delete_thread-suse.patch -extN-wantedi.patch -ext3-san-2.4.20.patch -ext3-map_inode_page.patch -ext3-error-export.patch -iopen-2.4.20.patch -tcp-zero-copy-2.4.22-rh.patch -jbd-dont-account-blocks-twice.patch -jbd-commit-tricks.patch -ext3-no-write-super-chaos.patch -add_page_private.patch -socket-exports-2.4.22-rh.patch -nfs_export_kernel-2.4.22.patch -ext3-raw-lookup.patch -ext3-ea-in-inode-2.4.22-rh.patch -listman-2.4.20.patch 
-ext3-trusted_ea-2.4.20.patch -kernel_text_address-2.4.22-vanilla.patch -gfp_memalloc-2.4.22.patch -ext3-xattr-ptr-arith-fix.patch -3.5G-address-space-2.4.22-vanilla.patch -procfs-ndynamic-2.4.patch -ext3-truncate-buffer-head.patch diff --git a/lustre/kernel_patches/which_patch b/lustre/kernel_patches/which_patch index eec5d2a..0421ab3 100644 --- a/lustre/kernel_patches/which_patch +++ b/lustre/kernel_patches/which_patch @@ -1,9 +1,8 @@ SERIES MNEMONIC COMMENT ARCH -chaos-2.4.18 linux-chaos-2.4.18 LLNL 2.4.18 chaos ~65 i386 hp-pnnl-2.4.20 linux-2.4.20-hp4_pnnl1 same as vanilla but no uml ia64 -vanilla-2.4.20 linux-2.4.20 patch with uml-2.4.20-6 um +vanilla-2.4.24 linux-2.4.24 patch with uml-2.4.24-6 um chaos-2.4.21 linux-chaos-2.4.21 same as rh-2.4.21-15.EL i386 +rhel-2.4.21 linux-2.4.21-15.3EL same as chaos-2.4.21 i386 vanilla-2.4.24 linux-2.4.24 patch with uml-2.4.24-1 um kgdb-2.5.73 linux-2.5.73 vanilla 2.5.73 with kgdb i386 -bproc-2.4.20-hp-pnnl linux-2.4.20-hp4_pnnl9 hp-pnnl + bproc i386 diff --git a/lustre/ldlm/ldlm_flock.c b/lustre/ldlm/ldlm_flock.c index b1c5b2f..064977b 100644 --- a/lustre/ldlm/ldlm_flock.c +++ b/lustre/ldlm/ldlm_flock.c @@ -394,7 +394,7 @@ restart: if (added) ldlm_flock_destroy(req, mode, *flags); - ldlm_resource_dump(res); + ldlm_resource_dump(D_OTHER, res); RETURN(LDLM_ITER_CONTINUE); } diff --git a/lustre/ldlm/ldlm_lib.c b/lustre/ldlm/ldlm_lib.c index 8567977..2cfe883 100644 --- a/lustre/ldlm/ldlm_lib.c +++ b/lustre/ldlm/ldlm_lib.c @@ -98,7 +98,10 @@ int client_obd_setup(struct obd_device *obddev, obd_count len, void *buf) cli->cl_dirty = 0; cli->cl_avail_grant = 0; + /* FIXME: should limit this for the sum of all cl_dirty_max */ cli->cl_dirty_max = OSC_MAX_DIRTY_DEFAULT * 1024 * 1024; + if (cli->cl_dirty_max >> PAGE_SHIFT > num_physpages / 8) + cli->cl_dirty_max = num_physpages << (PAGE_SHIFT - 3); INIT_LIST_HEAD(&cli->cl_cache_waiters); INIT_LIST_HEAD(&cli->cl_loi_ready_list); INIT_LIST_HEAD(&cli->cl_loi_write_list); diff --git 
a/lustre/ldlm/ldlm_lockd.c b/lustre/ldlm/ldlm_lockd.c index 214ef40..43d45a6 100644 --- a/lustre/ldlm/ldlm_lockd.c +++ b/lustre/ldlm/ldlm_lockd.c @@ -540,6 +540,27 @@ int ldlm_server_glimpse_ast(struct ldlm_lock *lock, void *data) RETURN(rc); } +static struct ldlm_lock * +find_existing_lock(struct obd_export *exp, struct lustre_handle *remote_hdl) +{ + struct obd_device *obd = exp->exp_obd; + struct list_head *iter; + + l_lock(&obd->obd_namespace->ns_lock); + list_for_each(iter, &exp->exp_ldlm_data.led_held_locks) { + struct ldlm_lock *lock; + lock = list_entry(iter, struct ldlm_lock, l_export_chain); + if (lock->l_remote_handle.cookie == remote_hdl->cookie) { + LDLM_LOCK_GET(lock); + l_unlock(&obd->obd_namespace->ns_lock); + return lock; + } + } + l_unlock(&obd->obd_namespace->ns_lock); + return NULL; +} + + int ldlm_handle_enqueue(struct ptlrpc_request *req, ldlm_completion_callback completion_callback, ldlm_blocking_callback blocking_callback, @@ -566,6 +587,19 @@ int ldlm_handle_enqueue(struct ptlrpc_request *req, flags = dlm_req->lock_flags; + LASSERT(req->rq_export); + + if (flags & LDLM_FL_REPLAY) { + lock = find_existing_lock(req->rq_export, + &dlm_req->lock_handle1); + if (lock != NULL) { + DEBUG_REQ(D_HA, req, "found existing lock cookie "LPX64, + lock->l_handle.h_cookie); + GOTO(existing_lock, rc = 0); + } + + } + /* The lock's callback data might be set in the policy function */ lock = ldlm_lock_create(obddev->obd_namespace, &dlm_req->lock_handle2, dlm_req->lock_desc.l_resource.lr_name, @@ -581,8 +615,6 @@ int ldlm_handle_enqueue(struct ptlrpc_request *req, sizeof(lock->l_remote_handle)); LDLM_DEBUG(lock, "server-side enqueue handler, new lock created"); - LASSERT(req->rq_export); - OBD_FAIL_TIMEOUT(OBD_FAIL_LDLM_ENQUEUE_BLOCKED, obd_timeout * 2); l_lock(&lock->l_resource->lr_namespace->ns_lock); if (req->rq_export->exp_failed) { @@ -595,6 +627,8 @@ int ldlm_handle_enqueue(struct ptlrpc_request *req, &lock->l_export->exp_ldlm_data.led_held_locks); 
l_unlock(&lock->l_resource->lr_namespace->ns_lock); +existing_lock: + if (flags & LDLM_FL_HAS_INTENT) { /* In this case, the reply buffer is allocated deep in * local_lock_enqueue by the policy function. */ @@ -1377,7 +1411,7 @@ static int ldlm_cleanup(int force) if (!list_empty(&ldlm_namespace_list)) { CERROR("ldlm still has namespaces; clean these up first.\n"); - ldlm_dump_all_namespaces(); + ldlm_dump_all_namespaces(D_DLMTRACE); RETURN(-EBUSY); } diff --git a/lustre/ldlm/ldlm_request.c b/lustre/ldlm/ldlm_request.c index bf77461..26da691 100644 --- a/lustre/ldlm/ldlm_request.c +++ b/lustre/ldlm/ldlm_request.c @@ -48,15 +48,17 @@ int ldlm_expired_completion_wait(void *data) struct obd_device *obd; if (lock->l_conn_export == NULL) { - static unsigned long next_dump = 0; + static unsigned long next_dump = 0, last_dump = 0; LDLM_ERROR(lock, "lock timed out; not entering recovery in " "server code, just going back to sleep"); if (time_after(jiffies, next_dump)) { - ldlm_namespace_dump(lock->l_resource->lr_namespace); - if (next_dump == 0) - portals_debug_dumplog(); + last_dump = next_dump; next_dump = jiffies + 300 * HZ; + ldlm_namespace_dump(D_DLMTRACE, + lock->l_resource->lr_namespace); + if (last_dump == 0) + portals_debug_dumplog(); } RETURN(0); } diff --git a/lustre/ldlm/ldlm_resource.c b/lustre/ldlm/ldlm_resource.c index ac406c7..6cefa75 100644 --- a/lustre/ldlm/ldlm_resource.c +++ b/lustre/ldlm/ldlm_resource.c @@ -33,7 +33,7 @@ kmem_cache_t *ldlm_resource_slab, *ldlm_lock_slab; -spinlock_t ldlm_namespace_lock = SPIN_LOCK_UNLOCKED; +DECLARE_MUTEX(ldlm_namespace_lock); struct list_head ldlm_namespace_list = LIST_HEAD_INIT(ldlm_namespace_list); struct proc_dir_entry *ldlm_type_proc_dir = NULL; struct proc_dir_entry *ldlm_ns_proc_dir = NULL; @@ -43,7 +43,7 @@ struct proc_dir_entry *ldlm_svc_proc_dir = NULL; static int ldlm_proc_dump_ns(struct file *file, const char *buffer, unsigned long count, void *data) { - ldlm_dump_all_namespaces(); + 
ldlm_dump_all_namespaces(D_DLMTRACE); RETURN(count); } @@ -251,9 +251,9 @@ struct ldlm_namespace *ldlm_namespace_new(char *name, __u32 client) ns->ns_nr_unused = 0; ns->ns_max_unused = LDLM_DEFAULT_LRU_SIZE; - spin_lock(&ldlm_namespace_lock); + down(&ldlm_namespace_lock); list_add(&ns->ns_list_chain, &ldlm_namespace_list); - spin_unlock(&ldlm_namespace_lock); + up(&ldlm_namespace_lock); #ifdef __KERNEL__ ldlm_proc_namespace(ns); #endif @@ -362,7 +362,7 @@ int ldlm_namespace_cleanup(struct ldlm_namespace *ns, int flags) CERROR("Resource refcount nonzero (%d) after " "lock cleanup; forcing cleanup.\n", atomic_read(&res->lr_refcount)); - ldlm_resource_dump(res); + ldlm_resource_dump(D_ERROR, res); atomic_set(&res->lr_refcount, 1); ldlm_resource_putref(res); } @@ -379,10 +379,9 @@ int ldlm_namespace_free(struct ldlm_namespace *ns, int force) if (!ns) RETURN(ELDLM_OK); - spin_lock(&ldlm_namespace_lock); + down(&ldlm_namespace_lock); list_del(&ns->ns_list_chain); - - spin_unlock(&ldlm_namespace_lock); + up(&ldlm_namespace_lock); /* At shutdown time, don't call the cancellation callback */ ldlm_namespace_cleanup(ns, 0); @@ -428,10 +427,9 @@ static struct ldlm_resource *ldlm_resource_new(void) struct ldlm_resource *res; OBD_SLAB_ALLOC(res, ldlm_resource_slab, SLAB_NOFS, sizeof *res); - if (res == NULL) { - LBUG(); + if (res == NULL) return NULL; - } + memset(res, 0, sizeof(*res)); INIT_LIST_HEAD(&res->lr_children); @@ -459,10 +457,8 @@ ldlm_resource_add(struct ldlm_namespace *ns, struct ldlm_resource *parent, "type: %d", type); res = ldlm_resource_new(); - if (!res) { - LBUG(); + if (!res) RETURN(NULL); - } spin_lock(&ns->ns_counter_lock); ns->ns_resources++; @@ -519,10 +515,13 @@ ldlm_resource_get(struct ldlm_namespace *ns, struct ldlm_resource *parent, } } - if (create) + if (create) { res = ldlm_resource_add(ns, parent, name, type); - else + if (res == NULL) + GOTO(out, NULL); + } else { res = NULL; + } if (create && ns->ns_lvbo && ns->ns_lvbo->lvbo_init) { int rc; @@ 
-540,6 +539,7 @@ ldlm_resource_get(struct ldlm_namespace *ns, struct ldlm_resource *parent, CERROR("lvbo_init failed for resource "LPU64": rc %d\n", name.name[0], rc); } else { +out: l_unlock(&ns->ns_lock); } @@ -580,22 +580,22 @@ int ldlm_resource_putref(struct ldlm_resource *res) } if (!list_empty(&res->lr_granted)) { - ldlm_resource_dump(res); + ldlm_resource_dump(D_ERROR, res); LBUG(); } if (!list_empty(&res->lr_converting)) { - ldlm_resource_dump(res); + ldlm_resource_dump(D_ERROR, res); LBUG(); } if (!list_empty(&res->lr_waiting)) { - ldlm_resource_dump(res); + ldlm_resource_dump(D_ERROR, res); LBUG(); } if (!list_empty(&res->lr_children)) { - ldlm_resource_dump(res); + ldlm_resource_dump(D_ERROR, res); LBUG(); } @@ -624,7 +624,7 @@ void ldlm_resource_add_lock(struct ldlm_resource *res, struct list_head *head, { l_lock(&res->lr_namespace->ns_lock); - ldlm_resource_dump(res); + ldlm_resource_dump(D_OTHER, res); CDEBUG(D_OTHER, "About to add this lock:\n"); ldlm_lock_dump(D_OTHER, lock, 0); @@ -653,44 +653,44 @@ void ldlm_res2desc(struct ldlm_resource *res, struct ldlm_resource_desc *desc) memcpy(&desc->lr_name, &res->lr_name, sizeof(desc->lr_name)); } -void ldlm_dump_all_namespaces(void) +void ldlm_dump_all_namespaces(int level) { struct list_head *tmp; - spin_lock(&ldlm_namespace_lock); + down(&ldlm_namespace_lock); list_for_each(tmp, &ldlm_namespace_list) { struct ldlm_namespace *ns; ns = list_entry(tmp, struct ldlm_namespace, ns_list_chain); - ldlm_namespace_dump(ns); + ldlm_namespace_dump(level, ns); } - spin_unlock(&ldlm_namespace_lock); + up(&ldlm_namespace_lock); } -void ldlm_namespace_dump(struct ldlm_namespace *ns) +void ldlm_namespace_dump(int level, struct ldlm_namespace *ns) { struct list_head *tmp; - unsigned int debug_save = portal_debug; - portal_debug |= D_OTHER; - l_lock(&ns->ns_lock); - CDEBUG(D_OTHER, "--- Namespace: %s (rc: %d, client: %d)\n", ns->ns_name, + CDEBUG(level, "--- Namespace: %s (rc: %d, client: %d)\n", ns->ns_name, 
ns->ns_refcount, ns->ns_client); - list_for_each(tmp, &ns->ns_root_list) { - struct ldlm_resource *res; - res = list_entry(tmp, struct ldlm_resource, lr_childof); + l_lock(&ns->ns_lock); + if (time_after(jiffies, ns->ns_next_dump)) { + list_for_each(tmp, &ns->ns_root_list) { + struct ldlm_resource *res; + res = list_entry(tmp, struct ldlm_resource, lr_childof); - /* Once we have resources with children, this should really dump - * them recursively. */ - ldlm_resource_dump(res); + /* Once we have resources with children, this should + * really dump them recursively. */ + ldlm_resource_dump(level, res); + } + ns->ns_next_dump = jiffies + 10 * HZ; } l_unlock(&ns->ns_lock); - portal_debug = debug_save; } -void ldlm_resource_dump(struct ldlm_resource *res) +void ldlm_resource_dump(int level, struct ldlm_resource *res) { struct list_head *tmp; int pos; @@ -698,36 +698,36 @@ void ldlm_resource_dump(struct ldlm_resource *res) if (RES_NAME_SIZE != 4) LBUG(); - CDEBUG(D_OTHER, "--- Resource: %p ("LPU64"/"LPU64"/"LPU64"/"LPU64 + CDEBUG(level, "--- Resource: %p ("LPU64"/"LPU64"/"LPU64"/"LPU64 ") (rc: %d)\n", res, res->lr_name.name[0], res->lr_name.name[1], res->lr_name.name[2], res->lr_name.name[3], atomic_read(&res->lr_refcount)); if (!list_empty(&res->lr_granted)) { pos = 0; - CDEBUG(D_OTHER, "Granted locks:\n"); + CDEBUG(level, "Granted locks:\n"); list_for_each(tmp, &res->lr_granted) { struct ldlm_lock *lock; lock = list_entry(tmp, struct ldlm_lock, l_res_link); - ldlm_lock_dump(D_OTHER, lock, ++pos); + ldlm_lock_dump(level, lock, ++pos); } } if (!list_empty(&res->lr_converting)) { pos = 0; - CDEBUG(D_OTHER, "Converting locks:\n"); + CDEBUG(level, "Converting locks:\n"); list_for_each(tmp, &res->lr_converting) { struct ldlm_lock *lock; lock = list_entry(tmp, struct ldlm_lock, l_res_link); - ldlm_lock_dump(D_OTHER, lock, ++pos); + ldlm_lock_dump(level, lock, ++pos); } } if (!list_empty(&res->lr_waiting)) { pos = 0; - CDEBUG(D_OTHER, "Waiting locks:\n"); + CDEBUG(level, 
"Waiting locks:\n"); list_for_each(tmp, &res->lr_waiting) { struct ldlm_lock *lock; lock = list_entry(tmp, struct ldlm_lock, l_res_link); - ldlm_lock_dump(D_OTHER, lock, ++pos); + ldlm_lock_dump(level, lock, ++pos); } } } diff --git a/lustre/ldlm/ldlm_test.c b/lustre/ldlm/ldlm_test.c index 5381b5b..7a5e066 100644 --- a/lustre/ldlm/ldlm_test.c +++ b/lustre/ldlm/ldlm_test.c @@ -218,8 +218,7 @@ int ldlm_test_extents(struct obd_device *obddev) LBUG(); flags = 0; - lock1 = ldlm_lock_create(ns, NULL, res_id, LDLM_EXTENT, LCK_PR, NULL, - 0); + lock1 = ldlm_lock_create(ns, NULL, res_id, LDLM_EXTENT, LCK_PR, NULL,0); if (lock1 == NULL) LBUG(); err = ldlm_lock_enqueue(ns, lock1, &ext1, sizeof(ext1), &flags, NULL, @@ -230,8 +229,7 @@ int ldlm_test_extents(struct obd_device *obddev) LBUG(); flags = 0; - lock2 = ldlm_lock_create(ns, NULL, res_id, LDLM_EXTENT, LCK_PR, - NULL, 0); + lock2 = ldlm_lock_create(ns, NULL, res_id, LDLM_EXTENT, LCK_PR, NULL,0); err = ldlm_lock_enqueue(ns, lock2, &ext2, sizeof(ext2), &flags, NULL, NULL); if (err != ELDLM_OK) diff --git a/lustre/llite/namei.c b/lustre/llite/namei.c index 8567ae8..2d37696 100644 --- a/lustre/llite/namei.c +++ b/lustre/llite/namei.c @@ -303,7 +303,6 @@ static int lookup_it_finish(struct ptlrpc_request *request, int offset, struct dentry **de = icbd->icbd_childp; struct inode *parent = icbd->icbd_parent; struct ll_sb_info *sbi = ll_i2sbi(parent); - struct dentry *dentry = *de; struct inode *inode = NULL; int rc; @@ -313,7 +312,7 @@ static int lookup_it_finish(struct ptlrpc_request *request, int offset, ENTRY; rc = ll_prep_inode(sbi->ll_osc_exp, &inode, request, offset, - dentry->d_sb); + (*de)->d_sb); if (rc) RETURN(rc); @@ -339,16 +338,16 @@ static int lookup_it_finish(struct ptlrpc_request *request, int offset, } } - dentry = *de = ll_find_alias(inode, dentry); + *de = ll_find_alias(inode, *de); } else { ENTRY; spin_lock(&dcache_lock); - ll_d_add(dentry, inode); + ll_d_add(*de, inode); spin_unlock(&dcache_lock); } - 
ll_set_dd(dentry); - dentry->d_op = &ll_d_ops; + ll_set_dd(*de); + (*de)->d_op = &ll_d_ops; RETURN(0); } @@ -369,9 +368,9 @@ static struct dentry *ll_lookup_it(struct inode *parent, struct dentry *dentry, if (dentry->d_name.len > EXT3_NAME_LEN) RETURN(ERR_PTR(-ENAMETOOLONG)); - CDEBUG(D_VFSTRACE, "VFS Op:name=%s,dir=%lu/%u(%p),intent=%s\n", - dentry->d_name.name, parent->i_ino, parent->i_generation, - parent, LL_IT2STR(it)); + CDEBUG(D_VFSTRACE, "VFS Op:name=%*s,dir=%lu/%u(%p),intent=%s\n", + dentry->d_name.len, dentry->d_name.name, parent->i_ino, + parent->i_generation, parent, LL_IT2STR(it)); if (d_mountpoint(dentry)) CERROR("Tell Peter, lookup on mtpt, it %s\n", LL_IT2STR(it)); @@ -478,9 +477,9 @@ static int ll_create_it(struct inode *dir, struct dentry *dentry, int mode, int rc = 0; ENTRY; - CDEBUG(D_VFSTRACE, "VFS Op:name=%s,dir=%lu/%u(%p),intent=%s\n", - dentry->d_name.name, dir->i_ino, dir->i_generation, dir, - LL_IT2STR(it)); + CDEBUG(D_VFSTRACE, "VFS Op:name=%*s,dir=%lu/%u(%p),intent=%s\n", + dentry->d_name.len, dentry->d_name.name, dir->i_ino, + dir->i_generation, dir, LL_IT2STR(it)); rc = it_open_error(DISP_OPEN_CREATE, it); if (rc) @@ -526,15 +525,13 @@ static int ll_mknod_raw(struct nameidata *nd, int mode, dev_t rdev) { struct ptlrpc_request *request = NULL; struct inode *dir = nd->dentry->d_inode; - const char *name = nd->last.name; - int len = nd->last.len; struct ll_sb_info *sbi = ll_i2sbi(dir); struct mdc_op_data op_data; int err = -EMLINK; ENTRY; CDEBUG(D_VFSTRACE, "VFS Op:name=%s,dir=%lu/%u(%p)\n", - name, dir->i_ino, dir->i_generation, dir); + nd->last.name, dir->i_ino, dir->i_generation, dir); if (dir->i_nlink >= EXT3_LINK_MAX) RETURN(err); @@ -549,7 +546,8 @@ static int ll_mknod_raw(struct nameidata *nd, int mode, dev_t rdev) case S_IFBLK: case S_IFIFO: case S_IFSOCK: - ll_prepare_mdc_op_data(&op_data, dir, NULL, name, len, 0); + ll_prepare_mdc_op_data(&op_data, dir, NULL, nd->last.name, + nd->last.len, 0); err = mdc_create(sbi->ll_mdc_exp, 
&op_data, NULL, 0, mode, current->fsuid, current->fsgid, rdev, &request); @@ -566,20 +564,19 @@ static int ll_mknod_raw(struct nameidata *nd, int mode, dev_t rdev) RETURN(err); } -static int ll_mknod(struct inode *dir, struct dentry *child, int mode, +static int ll_mknod(struct inode *dir, struct dentry *dchild, int mode, ll_dev_t rdev) { struct ptlrpc_request *request = NULL; struct inode *inode = NULL; - const char *name = child->d_name.name; - int len = child->d_name.len; struct ll_sb_info *sbi = ll_i2sbi(dir); struct mdc_op_data op_data; int err = -EMLINK; ENTRY; - CDEBUG(D_VFSTRACE, "VFS Op:name=%s,dir=%lu/%u(%p)\n", - name, dir->i_ino, dir->i_generation, dir); + CDEBUG(D_VFSTRACE, "VFS Op:name=%*s,dir=%lu/%u(%p)\n", + dchild->d_name.len, dchild->d_name.name, + dir->i_ino, dir->i_generation, dir); if (dir->i_nlink >= EXT3_LINK_MAX) RETURN(err); @@ -594,7 +591,8 @@ static int ll_mknod(struct inode *dir, struct dentry *child, int mode, case S_IFBLK: case S_IFIFO: case S_IFSOCK: - ll_prepare_mdc_op_data(&op_data, dir, NULL, name, len, 0); + ll_prepare_mdc_op_data(&op_data, dir, NULL, dchild->d_name.name, + dchild->d_name.len, 0); err = mdc_create(sbi->ll_mdc_exp, &op_data, NULL, 0, mode, current->fsuid, current->fsgid, rdev, &request); @@ -604,7 +602,7 @@ static int ll_mknod(struct inode *dir, struct dentry *child, int mode, ll_update_times(request, 0, dir); err = ll_prep_inode(sbi->ll_osc_exp, &inode, request, 0, - child->d_sb); + dchild->d_sb); if (err) GOTO(out_err, err); break; @@ -615,7 +613,7 @@ static int ll_mknod(struct inode *dir, struct dentry *child, int mode, RETURN(-EINVAL); } - d_instantiate(child, inode); + d_instantiate(dchild, inode); out_err: ptlrpc_req_finished(request); RETURN(err); @@ -624,8 +622,6 @@ static int ll_mknod(struct inode *dir, struct dentry *child, int mode, static int ll_symlink_raw(struct nameidata *nd, const char *tgt) { struct inode *dir = nd->dentry->d_inode; - const char *name = nd->last.name; - int len = nd->last.len; 
struct ptlrpc_request *request = NULL; struct ll_sb_info *sbi = ll_i2sbi(dir); struct mdc_op_data op_data; @@ -633,12 +629,13 @@ static int ll_symlink_raw(struct nameidata *nd, const char *tgt) ENTRY; CDEBUG(D_VFSTRACE, "VFS Op:name=%s,dir=%lu/%u(%p),target=%s\n", - name, dir->i_ino, dir->i_generation, dir, tgt); + nd->last.name, dir->i_ino, dir->i_generation, dir, tgt); if (dir->i_nlink >= EXT3_LINK_MAX) RETURN(err); - ll_prepare_mdc_op_data(&op_data, dir, NULL, name, len, 0); + ll_prepare_mdc_op_data(&op_data, dir, NULL, nd->last.name, + nd->last.len, 0); err = mdc_create(sbi->ll_mdc_exp, &op_data, tgt, strlen(tgt) + 1, S_IFLNK | S_IRWXUGO, current->fsuid, current->fsgid, 0, &request); @@ -653,19 +650,18 @@ static int ll_link_raw(struct nameidata *srcnd, struct nameidata *tgtnd) { struct inode *src = srcnd->dentry->d_inode; struct inode *dir = tgtnd->dentry->d_inode; - const char *name = tgtnd->last.name; - int len = tgtnd->last.len; struct ptlrpc_request *request = NULL; struct mdc_op_data op_data; int err; struct ll_sb_info *sbi = ll_i2sbi(dir); ENTRY; - CDEBUG(D_VFSTRACE, "VFS Op:inode=%lu/%u(%p),dir=%lu/%u(%p),target=%s\n", - src->i_ino, src->i_generation, src, - dir->i_ino, dir->i_generation, dir, name); + CDEBUG(D_VFSTRACE, "VFS Op:name=%s inode=%lu/%u(%p), dir=%lu/%u(%p), " + "target=%s\n", srcnd->last.name, src->i_ino, src->i_generation, + src, dir->i_ino, dir->i_generation, dir, tgtnd->last.name); - ll_prepare_mdc_op_data(&op_data, src, dir, name, len, 0); + ll_prepare_mdc_op_data(&op_data, src, dir, tgtnd->last.name, + tgtnd->last.len, 0); err = mdc_link(sbi->ll_mdc_exp, &op_data, &request); if (err == 0) ll_update_times(request, 0, dir); @@ -679,21 +675,20 @@ static int ll_link_raw(struct nameidata *srcnd, struct nameidata *tgtnd) static int ll_mkdir_raw(struct nameidata *nd, int mode) { struct inode *dir = nd->dentry->d_inode; - const char *name = nd->last.name; - int len = nd->last.len; struct ptlrpc_request *request = NULL; struct ll_sb_info *sbi = 
ll_i2sbi(dir); struct mdc_op_data op_data; int err = -EMLINK; ENTRY; CDEBUG(D_VFSTRACE, "VFS Op:name=%s,dir=%lu/%u(%p)\n", - name, dir->i_ino, dir->i_generation, dir); + nd->last.name, dir->i_ino, dir->i_generation, dir); if (dir->i_nlink >= EXT3_LINK_MAX) RETURN(err); mode = (mode & (S_IRWXUGO|S_ISVTX) & ~current->fs->umask) | S_IFDIR; - ll_prepare_mdc_op_data(&op_data, dir, NULL, name, len, 0); + ll_prepare_mdc_op_data(&op_data, dir, NULL, nd->last.name, + nd->last.len, 0); err = mdc_create(sbi->ll_mdc_exp, &op_data, NULL, 0, mode, current->fsuid, current->fsgid, 0, &request); if (err == 0) @@ -706,16 +701,15 @@ static int ll_mkdir_raw(struct nameidata *nd, int mode) static int ll_rmdir_raw(struct nameidata *nd) { struct inode *dir = nd->dentry->d_inode; - const char *name = nd->last.name; - int len = nd->last.len; struct ptlrpc_request *request = NULL; struct mdc_op_data op_data; int rc; ENTRY; CDEBUG(D_VFSTRACE, "VFS Op:name=%s,dir=%lu/%u(%p)\n", - name, dir->i_ino, dir->i_generation, dir); + nd->last.name, dir->i_ino, dir->i_generation, dir); - ll_prepare_mdc_op_data(&op_data, dir, NULL, name, len, S_IFDIR); + ll_prepare_mdc_op_data(&op_data, dir, NULL, nd->last.name, + nd->last.len, S_IFDIR); rc = mdc_unlink(ll_i2sbi(dir)->ll_mdc_exp, &op_data, &request); if (rc == 0) ll_update_times(request, 0, dir); @@ -796,16 +790,15 @@ int ll_objects_destroy(struct ptlrpc_request *request, struct inode *dir) static int ll_unlink_raw(struct nameidata *nd) { struct inode *dir = nd->dentry->d_inode; - const char *name = nd->last.name; - int len = nd->last.len; struct ptlrpc_request *request = NULL; struct mdc_op_data op_data; int rc; ENTRY; CDEBUG(D_VFSTRACE, "VFS Op:name=%s,dir=%lu/%u(%p)\n", - name, dir->i_ino, dir->i_generation, dir); + nd->last.name, dir->i_ino, dir->i_generation, dir); - ll_prepare_mdc_op_data(&op_data, dir, NULL, name, len, 0); + ll_prepare_mdc_op_data(&op_data, dir, NULL, nd->last.name, + nd->last.len, 0); rc = mdc_unlink(ll_i2sbi(dir)->ll_mdc_exp, 
&op_data, &request); if (rc) GOTO(out, rc); @@ -818,26 +811,24 @@ static int ll_unlink_raw(struct nameidata *nd) RETURN(rc); } -static int ll_rename_raw(struct nameidata *oldnd, struct nameidata *newnd) +static int ll_rename_raw(struct nameidata *srcnd, struct nameidata *tgtnd) { - struct inode *src = oldnd->dentry->d_inode; - struct inode *tgt = newnd->dentry->d_inode; - const char *oldname = oldnd->last.name; - int oldlen = oldnd->last.len; - const char *newname = newnd->last.name; - int newlen = newnd->last.len; + struct inode *src = srcnd->dentry->d_inode; + struct inode *tgt = tgtnd->dentry->d_inode; struct ptlrpc_request *request = NULL; struct ll_sb_info *sbi = ll_i2sbi(src); struct mdc_op_data op_data; int err; ENTRY; CDEBUG(D_VFSTRACE, "VFS Op:oldname=%s,src_dir=%lu/%u(%p),newname=%s," - "tgt_dir=%lu/%u(%p)\n", oldname, src->i_ino, src->i_generation, - src, newname, tgt->i_ino, tgt->i_generation, tgt); + "tgt_dir=%lu/%u(%p)\n", + srcnd->last.name, src->i_ino, src->i_generation, src, + tgtnd->last.name, tgt->i_ino, tgt->i_generation, tgt); ll_prepare_mdc_op_data(&op_data, src, tgt, NULL, 0, 0); err = mdc_rename(sbi->ll_mdc_exp, &op_data, - oldname, oldlen, newname, newlen, &request); + srcnd->last.name, srcnd->last.len, + tgtnd->last.name, tgtnd->last.len, &request); if (!err) { ll_update_times(request, 0, src); ll_update_times(request, 0, tgt); diff --git a/lustre/llite/symlink.c b/lustre/llite/symlink.c index a7be61a..57dc9b3 100644 --- a/lustre/llite/symlink.c +++ b/lustre/llite/symlink.c @@ -53,7 +53,7 @@ static int ll_readlink_internal(struct inode *inode, if (rc) { if (rc != -ENOENT) CERROR("inode %lu: rc = %d\n", inode->i_ino, rc); - RETURN(rc); + GOTO (failed, rc); } body = lustre_msg_buf ((*request)->rq_repmsg, 0, sizeof (*body)); @@ -90,7 +90,7 @@ static int ll_readlink_internal(struct inode *inode, failed: ptlrpc_req_finished (*request); - RETURN (-EPROTO); + RETURN (rc); } static int ll_readlink(struct dentry *dentry, char *buffer, int buflen) 
@@ -139,8 +139,11 @@ static int ll_follow_link(struct dentry *dentry, struct nameidata *nd) down(&lli->lli_open_sem); rc = ll_readlink_internal(inode, &request, &symname); up(&lli->lli_open_sem); - if (rc) + if (rc) { + path_release(nd); /* Kernel assumes that ->follow_link() + releases nameidata on error */ GOTO(out, rc); + } rc = vfs_follow_link(nd, symname); ptlrpc_req_finished(request); diff --git a/lustre/lov/lov_obd.c b/lustre/lov/lov_obd.c index 5bec189..ff19e0f 100644 --- a/lustre/lov/lov_obd.c +++ b/lustre/lov/lov_obd.c @@ -217,6 +217,7 @@ static int lov_disconnect(struct obd_export *exp, int flags) { struct obd_device *obd = class_exp2obd(exp); struct lov_obd *lov = &obd->u.lov; + struct obd_export *osc_exp; int rc, i; ENTRY; @@ -228,24 +229,30 @@ static int lov_disconnect(struct obd_export *exp, int flags) if (lov->refcount != 0) goto out_local; + spin_lock(&lov->lov_lock); for (i = 0; i < lov->desc.ld_tgt_count; i++) { if (lov->tgts[i].ltd_exp == NULL) continue; + osc_exp = lov->tgts[i].ltd_exp; + lov->tgts[i].ltd_exp = NULL; + if (obd->obd_no_recov) { /* Pass it on to our clients. * XXX This should be an argument to disconnect, * XXX not a back-door flag on the OBD. Ah well. 
*/ struct obd_device *osc_obd; - osc_obd = class_exp2obd(lov->tgts[i].ltd_exp); + osc_obd = class_exp2obd(osc_exp); if (osc_obd) osc_obd->obd_no_recov = 1; } - obd_register_observer(lov->tgts[i].ltd_exp->exp_obd, NULL); + obd_register_observer(osc_exp->exp_obd, NULL); - rc = obd_disconnect(lov->tgts[i].ltd_exp, flags); + spin_unlock(&lov->lov_lock); + rc = obd_disconnect(osc_exp, flags); + spin_lock(&lov->lov_lock); if (rc) { if (lov->tgts[i].active) { CERROR("Target %s disconnect error %d\n", @@ -257,8 +264,8 @@ static int lov_disconnect(struct obd_export *exp, int flags) lov->desc.ld_active_tgt_count--; lov->tgts[i].active = 0; } - lov->tgts[i].ltd_exp = NULL; } + spin_unlock(&lov->lov_lock); out_local: rc = class_disconnect(exp, 0); @@ -283,6 +290,9 @@ static int lov_set_osc_active(struct lov_obd *lov, struct obd_uuid *uuid, spin_lock(&lov->lov_lock); for (i = 0, tgt = lov->tgts; i < lov->desc.ld_tgt_count; i++, tgt++) { + if (tgt->ltd_exp == NULL) + continue; + CDEBUG(D_INFO, "lov idx %d is %s conn "LPX64"\n", i, tgt->uuid.uuid, tgt->ltd_exp->exp_handle.h_cookie); if (strncmp(uuid->uuid, tgt->uuid.uuid, sizeof uuid->uuid) == 0) @@ -1988,7 +1998,7 @@ static int lov_enqueue(struct obd_export *exp, struct lov_stripe_md *lsm, char submd_buf[sizeof(struct lov_stripe_md) + sizeof(struct lov_oinfo)]; struct lov_stripe_md *submd = (void *)submd_buf; ldlm_error_t rc; - int i, save_flags = *flags; + int i, save_flags = *flags, all_skipped = 1; ENTRY; if (lsm_bad_magic(lsm)) @@ -2027,6 +2037,8 @@ static int lov_enqueue(struct obd_export *exp, struct lov_stripe_md *lsm, continue; } + all_skipped = 0; + /* XXX LOV STACKING: submd should be from the subobj */ submd->lsm_object_id = loi->loi_id; submd->lsm_stripe_count = 0; @@ -2092,6 +2104,9 @@ static int lov_enqueue(struct obd_export *exp, struct lov_stripe_md *lsm, } } } + if (all_skipped) + GOTO(out_lockh, rc = -EIO); + if (lsm->lsm_stripe_count > 1) lov_llh_put(lov_lockh); RETURN(ELDLM_OK); @@ -2116,6 +2131,7 @@ static 
int lov_enqueue(struct obd_export *exp, struct lov_stripe_md *lsm, } } +out_lockh: if (lsm->lsm_stripe_count > 1) { lov_llh_destroy(lov_lockh); lov_llh_put(lov_lockh); @@ -2493,9 +2509,10 @@ static int lov_iocontrol(unsigned int cmd, struct obd_export *exp, int len, len, karg, uarg); if (err) { if (lov->tgts[i].active) { - CERROR("error: iocontrol OSC %s on OST" - "idx %d: err = %d\n", - lov->tgts[i].uuid.uuid, i, err); + CERROR("error: iocontrol OSC %s on OST " + "idx %d cmd %x: err = %d\n", + lov->tgts[i].uuid.uuid, i, + cmd, err); if (!rc) rc = err; } diff --git a/lustre/mdc/mdc_locks.c b/lustre/mdc/mdc_locks.c index e171b73..49d51ac 100644 --- a/lustre/mdc/mdc_locks.c +++ b/lustre/mdc/mdc_locks.c @@ -431,8 +431,8 @@ int mdc_intent_lock(struct obd_export *exp, struct ll_uctxt *uctxt, /* We could just return 1 immediately, but since we should only * be called in revalidate_it if we already have a lock, let's * verify that. */ - struct ldlm_res_id res_id ={.name = {cfid->id, - cfid->generation}}; + struct ldlm_res_id res_id = { .name = { cfid->id, + cfid->generation}}; struct lustre_handle lockh; int mode = LCK_PR; diff --git a/lustre/mds/handler.c b/lustre/mds/handler.c index 1e2133e..e6320ef 100644 --- a/lustre/mds/handler.c +++ b/lustre/mds/handler.c @@ -219,13 +219,6 @@ struct dentry *mds_fid2dentry(struct mds_obd *mds, struct ll_fid *fid, if (!inode) RETURN(ERR_PTR(-ENOENT)); - if (is_bad_inode(inode)) { - CERROR("bad inode returned %lu/%u\n", - inode->i_ino, inode->i_generation); - dput(result); - RETURN(ERR_PTR(-ENOENT)); - } - if (generation && inode->i_generation != generation) { /* we didn't find the right inode.. 
*/ CERROR("bad inode %lu, link: %lu ct: %d or generation %u/%u\n", @@ -349,7 +342,7 @@ static int mds_destroy_export(struct obd_export *export) /* If you change this message, be sure to update * replay_single:test_46 */ - CERROR("force closing client file handle for %*s (%s:%lu)\n", + CDEBUG(D_INODE, "force closing file handle for %*s (%s:%lu)\n", dentry->d_name.len, dentry->d_name.name, ll_bdevname(dentry->d_inode->i_sb, btmp), dentry->d_inode->i_ino); @@ -483,8 +476,6 @@ int mds_get_md(struct obd_device *obd, struct inode *inode, void *md, if (lock) down(&inode->i_sem); rc = fsfilt_get_md(obd, inode, md, *size); - if (lock) - up(&inode->i_sem); if (rc < 0) { CERROR("Error %d reading eadata for ino %lu\n", @@ -500,6 +491,8 @@ int mds_get_md(struct obd_device *obd, struct inode *inode, void *md, *size = rc; } } + if (lock) + up(&inode->i_sem); RETURN (rc); } @@ -757,7 +750,10 @@ static int mds_getattr_name(int offset, struct ptlrpc_request *req, struct ldlm_resource *res; DEBUG_REQ(D_DLMTRACE, req, "resent, not enqueuing new locks"); granted_lock = ldlm_handle2lock(child_lockh); - LASSERT(granted_lock); + LASSERTF(granted_lock != NULL, LPU64"/%u lockh "LPX64"\n", + body->fid1.id, body->fid1.generation, + child_lockh->cookie); + res = granted_lock->l_resource; child_fid.id = res->lr_name.name[0]; @@ -1683,6 +1679,7 @@ static int mds_cleanup(struct obd_device *obd, int flags) static void fixup_handle_for_resent_req(struct ptlrpc_request *req, struct ldlm_lock *new_lock, + struct ldlm_lock **old_lock, struct lustre_handle *lockh) { struct obd_export *exp = req->rq_export; @@ -1703,8 +1700,11 @@ static void fixup_handle_for_resent_req(struct ptlrpc_request *req, continue; if (lock->l_remote_handle.cookie == remote_hdl.cookie) { lockh->cookie = lock->l_handle.h_cookie; + LDLM_DEBUG(lock, "restoring lock cookie"); DEBUG_REQ(D_HA, req, "restoring lock cookie "LPX64, lockh->cookie); + if (old_lock) + *old_lock = LDLM_LOCK_GET(lock); l_unlock(&obd->obd_namespace->ns_lock); 
return; } @@ -1751,7 +1751,7 @@ static int mds_intent_policy(struct ldlm_namespace *ns, struct mds_obd *mds = &req->rq_export->exp_obd->u.mds; struct ldlm_reply *rep; struct lustre_handle lockh = { 0 }; - struct ldlm_lock *new_lock; + struct ldlm_lock *new_lock = NULL; int rc, offset = 2, repsize[4] = {sizeof(struct ldlm_reply), sizeof(struct mds_body), mds->mds_max_mdsize, @@ -1784,12 +1784,12 @@ static int mds_intent_policy(struct ldlm_namespace *ns, rep = lustre_msg_buf(req->rq_repmsg, 0, sizeof (*rep)); intent_set_disposition(rep, DISP_IT_EXECD); - fixup_handle_for_resent_req(req, lock, &lockh); /* execute policy */ switch ((long)it->opc) { case IT_OPEN: case IT_CREAT|IT_OPEN: + fixup_handle_for_resent_req(req, lock, NULL, &lockh); /* XXX swab here to assert that an mds_open reint * packet is following */ rep->lock_policy_res2 = mds_reint(req, offset, &lockh); @@ -1806,6 +1806,7 @@ static int mds_intent_policy(struct ldlm_namespace *ns, case IT_GETATTR: case IT_LOOKUP: case IT_READDIR: + fixup_handle_for_resent_req(req, lock, &new_lock, &lockh); rep->lock_policy_res2 = mds_getattr_name(offset, req, &lockh); /* FIXME: LDLM can set req->rq_status. MDS sets policy_res{1,2} with disposition and status. @@ -1833,11 +1834,13 @@ static int mds_intent_policy(struct ldlm_namespace *ns, * drop it below anyways because lock replay is done separately by the * client afterwards. For regular RPCs we want to give the new lock to * the client instead of whatever lock it was about to get. */ - new_lock = ldlm_handle2lock(&lockh); + if (new_lock == NULL) + new_lock = ldlm_handle2lock(&lockh); if (new_lock == NULL && (flags & LDLM_FL_INTENT_ONLY)) RETURN(0); - LASSERT(new_lock != NULL); + LASSERTF(new_lock != NULL, "op "LPX64" lockh "LPX64"\n", + it->opc, lockh.cookie); /* If we've already given this lock to a client once, then we should * have no readers or writers. 
Otherwise, we should have one reader diff --git a/lustre/mds/mds_fs.c b/lustre/mds/mds_fs.c index 7d8dc6d..95f8650 100644 --- a/lustre/mds/mds_fs.c +++ b/lustre/mds/mds_fs.c @@ -415,12 +415,13 @@ int mds_fs_setup(struct obd_device *obd, struct vfsmount *mnt) CERROR("cannot lookup __iopen__ directory: rc = %d\n", rc); GOTO(err_pop, rc); } - if (!dentry->d_inode) { + + mds->mds_fid_de = dentry; + if (!dentry->d_inode || is_bad_inode(dentry->d_inode)) { rc = -ENOENT; CERROR("__iopen__ directory has no inode? rc = %d\n", rc); GOTO(err_fid, rc); } - mds->mds_fid_de = dentry; dentry = simple_mkdir(current->fs->pwd, "PENDING", 0777, 1); if (IS_ERR(dentry)) { diff --git a/lustre/mds/mds_lib.c b/lustre/mds/mds_lib.c index 7d4a614..71fdbdd 100644 --- a/lustre/mds/mds_lib.c +++ b/lustre/mds/mds_lib.c @@ -244,7 +244,7 @@ static int mds_rename_unpack(struct ptlrpc_request *req, int offset, ENTRY; rec = lustre_swab_reqbuf (req, offset, sizeof (*rec), - lustre_swab_mds_rec_unlink); + lustre_swab_mds_rec_rename); if (rec == NULL) RETURN(-EFAULT); diff --git a/lustre/mds/mds_open.c b/lustre/mds/mds_open.c index 641c423e..58f5c99 100644 --- a/lustre/mds/mds_open.c +++ b/lustre/mds/mds_open.c @@ -689,25 +689,21 @@ static int mds_open_by_fid(struct ptlrpc_request *req, struct ll_fid *fid, struct mds_update_record *rec,struct ldlm_reply *rep) { struct mds_obd *mds = mds_req2mds(req); - struct inode *pending_dir = mds->mds_pending_dir->d_inode; struct dentry *dchild; char fidname[LL_FID_NAMELEN]; int fidlen = 0, rc; void *handle = NULL; ENTRY; - down(&pending_dir->i_sem); fidlen = ll_fid2str(fidname, fid->id, fid->generation); - dchild = lookup_one_len(fidname, mds->mds_pending_dir, fidlen); + dchild = ll_lookup_one_len(fidname, mds->mds_pending_dir, fidlen); if (IS_ERR(dchild)) { - up(&pending_dir->i_sem); rc = PTR_ERR(dchild); CERROR("error looking up %s in PENDING: rc = %d\n",fidname, rc); RETURN(rc); } if (dchild->d_inode != NULL) { - up(&pending_dir->i_sem); 
mds_inode_set_orphan(dchild->d_inode); mds_pack_inode2fid(&body->fid1, dchild->d_inode); mds_pack_inode2body(body, dchild->d_inode); @@ -717,8 +713,7 @@ static int mds_open_by_fid(struct ptlrpc_request *req, struct ll_fid *fid, fidname); goto open; } - dput(dchild); - up(&pending_dir->i_sem); + l_dput(dchild); /* We didn't find it in PENDING so it isn't an orphan. See * if it was a regular inode that was previously created. */ diff --git a/lustre/mds/mds_reint.c b/lustre/mds/mds_reint.c index a141fd2..08f021a 100644 --- a/lustre/mds/mds_reint.c +++ b/lustre/mds/mds_reint.c @@ -1044,9 +1044,16 @@ int mds_get_parent_child_locked(struct obd_device *obd, struct mds_obd *mds, GOTO(cleanup, rc); } + cleanup_phase = 2; /* child dentry */ inode = (*dchildp)->d_inode; - if (inode != NULL) + if (inode != NULL) { + if (is_bad_inode(inode)) { + CERROR("bad inode returned %lu/%u\n", + inode->i_ino, inode->i_generation); + GOTO(cleanup, rc = -ENOENT); + } inode = igrab(inode); + } if (inode == NULL) goto retry_locks; @@ -1784,7 +1791,8 @@ static int mds_reint_rename(struct mds_update_record *rec, int offset, } else if (S_ISREG(new_inode->i_mode)) { mds_pack_inode2fid(&body->fid1, new_inode); mds_pack_inode2body(body, new_inode); - mds_pack_md(obd, req->rq_repmsg, 1, body, new_inode, MDS_PACK_MD_LOCK); + mds_pack_md(obd, req->rq_repmsg, 1, body, new_inode, + MDS_PACK_MD_LOCK); } } diff --git a/lustre/mds/mds_unlink_open.c b/lustre/mds/mds_unlink_open.c index 0de3f3b..04a4513 100644 --- a/lustre/mds/mds_unlink_open.c +++ b/lustre/mds/mds_unlink_open.c @@ -175,8 +175,8 @@ int mds_cleanup_orphans(struct obd_device *obd) struct l_linux_dirent *dirent, *n; struct list_head dentry_list; char d_name[LL_FID_NAMELEN]; - __u64 i = 0; - int rc = 0, item = 0, namlen; + unsigned long inum; + int i = 0, rc = 0, item = 0, namlen; ENTRY; push_ctxt(&saved, &obd->obd_ctxt, NULL); @@ -199,21 +199,20 @@ int mds_cleanup_orphans(struct obd_device *obd) GOTO(err_out, rc); 
list_for_each_entry_safe(dirent, n, &dentry_list, lld_list) { - i ++; + i++; list_del(&dirent->lld_list); namlen = strlen(dirent->lld_name); LASSERT(sizeof(d_name) >= namlen + 1); strcpy(d_name, dirent->lld_name); + inum = dirent->lld_ino; OBD_FREE(dirent, sizeof(*dirent)); - CDEBUG(D_INODE, "entry "LPU64" of PENDING DIR: %s\n", - i, d_name); + CDEBUG(D_INODE, "entry %d of PENDING DIR: %s\n", i, d_name); if (((namlen == 1) && !strcmp(d_name, ".")) || - ((namlen == 2) && !strcmp(d_name, ".."))) { + ((namlen == 2) && !strcmp(d_name, "..")) || inum == 0) continue; - } down(&pending_dir->i_sem); dchild = lookup_one_len(d_name, mds->mds_pending_dir, namlen); diff --git a/lustre/obdclass/genops.c b/lustre/obdclass/genops.c index 380a80a..85c008c 100644 --- a/lustre/obdclass/genops.c +++ b/lustre/obdclass/genops.c @@ -201,7 +201,8 @@ void class_release_dev(struct obd_device *obd) int minor = obd->obd_minor; spin_lock(&obd_dev_lock); - memset(obd, 0, sizeof(*obd)); + obd->obd_type = NULL; + //memset(obd, 0, sizeof(*obd)); obd->obd_minor = minor; spin_unlock(&obd_dev_lock); } @@ -805,15 +806,24 @@ static int oig_done(struct obd_io_group *oig) static void interrupted_oig(void *data) { struct obd_io_group *oig = data; - struct list_head *pos; struct oig_callback_context *occ; unsigned long flags; spin_lock_irqsave(&oig->oig_lock, flags); - list_for_each(pos, &oig->oig_occ_list) { - occ = list_entry(pos, struct oig_callback_context, - occ_oig_item); + /* We need to restart the processing each time we drop the lock, as + * it is possible other threads called oig_complete_one() to remove + * an entry elsewhere in the list while we dropped lock. We need to + * drop the lock because osc_ap_completion() calls oig_complete_one() + * which re-gets this lock ;-) as well as a lock ordering issue. 
*/ +restart: + list_for_each_entry(occ, &oig->oig_occ_list, occ_oig_item) { + if (occ->interrupted) + continue; + occ->interrupted = 1; + spin_unlock_irqrestore(&oig->oig_lock, flags); occ->occ_interrupted(occ); + spin_lock_irqsave(&oig->oig_lock, flags); + goto restart; } spin_unlock_irqrestore(&oig->oig_lock, flags); } diff --git a/lustre/obdclass/lprocfs_status.c b/lustre/obdclass/lprocfs_status.c index 9933871..926420f 100644 --- a/lustre/obdclass/lprocfs_status.c +++ b/lustre/obdclass/lprocfs_status.c @@ -707,8 +707,7 @@ int lprocfs_write_helper(const char *buffer, unsigned long count, return 0; } -int lprocfs_write_u64_helper(const char *buffer, unsigned long count, - __u64 *val) +int lprocfs_write_u64_helper(const char *buffer, unsigned long count,__u64 *val) { char kernbuf[22], *end; @@ -720,7 +719,10 @@ int lprocfs_write_u64_helper(const char *buffer, unsigned long count, kernbuf[count] = '\0'; - *val = simple_strtoull(kernbuf, &end, 0); + if (kernbuf[0] == '-') + *val = -simple_strtoull(kernbuf + 1, &end, 0); + else + *val = simple_strtoull(kernbuf, &end, 0); if (kernbuf == end) return -EINVAL; diff --git a/lustre/obdfilter/filter.c b/lustre/obdfilter/filter.c index eabd4a8..16cd144 100644 --- a/lustre/obdfilter/filter.c +++ b/lustre/obdfilter/filter.c @@ -677,16 +677,16 @@ static int filter_prep_groups(struct obd_device *obd) if (filp->f_dentry->d_inode->i_size == 0) { if (i == 0 && filter->fo_fsd->fsd_unused != 0) { /* OST conversion, remove sometime post 1.0 */ - filter->fo_last_objids[i] = + filter->fo_last_objids[0] = le64_to_cpu(filter->fo_fsd->fsd_unused); CWARN("saving old objid "LPU64" to LAST_ID\n", - filter->fo_last_objids[i]); - rc = filter_update_last_objid(obd, 0, 1); - if (rc) - GOTO(cleanup, rc); + filter->fo_last_objids[0]); } else { filter->fo_last_objids[i] = FILTER_INIT_OBJID; } + rc = filter_update_last_objid(obd, i, 1); + if (rc) + GOTO(cleanup, rc); continue; } @@ -939,8 +939,7 @@ struct dentry *filter_parent_lock(struct obd_device 
*obd, obd_gr group, return dparent; rc = filter_lock_dentry(obd, dparent); - if (time_after(jiffies, now + 15 * HZ)) - CERROR("slow parent lock %lus\n", (jiffies - now) / HZ); + fsfilt_check_slow(now, obd_timeout, "parent lock"); return rc ? ERR_PTR(rc) : dparent; } @@ -1437,6 +1436,7 @@ static void filter_grant_sanity_check(struct obd_device *obd, const char *func) obd_size maxsize = obd->obd_osfs.os_blocks * obd->obd_osfs.os_bsize; obd_size tot_dirty = 0, tot_pending = 0, tot_granted = 0; obd_size fo_tot_dirty, fo_tot_pending, fo_tot_granted; + int level = D_CACHE; if (list_empty(&obd->obd_exports)) return; @@ -1445,13 +1445,20 @@ static void filter_grant_sanity_check(struct obd_device *obd, const char *func) spin_lock(&obd->obd_dev_lock); list_for_each_entry(exp, &obd->obd_exports, exp_obd_chain) { fed = &exp->exp_filter_data; - LASSERTF(fed->fed_grant + fed->fed_pending <= maxsize, - "cli %s/%p %lu+%lu > "LPU64"\n", - exp->exp_client_uuid.uuid, exp, - fed->fed_grant, fed->fed_pending, maxsize); - LASSERTF(fed->fed_dirty <= maxsize, "cli %s/%p %lu > "LPU64"\n", - exp->exp_client_uuid.uuid, exp,fed->fed_dirty,maxsize); - CDEBUG(D_CACHE,"%s: cli %s/%p dirty %lu pend %lu grant %lu\n", + if (fed->fed_grant < 0 || fed->fed_pending < 0 || + fed->fed_dirty < 0) + level = D_ERROR; + if (maxsize > 0) { /* we may not have done a statfs yet */ + LASSERTF(fed->fed_grant + fed->fed_pending <= maxsize, + "cli %s/%p %ld+%ld > "LPU64"\n", + exp->exp_client_uuid.uuid, exp, + fed->fed_grant, fed->fed_pending, maxsize); + LASSERTF(fed->fed_dirty <= maxsize, + "cli %s/%p %ld > "LPU64"\n", + exp->exp_client_uuid.uuid, exp, + fed->fed_dirty, maxsize); + } + CDEBUG(level, "%s: cli %s/%p dirty %ld pend %ld grant %ld\n", obd->obd_name, exp->exp_client_uuid.uuid, exp, fed->fed_dirty, fed->fed_pending, fed->fed_grant); tot_granted += fed->fed_grant + fed->fed_pending; @@ -1494,27 +1501,30 @@ static void filter_grant_discard(struct obd_export *exp) struct obd_device *obd = exp->exp_obd; 
struct filter_obd *filter = &obd->u.filter; struct filter_export_data *fed = &exp->exp_filter_data; + int level = D_CACHE; spin_lock(&obd->obd_osfs_lock); spin_lock(&exp->exp_obd->obd_dev_lock); list_del_init(&exp->exp_obd_chain); spin_unlock(&exp->exp_obd->obd_dev_lock); - CDEBUG(D_CACHE, "%s: cli %s/%p dirty %lu pend %lu grant %lu\n", + if (fed->fed_dirty < 0 || fed->fed_grant < 0 || fed->fed_pending < 0) + level = D_ERROR; + CDEBUG(level, "%s: cli %s/%p dirty %ld pend %ld grant %ld\n", obd->obd_name, exp->exp_client_uuid.uuid, exp, fed->fed_dirty, fed->fed_pending, fed->fed_grant); LASSERTF(filter->fo_tot_granted >= fed->fed_grant, - "%s: tot_granted "LPU64" cli %s/%p fed_grant %lu\n", + "%s: tot_granted "LPU64" cli %s/%p fed_grant %ld\n", obd->obd_name, filter->fo_tot_granted, exp->exp_client_uuid.uuid, exp, fed->fed_grant); filter->fo_tot_granted -= fed->fed_grant; LASSERTF(exp->exp_obd->u.filter.fo_tot_pending >= fed->fed_pending, - "%s: tot_pending "LPU64" cli %s/%p fed_pending %lu\n", + "%s: tot_pending "LPU64" cli %s/%p fed_pending %ld\n", obd->obd_name, filter->fo_tot_pending, exp->exp_client_uuid.uuid, exp, fed->fed_pending); LASSERTF(filter->fo_tot_dirty >= fed->fed_dirty, - "%s: tot_dirty "LPU64" cli %s/%p fed_dirty %lu\n", + "%s: tot_dirty "LPU64" cli %s/%p fed_dirty %ld\n", obd->obd_name, filter->fo_tot_dirty, exp->exp_client_uuid.uuid, exp, fed->fed_dirty); filter->fo_tot_dirty -= fed->fed_dirty; @@ -1551,7 +1561,7 @@ static int filter_disconnect(struct obd_export *exp, int flags) struct obd_device *obd = exp->exp_obd; unsigned long irqflags; struct llog_ctxt *ctxt; - int rc; + int rc, err; ENTRY; LASSERT(exp); @@ -1574,7 +1584,9 @@ static int filter_disconnect(struct obd_export *exp, int flags) /* flush any remaining cancel messages out to the target */ ctxt = llog_get_context(obd, LLOG_UNLINK_REPL_CTXT); - llog_sync(ctxt, exp); + err = llog_sync(ctxt, exp); + if (err) + CERROR("error flushing logs to MDS: rc %d\n", err); class_export_put(exp); 
RETURN(rc); @@ -1827,7 +1839,7 @@ static int filter_should_precreate(struct obd_export *exp, struct obdo *oa, (oa->o_flags & OBD_FL_DELORPHAN)) { if (diff >= 0) RETURN(diff); - if (-diff > 10000) { /* XXX make this smarter */ + if (-diff > OST_MAX_PRECREATE) { CERROR("ignoring bogus orphan destroy request: obdid " LPU64" last_id "LPU64"\n", oa->o_id, filter_last_id(filter, oa)); @@ -1845,7 +1857,8 @@ static int filter_should_precreate(struct obd_export *exp, struct obdo *oa, (group != 0 || oa->o_id == 0)) RETURN(1); - LASSERT(diff >= 0); + LASSERTF(diff >= 0, LPU64" - "LPU64" = %d\n", oa->o_id, + filter_last_id(filter, oa), diff); RETURN(diff); } } @@ -1920,11 +1933,9 @@ static int filter_precreate(struct obd_device *obd, struct obdo *oa, * already exists */ if (recreate_obj) { - CERROR("%s: Serious error: recreating obj %*s " - "but obj already exists \n", + CERROR("%s: recreating existing object %*s?\n", obd->obd_name, dchild->d_name.len, dchild->d_name.name); - LBUG(); } else { CERROR("%s: Serious error: objid %*s already " "exists; is this filesystem corrupt?\n", @@ -2078,7 +2089,7 @@ static int filter_destroy(struct obd_export *exp, struct obdo *oa, dchild = filter_fid2dentry(obd, dparent, group, oa->o_id); if (IS_ERR(dchild)) - GOTO(cleanup, rc = -ENOENT); + GOTO(cleanup, rc = PTR_ERR(dchild)); cleanup_phase = 2; if (dchild->d_inode == NULL) { diff --git a/lustre/obdfilter/filter_internal.h b/lustre/obdfilter/filter_internal.h index 688f28d..a0be7f3 100644 --- a/lustre/obdfilter/filter_internal.h +++ b/lustre/obdfilter/filter_internal.h @@ -92,7 +92,8 @@ enum { LPROC_FILTER_LAST, }; -#define FILTER_MAX_CACHE_SIZE (32 * 1024 * 1024) /* was OBD_OBJECT_EOF */ +//#define FILTER_MAX_CACHE_SIZE (32 * 1024 * 1024) /* was OBD_OBJECT_EOF */ +#define FILTER_MAX_CACHE_SIZE OBD_OBJECT_EOF /* filter.c */ void f_dput(struct dentry *); diff --git a/lustre/obdfilter/filter_io.c b/lustre/obdfilter/filter_io.c index b9b7ab3..bed3213 100644 --- a/lustre/obdfilter/filter_io.c 
+++ b/lustre/obdfilter/filter_io.c @@ -339,11 +339,7 @@ static int filter_preprw_read(int cmd, struct obd_export *exp, struct obdo *oa, fso[i].fso_bufcnt = o->ioo_bufcnt; } - if (time_after(jiffies, now + 15 * HZ)) - CERROR("slow preprw_read setup %lus\n", (jiffies - now) / HZ); - else - CDEBUG(D_INFO, "preprw_read setup: %lu jiffies\n", - (jiffies - now)); + fsfilt_check_slow(now, obd_timeout, "preprw_read setup"); for (i = 0, o = obj, rnb = nb, lnb = res; i < objcount; i++, o++) { dentry = fso[i].fso_dentry; @@ -382,11 +378,7 @@ static int filter_preprw_read(int cmd, struct obd_export *exp, struct obdo *oa, } } - if (time_after(jiffies, now + 15 * HZ)) - CERROR("slow start_page_read %lus\n", (jiffies - now) / HZ); - else - CDEBUG(D_INFO, "start_page_read: %lu jiffies\n", - (jiffies - now)); + fsfilt_check_slow(now, obd_timeout, "start_page_read"); lprocfs_counter_add(obd->obd_stats, LPROC_FILTER_READ_BYTES, tot_bytes); while (lnb-- > res) { @@ -399,11 +391,7 @@ static int filter_preprw_read(int cmd, struct obd_export *exp, struct obdo *oa, } } - if (time_after(jiffies, now + 15 * HZ)) - CERROR("slow finish_page_read %lus\n", (jiffies - now) / HZ); - else - CDEBUG(D_INFO, "finish_page_read: %lu jiffies\n", - (jiffies - now)); + fsfilt_check_slow(now, obd_timeout, "finish_page_read"); filter_tally_read(&exp->exp_obd->u.filter, res, niocount); @@ -599,11 +587,7 @@ static int filter_preprw_write(int cmd, struct obd_export *exp, struct obdo *oa, fso.fso_dentry = dentry; fso.fso_bufcnt = obj->ioo_bufcnt; - if (time_after(jiffies, now + 15 * HZ)) - CERROR("slow preprw_write setup %lus\n", (jiffies - now) / HZ); - else - CDEBUG(D_INFO, "preprw_write setup: %lu jiffies\n", - (jiffies - now)); + fsfilt_check_slow(now, obd_timeout, "preprw_write setup"); spin_lock(&exp->exp_obd->obd_osfs_lock); if (oa) @@ -648,11 +632,7 @@ static int filter_preprw_write(int cmd, struct obd_export *exp, struct obdo *oa, tot_bytes += lnb->len; } - if (time_after(jiffies, now + 15 * HZ)) - 
CERROR("slow start_page_write %lus\n", (jiffies - now) / HZ); - else - CDEBUG(D_INFO, "start_page_write: %lu jiffies\n", - (jiffies - now)); + fsfilt_check_slow(now, obd_timeout, "start_page_write"); lprocfs_counter_add(exp->exp_obd->obd_stats, LPROC_FILTER_WRITE_BYTES, tot_bytes); diff --git a/lustre/obdfilter/filter_io_24.c b/lustre/obdfilter/filter_io_24.c index 8d43c7cc..20c08ee 100644 --- a/lustre/obdfilter/filter_io_24.c +++ b/lustre/obdfilter/filter_io_24.c @@ -152,8 +152,17 @@ static int filter_direct_io(int rw, struct dentry *dchild, struct kiobuf *iobuf, rc = fsync_inode_data_buffers(inode); if (rc == 0) rc = filemap_fdatawait(inode->i_mapping); - if (rc < 0) - GOTO(cleanup, rc); + if (rc < 0) { + /* We can race with truncate_complete_page() in the call to + * filter_clear_page_cache(). This is OK, because it also + * waits on IO completion already, but the truncate confuses + * the buffer_uptodate() in fsync_inode_data_buffers(). + * The only dirty pages in the page cache on an inode should + * be from partial page truncates. + * If there is a real IO error here we'll hit it below. 
*/ + CDEBUG(D_WARNING, "error flushing page cache: rc %d\n", rc); + //GOTO(cleanup, rc); + } rc = brw_kiovec(WRITE, 1, &iobuf, inode->i_dev, iobuf->blocks, 1 << inode->i_blkbits); @@ -316,8 +325,7 @@ int filter_commitrw_write(struct obd_export *exp, struct obdo *oa, int objcount, GOTO(cleanup, rc); } - if (time_after(jiffies, now + 15 * HZ)) - CERROR("slow brw_start %lus\n", (jiffies - now) / HZ); + fsfilt_check_slow(now, obd_timeout, "brw_start"); iattr_from_obdo(&iattr,oa,OBD_MD_FLATIME|OBD_MD_FLMTIME|OBD_MD_FLCTIME); /* filter_direct_io drops i_sem */ @@ -326,16 +334,14 @@ int filter_commitrw_write(struct obd_export *exp, struct obdo *oa, int objcount, if (rc == 0) obdo_from_inode(oa, inode, FILTER_VALID_FLAGS); - if (time_after(jiffies, now + 15 * HZ)) - CERROR("slow direct_io %lus\n", (jiffies - now) / HZ); + fsfilt_check_slow(now, obd_timeout, "direct_io"); err = fsfilt_commit_wait(obd, inode, wait_handle); if (err) rc = err; if (obd_sync_filter) LASSERT(oti->oti_transno <= obd->obd_last_committed); - if (time_after(jiffies, now + 15 * HZ)) - CERROR("slow commitrw commit %lus\n", (jiffies - now) / HZ); + fsfilt_check_slow(now, obd_timeout, "commitrw commit"); cleanup: filter_grant_commit(exp, niocount, res); diff --git a/lustre/obdfilter/filter_io_26.c b/lustre/obdfilter/filter_io_26.c index 77eb003..b3dfed1 100644 --- a/lustre/obdfilter/filter_io_26.c +++ b/lustre/obdfilter/filter_io_26.c @@ -155,8 +155,7 @@ int filter_commitrw_write(struct obd_export *exp, struct obdo *oa, GOTO(cleanup, rc); } - if (time_after(jiffies, now + 15 * HZ)) - CERROR("slow brw_start %lus\n", (jiffies - now) / HZ); + fsfilt_check_slow(now, obd_timeout, "brw_start"); iattr_from_obdo(&iattr,oa,OBD_MD_FLATIME|OBD_MD_FLMTIME|OBD_MD_FLCTIME); for (i = 0, lnb = res; i < obj->ioo_bufcnt; i++, lnb++) { @@ -236,8 +235,7 @@ int filter_commitrw_write(struct obd_export *exp, struct obdo *oa, } up(&inode->i_sem); - if (time_after(jiffies, now + 15 * HZ)) - CERROR("slow direct_io %lus\n", 
(jiffies - now) / HZ); + fsfilt_check_slow(now, obd_timeout, "direct_io"); rc = filter_finish_transno(exp, oti, rc); @@ -248,8 +246,7 @@ int filter_commitrw_write(struct obd_export *exp, struct obdo *oa, if (obd_sync_filter) LASSERT(oti->oti_transno <= obd->obd_last_committed); - if (time_after(jiffies, now + 15 * HZ)) - CERROR("slow commitrw commit %lus\n", (jiffies - now) / HZ); + fsfilt_check_slow(now, obd_timeout, "commitrw commit"); cleanup: filter_grant_commit(exp, niocount, res); diff --git a/lustre/osc/lproc_osc.c b/lustre/osc/lproc_osc.c index 7060ee1..89ec9ba 100644 --- a/lustre/osc/lproc_osc.c +++ b/lustre/osc/lproc_osc.c @@ -128,7 +128,7 @@ int osc_wr_max_dirty_mb(struct file *file, const char *buffer, if (rc) return rc; - if (val < 0 || val > OSC_MAX_DIRTY_MB_MAX) + if (val < 0 || val > OSC_MAX_DIRTY_MB_MAX || val > num_physpages / 4) return -ERANGE; spin_lock(&cli->cl_loi_list_lock); @@ -165,41 +165,6 @@ int osc_rd_cur_grant_bytes(char *page, char **start, off_t off, int count, return rc; } -int osc_rd_create_low_wm(char *page, char **start, off_t off, int count, - int *eof, void *data) -{ - struct obd_device *obd = data; - - if (obd == NULL) - return 0; - - return snprintf(page, count, "%d\n", - obd->u.cli.cl_oscc.oscc_kick_barrier); -} - -int osc_wr_create_low_wm(struct file *file, const char *buffer, - unsigned long count, void *data) -{ - struct obd_device *obd = data; - int val, rc; - - if (obd == NULL) - return 0; - - rc = lprocfs_write_helper(buffer, count, &val); - if (rc) - return rc; - - if (val < 0) - return -ERANGE; - - spin_lock(&obd->obd_dev_lock); - obd->u.cli.cl_oscc.oscc_kick_barrier = val; - spin_unlock(&obd->obd_dev_lock); - - return count; -} - int osc_rd_create_count(char *page, char **start, off_t off, int count, int *eof, void *data) { @@ -227,6 +192,8 @@ int osc_wr_create_count(struct file *file, const char *buffer, if (val < 0) return -ERANGE; + if (val > OST_MAX_PRECREATE) + return -ERANGE; obd->u.cli.cl_oscc.oscc_grow_count 
= val; @@ -275,7 +242,6 @@ static struct lprocfs_vars lprocfs_obd_vars[] = { { "max_dirty_mb", osc_rd_max_dirty_mb, osc_wr_max_dirty_mb, 0 }, { "cur_dirty_bytes", osc_rd_cur_dirty_bytes, 0, 0 }, { "cur_grant_bytes", osc_rd_cur_grant_bytes, 0, 0 }, - {"create_low_watermark", osc_rd_create_low_wm, osc_wr_create_low_wm, 0}, { "create_count", osc_rd_create_count, osc_wr_create_count, 0 }, { "prealloc_next_id", osc_rd_prealloc_next_id, 0, 0 }, { "prealloc_last_id", osc_rd_prealloc_last_id, 0, 0 }, diff --git a/lustre/osc/osc_create.c b/lustre/osc/osc_create.c index d5a6141..b6d6701 100644 --- a/lustre/osc/osc_create.c +++ b/lustre/osc/osc_create.c @@ -81,12 +81,14 @@ static int osc_interpret_create(struct ptlrpc_request *req, void *data, spin_unlock(&oscc->oscc_lock); } else if (rc != 0 && rc != -EIO) { DEBUG_REQ(D_ERROR, req, - "unknown rc %d from async create: failing oscc", - rc); + "unknown rc %d from async create: failing oscc", rc); oscc->oscc_flags |= OSCC_FLAG_RECOVERING; + oscc->oscc_grow_count = OST_MIN_PRECREATE; spin_unlock(&oscc->oscc_lock); ptlrpc_fail_import(req->rq_import, req->rq_import_generation); } else { + if (rc == 0) + oscc->oscc_flags &= ~OSCC_FLAG_LOW; spin_unlock(&oscc->oscc_lock); } @@ -106,6 +108,17 @@ static int oscc_internal_create(struct osc_creator *oscc) ENTRY; spin_lock(&oscc->oscc_lock); + if (oscc->oscc_grow_count < OST_MAX_PRECREATE && + !(oscc->oscc_flags & (OSCC_FLAG_LOW | OSCC_FLAG_RECOVERING)) && + (__s64)(oscc->oscc_last_id - oscc->oscc_next_id) <= + (oscc->oscc_grow_count / 4 + 1)) { + oscc->oscc_flags |= OSCC_FLAG_LOW; + oscc->oscc_grow_count *= 2; + } + + if (oscc->oscc_grow_count > OST_MAX_PRECREATE) + oscc->oscc_grow_count = OST_MAX_PRECREATE; + if (oscc->oscc_flags & OSCC_FLAG_CREATING || oscc->oscc_flags & OSCC_FLAG_RECOVERING) { spin_unlock(&oscc->oscc_lock); @@ -178,7 +191,7 @@ static int oscc_precreate(struct osc_creator *oscc, int wait) int rc = 0; ENTRY; - if (oscc_has_objects(oscc, oscc->oscc_kick_barrier)) + if 
(oscc_has_objects(oscc, oscc->oscc_grow_count / 2)) RETURN(0); if (!wait) @@ -225,9 +238,9 @@ int osc_create(struct obd_export *exp, struct obdo *oa, RETURN(osc_real_create(exp, oa, ea, oti)); } - /* this is the special case where create removes orphans */ - if ((oa->o_valid & OBD_MD_FLFLAGS) && - oa->o_flags == OBD_FL_DELORPHAN) { + /* this is the special case where create removes orphans */ + if ((oa->o_valid & OBD_MD_FLFLAGS) && + oa->o_flags == OBD_FL_DELORPHAN) { spin_lock(&oscc->oscc_lock); if (oscc->oscc_flags & OSCC_FLAG_SYNC_IN_PROGRESS) { spin_unlock(&oscc->oscc_lock); @@ -342,9 +355,7 @@ void oscc_init(struct obd_device *obd) init_waitqueue_head(&oscc->oscc_waitq); spin_lock_init(&oscc->oscc_lock); oscc->oscc_obd = obd; - oscc->oscc_kick_barrier = 100; - oscc->oscc_grow_count = 2000; - oscc->oscc_initial_create_count = 2000; + oscc->oscc_grow_count = OST_MIN_PRECREATE; oscc->oscc_next_id = 2; oscc->oscc_last_id = 1; diff --git a/lustre/osc/osc_internal.h b/lustre/osc/osc_internal.h index bba36c0..fa9d933 100644 --- a/lustre/osc/osc_internal.h +++ b/lustre/osc/osc_internal.h @@ -27,14 +27,14 @@ struct osc_async_page { enum async_flags oap_async_flags; unsigned long oap_interrupted:1; - struct obd_io_group *oap_oig; + struct obd_io_group *oap_oig; struct oig_callback_context oap_occ; struct ptlrpc_request *oap_request; struct client_obd *oap_cli; struct lov_oinfo *oap_loi; struct obd_async_page_ops *oap_caller_ops; - void *oap_caller_data; + void *oap_caller_data; }; struct osc_cache_waiter { @@ -44,10 +44,11 @@ struct osc_cache_waiter { int ocw_rc; }; -#define OSCC_FLAG_RECOVERING 1 -#define OSCC_FLAG_CREATING 2 -#define OSCC_FLAG_NOSPC 4 /* can't create more objects on this OST */ -#define OSCC_FLAG_SYNC_IN_PROGRESS 8 /* only allow one thread to sync */ +#define OSCC_FLAG_RECOVERING 0x01 +#define OSCC_FLAG_CREATING 0x02 +#define OSCC_FLAG_NOSPC 0x04 /* can't create more objects on OST */ +#define OSCC_FLAG_SYNC_IN_PROGRESS 0x08 /* only allow one thread 
to sync */ +#define OSCC_FLAG_LOW 0x10 int osc_create(struct obd_export *exp, struct obdo *oa, struct lov_stripe_md **ea, struct obd_trans_info *oti); diff --git a/lustre/portals/libcfs/debug.c b/lustre/portals/libcfs/debug.c index 01fbe35..0b55543 100644 --- a/lustre/portals/libcfs/debug.c +++ b/lustre/portals/libcfs/debug.c @@ -352,9 +352,7 @@ out: char *portals_debug_dumpstack(void) { - char *buf = stack_backtrace; - buf[0] = '\0'; - return buf; + return "dump_stack\n"; } #endif /* __arch_um__ */ diff --git a/lustre/ptlrpc/import.c b/lustre/ptlrpc/import.c index d1806e6..40e7d72 100644 --- a/lustre/ptlrpc/import.c +++ b/lustre/ptlrpc/import.c @@ -259,10 +259,8 @@ int ptlrpc_connect_import(struct obd_import *imp, char * new_uuid) initial_connect = 1; } else { committed_before_reconnect = imp->imp_peer_committed_transno;; - } - spin_unlock_irqrestore(&imp->imp_lock, flags); if (new_uuid) { diff --git a/lustre/ptlrpc/pinger.c b/lustre/ptlrpc/pinger.c index 66a988c..354c611 100644 --- a/lustre/ptlrpc/pinger.c +++ b/lustre/ptlrpc/pinger.c @@ -115,7 +115,8 @@ static int ptlrpc_pinger_main(void *arg) spin_unlock_irqrestore(&imp->imp_lock, flags); if (imp->imp_next_ping <= this_ping || force) { - if (level == LUSTRE_IMP_DISCON) { + if (level == LUSTRE_IMP_DISCON && + !imp->imp_deactive) { /* wait at least a timeout before trying recovery again. */ imp->imp_next_ping = jiffies + diff --git a/lustre/ptlrpc/recover.c b/lustre/ptlrpc/recover.c index a86679d..9c8b56e 100644 --- a/lustre/ptlrpc/recover.c +++ b/lustre/ptlrpc/recover.c @@ -317,10 +317,12 @@ int ptlrpc_set_import_active(struct obd_import *imp, int active) * requests. 
*/ if (!active) { ptlrpc_invalidate_import(imp, 0); + imp->imp_deactive = 1; } /* When activating, mark import valid, and attempt recovery */ if (active) { + imp->imp_deactive = 0; CDEBUG(D_HA, "setting import %s VALID\n", imp->imp_target_uuid.uuid); rc = ptlrpc_recover_import(imp, NULL); diff --git a/lustre/scripts/lustre.spec.in b/lustre/scripts/lustre.spec.in index 06f93d3..bbde7a5 100644 --- a/lustre/scripts/lustre.spec.in +++ b/lustre/scripts/lustre.spec.in @@ -144,7 +144,6 @@ mkdir -p $RPM_BUILD_ROOT/var/lib/ldap/lustre %attr(-, root, root) /usr/bin/lstripe %attr(-, root, root) /usr/bin/mcreate %attr(-, root, root) /usr/bin/munlink -%attr(-, root, root) /usr/bin/mkdirmany %attr(-, root, root) /usr/lib/lustre/python %attr(-, root, root) /usr/lib/lustre/examples diff --git a/lustre/scripts/merge1.sh b/lustre/scripts/merge1.sh index a862436..c2d642f4 100755 --- a/lustre/scripts/merge1.sh +++ b/lustre/scripts/merge1.sh @@ -80,7 +80,7 @@ $CVS update -j ${CHILD}_BASE -j ${PARENT}_${CHILD}_UPDATE_PARENT_$date -dP echo "done" echo -n "Recording conflicts in $CONFLICTS ..." -if $CVS update | grep '^C' > $CONFLICTS; then +if $CVS update | awk '/^C/ { print $2 }' > $CONFLICTS; then echo "Conflicts found, fix before committing." cat $CONFLICTS else diff --git a/lustre/tests/.cvsignore b/lustre/tests/.cvsignore index bc148be..d828db6 100644 --- a/lustre/tests/.cvsignore +++ b/lustre/tests/.cvsignore @@ -65,3 +65,5 @@ ll_dirstripe_verify openfilleddirunlink copy_attr rename_many +memhog +rmdirmany diff --git a/lustre/tests/cmknod.c b/lustre/tests/cmknod.c index c8659e4..920ee5b 100644 --- a/lustre/tests/cmknod.c +++ b/lustre/tests/cmknod.c @@ -7,6 +7,7 @@ #include #include #include + #define TEST_MINOR 120 #define TEST_MAJOR 25 @@ -16,18 +17,48 @@ void usage(char *prog) exit(1); } +/* UMKA: This stuff inlined here instead of using appropriate header + to avoid linking to symbols which is not present in newer libc. 
+ + Currently this is the case, as UML image contains RedHat 9 and + developers use something newer (Fedora, etc.). */ +inline unsigned int +__gnu_dev_major (unsigned long long int __dev) +{ + return ((__dev >> 8) & 0xfff) | ((unsigned int) (__dev >> 32) & ~0xfff); +} + +inline unsigned int +__gnu_dev_minor (unsigned long long int __dev) +{ + return (__dev & 0xff) | ((unsigned int) (__dev >> 12) & ~0xff); +} + +inline unsigned long long int +__gnu_dev_makedev (unsigned int __major, unsigned int __minor) +{ + return ((__minor & 0xff) | ((__major & 0xfff) << 8) + | (((unsigned long long int) (__minor & ~0xff)) << 12) + | (((unsigned long long int) (__major & ~0xfff)) << 32)); +} + +#define __minor(dev) __gnu_dev_minor(dev) +#define __major(dev) __gnu_dev_major(dev) +#define __makedev(maj, min) __gnu_dev_makedev(maj, min) + int main( int argc, char **argv) { char *prog = argv[0]; char *filename = argv[1]; int rc; struct stat st; - dev_t device = makedev(TEST_MAJOR, TEST_MINOR); + dev_t device = __makedev(TEST_MAJOR, TEST_MINOR); if (argc != 2) usage(prog); unlink(filename); + /* First try block devices */ rc = mknod(filename, 0700 | S_IFBLK, device); if ( rc < 0 ) { @@ -42,8 +73,10 @@ int main( int argc, char **argv) prog, filename, errno, strerror(errno)); return 3; } + if ( st.st_rdev != device) { - fprintf(stderr, "%s: created device other than requested: (%d,%d) instead of (%d,%d)\n", prog, major(st.st_rdev),minor(st.st_rdev),major(device),minor(device)); + fprintf(stderr, "%s: created device other than requested: (%u,%u) instead of (%u,%u)\n", + prog, __major(st.st_rdev),__minor(st.st_rdev),__major(device),__minor(device)); return 4; } if (!S_ISBLK(st.st_mode)) { @@ -73,7 +106,8 @@ int main( int argc, char **argv) return 8; } if ( st.st_rdev != device) { - fprintf(stderr, "%s: created device other than requested: (%d,%d) instead of (%d,%d)\n", prog, major(st.st_rdev),minor(st.st_rdev),major(device),minor(device)); + fprintf(stderr, "%s: created device other than 
requested: (%u,%u) instead of (%u,%u)\n", + prog, __major(st.st_rdev),__minor(st.st_rdev),__major(device),__minor(device)); return 9; } if (!S_ISCHR(st.st_mode)) { diff --git a/lustre/tests/compile.sh b/lustre/tests/compile.sh index 2e719b5..a3c6b7a 100644 --- a/lustre/tests/compile.sh +++ b/lustre/tests/compile.sh @@ -1,7 +1,7 @@ #!/bin/sh set -evx -MOUNT=${MNT:-/mnt/lustre} +MOUNT=${MOUNT:-/mnt/lustre} DIR=${DIR:-$MOUNT} SRC=${SRC:-`dirname $0`/../..} export CC=${CC:-gcc} diff --git a/lustre/tests/conf-sanity.sh b/lustre/tests/conf-sanity.sh index 27d7370..63c66bb 100644 --- a/lustre/tests/conf-sanity.sh +++ b/lustre/tests/conf-sanity.sh @@ -623,7 +623,7 @@ test_16() { mount_client $MOUNT check_mount || return 41 cleanup || return $? - fi + fi echo "change the mode of $MDSDEV/OBJECTS,LOGS,PENDING to 555" [ -d $TMPMTPT ] || mkdir -p $TMPMTPT diff --git a/lustre/tests/lockorder.sh b/lustre/tests/lockorder.sh index 6593c33..2997518 100644 --- a/lustre/tests/lockorder.sh +++ b/lustre/tests/lockorder.sh @@ -7,7 +7,7 @@ STATMANY=${STATMANY:-statmany} UNLINKMANY=${UNLINKMANY:-unlinkmany} LCTL=${LCTL:-lctl} -MOUNT1=${MOUNT1:-/mnt/lustre} +MOUNT1=${MOUNT1:-/mnt/lustre1} MOUNT2=${MOUNT2:-/mnt/lustre2} DIR=${DIR:-$MOUNT1} DIR2=${DIR2:-$MOUNT2} diff --git a/lustre/tests/lov.sh b/lustre/tests/lov.sh index 75b94c0..e370474 100755 --- a/lustre/tests/lov.sh +++ b/lustre/tests/lov.sh @@ -14,7 +14,7 @@ MDSSIZE=${MDSSIZE:-400000} FSTYPE=${FSTYPE:-ext3} MOUNT=${MOUNT:-/mnt/lustre} MOUNT2=${MOUNT2:-${MOUNT}2} -NETWORKTYPE=${NETWORKTYPE:-tcp} +NETTYPE=${NETTYPE:-tcp} OSTCOUNT=${OSTCOUNT:-5} # OSTDEVN will still override the device for OST N @@ -35,8 +35,8 @@ rm -f $config # create nodes ${LMC} --add node --node localhost || exit 10 -${LMC} --add net --node localhost --nid `hostname` --nettype $NETWORKTYPE || exit 11 -${LMC} --add net --node client --nid '*' --nettype $NETWORKTYPE || exit 12 +${LMC} --add net --node localhost --nid `hostname` --nettype $NETTYPE || exit 11 +${LMC} --add 
net --node client --nid '*' --nettype $NETTYPE || exit 12 # configure mds server ${LMC} --format --add mds --node localhost --mds mds1 --fstype $FSTYPE --dev $MDSDEV --size $MDSSIZE || exit 20 diff --git a/lustre/tests/recovery-cleanup.sh b/lustre/tests/recovery-cleanup.sh index e4eefd0..a70c5f3 100755 --- a/lustre/tests/recovery-cleanup.sh +++ b/lustre/tests/recovery-cleanup.sh @@ -19,7 +19,7 @@ PDSH='pdsh -S -w' MDSNODE=${MDSNODE:-mdev6} OSTNODE=${OSTNODE:-mdev7} CLIENT=${CLIENT:-mdev8} -NETWORKTYPE=${NETWORKTYPE:-tcp} +NETTYPE=${NETTYPE:-tcp} MOUNTPT=${MOUNTPT:-/mnt/lustre} CONFIG=${CONFIG:-recovery-cleanup.xml} MDSDEV=${MDSDEV:-/tmp/mds1-`hostname`} @@ -49,8 +49,8 @@ drop_request() { make_config() { rm -f $CONFIG for NODE in $CLIENT $MDSNODE $OSTNODE; do - lmc -m $CONFIG --add net --node $NODE --nid `h2$NETWORKTYPE $NODE` \ - --nettype $NETWORKTYPE || exit 4 + lmc -m $CONFIG --add net --node $NODE --nid `h2$NETTYPE $NODE` \ + --nettype $NETTYPE || exit 4 done lmc -m $CONFIG --add mds --node $MDSNODE --mds mds1 --fstype $FSTYPE \ --dev $MDSDEV --size $MDSSIZE || exit 5 diff --git a/lustre/tests/recovery-small.sh b/lustre/tests/recovery-small.sh index ed1cec6..4f6333b 100755 --- a/lustre/tests/recovery-small.sh +++ b/lustre/tests/recovery-small.sh @@ -26,7 +26,7 @@ CLEANUP=${CLEANUP:-"cleanup"} make_config() { rm -f $XMLCONFIG add_mds mds --dev $MDSDEV --size $MDSSIZE - add_lov lov1 mds --stripe_sz $STRIPE_BYTES\ + add_lov lov1 mds --stripe_sz $STRIPE_BYTES \ --stripe_cnt $STRIPES_PER_OBJ --stripe_pattern 0 add_ost ost --lov lov1 --dev $OSTDEV --size $OSTSIZE add_ost ost2 --lov lov1 --dev ${OSTDEV}-2 --size $OSTSIZE @@ -248,9 +248,10 @@ test_17() { # client will get evicted here sysctl -w lustre.fail_loc=0x80000503 do_facet client cp /etc/termcap $DIR/$tfile - sysctl -w lustre.fail_loc=0 sleep $TIMEOUT + sysctl -w lustre.fail_loc=0 + do_facet client "df $DIR" # expect cmp to fail do_facet client "cmp /etc/termcap $DIR/$tfile" && return 1 do_facet client "rm 
$DIR/$tfile" || return 2 diff --git a/lustre/tests/replay-dual.sh b/lustre/tests/replay-dual.sh index fdea9d0..8597d95 100755 --- a/lustre/tests/replay-dual.sh +++ b/lustre/tests/replay-dual.sh @@ -19,7 +19,7 @@ gen_config() { add_mdsfailover mds --dev $MDSDEV --size $MDSSIZE fi - add_lov lov1 mds --stripe_sz $STRIPE_BYTES\ + add_lov lov1 mds --stripe_sz $STRIPE_BYTES \ --stripe_cnt $STRIPES_PER_OBJ --stripe_pattern 0 add_ost ost --lov lov1 --dev $OSTDEV --size $OSTSIZE --failover add_ost ost2 --lov lov1 --dev ${OSTDEV}-2 --size $OSTSIZE --failover diff --git a/lustre/tests/replay-ost-single.sh b/lustre/tests/replay-ost-single.sh index d11007a..b0f39ef 100755 --- a/lustre/tests/replay-ost-single.sh +++ b/lustre/tests/replay-ost-single.sh @@ -120,7 +120,7 @@ test_4() { run_test 4 "Fail OST during read, with verification" test_5() { - FREE=`df -h $DIR | tail -n 1 | awk '{ print $3 }'` + FREE=`df -P -h $DIR | tail -n 1 | awk '{ print $3 }'` case $FREE in *T|*G) FREE=1G;; esac diff --git a/lustre/tests/replay-single.sh b/lustre/tests/replay-single.sh index 876cc20..252d825 100755 --- a/lustre/tests/replay-single.sh +++ b/lustre/tests/replay-single.sh @@ -24,7 +24,7 @@ gen_config() { add_mdsfailover mds --dev $MDSDEV --size $MDSSIZE fi - add_lov lov1 mds --stripe_sz $STRIPE_BYTES\ + add_lov lov1 mds --stripe_sz $STRIPE_BYTES \ --stripe_cnt $STRIPES_PER_OBJ --stripe_pattern 0 add_ost ost --lov lov1 --dev $OSTDEV --size $OSTSIZE add_ost ost2 --lov lov1 --dev ${OSTDEV}-2 --size $OSTSIZE @@ -659,7 +659,7 @@ test_32() { # give multiop a chance to open sleep 1 mds_evict_client - df $MOUNT || df $MOUNT || return 1 + df $MOUNT || sleep 1 && df $MOUNT || return 1 kill -USR1 $pid1 kill -USR1 $pid2 sleep 1 @@ -834,14 +834,14 @@ run_test 41 "read from a valid osc while other oscs are invalid" # test MDS recovery after ost failure test_42() { - blocks=`df $MOUNT | tail -n 1 | awk '{ print $1 }'` + blocks=`df -P $MOUNT | tail -n 1 | awk '{ print $2 }'` createmany -o $DIR/$tfile-%d 
800 replay_barrier ost unlinkmany $DIR/$tfile-%d 0 400 facet_failover ost # osc is evicted, fs is smaller - blocks_after=`df $MOUNT | tail -n 1 | awk '{ print $1 }'` + blocks_after=`df -P $MOUNT | tail -n 1 | awk '{ print $2 }'` [ $blocks_after -lt $blocks ] || return 1 echo wait for MDS to timeout and recover sleep $((TIMEOUT * 2)) @@ -851,7 +851,7 @@ test_42() { run_test 42 "recovery after ost failure" # b=2530 -# directory orphans can't be unlinked from PENDING directory +# timeout in MDS/OST recovery RPC will LBUG MDS test_43() { replay_barrier mds @@ -953,14 +953,14 @@ test_48() { } run_test 48 "MDS->OSC failure during precreate cleanup (2824)" -test_49() { +test_50() { local osc_dev=`$LCTL device_list | \ awk '(/ost_svc_mds_svc/){print $4}' ` $LCTL --device %$osc_dev recover && $LCTL --device %$osc_dev recover # give the mds_lov_sync threads a chance to run sleep 5 } -run_test 49 "Double OSC recovery, don't LASSERT" +run_test 50 "Double OSC recovery, don't LASSERT (3812)" # b3764 timed out lock replay test_52() { diff --git a/lustre/tests/runas.c b/lustre/tests/runas.c index 91c9c7f..3b76af6 100644 --- a/lustre/tests/runas.c +++ b/lustre/tests/runas.c @@ -18,7 +18,7 @@ #endif static const char usage[] = -"Usage: %s -u user_id [-g grp_id ] [ -G ] command\n" +"Usage: %s -u user_id [-g grp_id ] [ -G[gid0,gid1,...] ] command\n" " -u user_id switch to UID user_id\n" " -g grp_id switch to GID grp_id\n" " -G[gid0,gid1,...] 
set supplementary groups\n"; diff --git a/lustre/tests/sanity-buffalo.sh b/lustre/tests/sanity-buffalo.sh index 70e4a39..02c95d1 100755 --- a/lustre/tests/sanity-buffalo.sh +++ b/lustre/tests/sanity-buffalo.sh @@ -31,7 +31,7 @@ export TESTDESC=${TESTDESC:-"Local test in $TARGET for correctness"} export TESTGROUP=${TESTGROUP:-"correctness"} export LUSTRE_TAG=${LUSTRE_TAG:-`cat $PWD/CVS/Tag | cut -c 2-`} export TESTARCH=${TESTARCH:-`uname -r`} -export NETWORKTYPE=${NETWORKTYPE:-"tcp"} +export NETTYPE=${NETTYPE:-"tcp"} export MACHINENAME=${MACHINENAME:-`hostname`} usage() { diff --git a/lustre/tests/sanity.sh b/lustre/tests/sanity.sh index 0de342a..5d6a069 100644 --- a/lustre/tests/sanity.sh +++ b/lustre/tests/sanity.sh @@ -726,6 +726,7 @@ run_test 24t "mkdir .../R16a/b/c; rename .../R16a/b/c .../R16a =" test_25a() { echo '== symlink sanity =============================================' + mkdir $DIR/d25 ln -s d25 $DIR/s25 touch $DIR/s25/foo || error @@ -1695,24 +1696,25 @@ test_51() { } run_test 51 "special situations: split htree with empty entry ==" +export NUMTEST=70000 test_51b() { - NUMTEST=70000 - check_kernel_version 40 || NUMTEST=31000 NUMFREE=`df -i -P $DIR | tail -n 1 | awk '{ print $4 }'` - [ $NUMFREE -lt $NUMTEST ] && \ - echo "skipping test 51b, not enough free inodes($NUMFREE)" && \ + [ $NUMFREE -lt 21000 ] && \ + echo "skipping test 51b, not enough free inodes ($NUMFREE)" && \ return + + check_kernel_version 40 || NUMTEST=31000 + [ $NUMFREE -lt $NUMTEST ] && NUMTEST=$(($NUMFREE - 50)) + mkdir -p $DIR/d51b (cd $DIR/d51b; mkdirmany t $NUMTEST) } run_test 51b "mkdir .../t-0 --- .../t-70000 ====================" test_51c() { - NUMTEST=70000 - check_kernel_version 40 || NUMTEST=31000 - NUMFREE=`df -i -P $DIR | tail -n 1 | awk '{ print $4 }'` - [ $NUMFREE -lt $NUMTEST ] && echo "skipping test 51c" && return - mkdir -p $DIR/d51b + [ ! 
-d $DIR/d51b ] && echo "skipping test 51c: $DIR/51b missing" && \ + return + (cd $DIR/d51b; rmdirmany t $NUMTEST) } run_test 51c "rmdir .../t-0 --- .../t-70000 ====================" diff --git a/lustre/tests/sanityN.sh b/lustre/tests/sanityN.sh index c3e0a80..92ba931 100644 --- a/lustre/tests/sanityN.sh +++ b/lustre/tests/sanityN.sh @@ -3,8 +3,8 @@ set -e ONLY=${ONLY:-"$*"} -# bug number for skipped test: 1768 3192 -ALWAYS_EXCEPT=${ALWAYS_EXCEPT:-"4 14b"} +# bug number for skipped test: 1768 3192 3192 +ALWAYS_EXCEPT=${ALWAYS_EXCEPT:-"4 14b 14c"} # UPDATE THE COMMENT ABOVE WITH BUG NUMBERS WHEN CHANGING ALWAYS_EXCEPT! [ "$ALWAYS_EXCEPT$EXCEPT" ] && echo "Skipping tests: $ALWAYS_EXCEPT $EXCEPT" @@ -51,11 +51,20 @@ log() { lctl mark "$*" 2> /dev/null || true } +trace() { + log "STARTING: $*" + strace -o $TMP/$1.strace -ttt $* + RC=$? + log "FINISHED: $*: rc $RC" + return 1 +} +TRACE=${TRACE:-""} + run_one() { if ! mount | grep -q $DIR1; then $START fi - log "== test $1: $2" + log "== test $1: $2 `date +%H:%M:%S`" export TESTNAME=test_$1 test_$1 || error "test_$1: exit with rc=$?" unset TESTNAME diff --git a/lustre/tests/test-framework.sh b/lustre/tests/test-framework.sh index 610f068..3e2a6c3 100644 --- a/lustre/tests/test-framework.sh +++ b/lustre/tests/test-framework.sh @@ -188,6 +188,7 @@ fail_abort() { start $facet do_facet $facet lctl --device %${facet}_svc abort_recovery df $MOUNT || echo "first df failed: $?" + sleep 1 df $MOUNT || error "post-failover df: $?" } @@ -558,7 +559,7 @@ run_one() { # Pretty tests run faster. equals_msg $testnum: $message - log "== test $1: $2" + log "== test $testnum: $message =========== `date +%H:%M:%S`" test_${testnum} || error "test_$testnum failed with $?" 
} diff --git a/lustre/tests/uml.sh b/lustre/tests/uml.sh index 342883b..fbdd284 100644 --- a/lustre/tests/uml.sh +++ b/lustre/tests/uml.sh @@ -15,10 +15,11 @@ OSTDEVBASE=$TMP/ost #etc OSTSIZE=${OSTSIZE:-100000} STRIPECNT=${STRIPECNT:-1} -STRIPESZ=${STRIPESZ:-$((1024 * 1024))} +STRIPE_BYTES=${STRIPE_BYTES:-$((1024 * 1024))} OSDTYPE=${OSDTYPE:-obdfilter} OSTFAILOVER=${OSTFAILOVER:-} +MOUNT=${MOUNT:-/mnt/lustre} FSTYPE=${FSTYPE:-ext3} NETTYPE=${NETTYPE:-tcp} @@ -88,7 +89,7 @@ echo; echo "adding MDS on: $MDSNODE" ${LMC} -m $config --add mds --format --node $MDSNODE --mds mds1 --fstype $FSTYPE --dev $MDSDEV --size $MDSSIZE ||exit 10 # configure ost -${LMC} -m $config --add lov --lov lov1 --mds mds1 --stripe_sz $STRIPESZ --stripe_cnt $STRIPECNT --stripe_pattern 0 || exit 20 +${LMC} -m $config --add lov --lov lov1 --mds mds1 --stripe_sz $STRIPE_BYTES --stripe_cnt $STRIPECNT --stripe_pattern 0 || exit 20 COUNT=1 echo -n "adding OST on:" for NODE in $OSTNODES; do @@ -111,6 +112,6 @@ done echo; echo -n "adding CLIENT on:" for NODE in $CLIENTS; do echo -n " $NODE" - ${LMC} -m $config --add mtpt --node $NODE --path /mnt/lustre --mds mds1 --lov lov1 || exit 30 + ${LMC} -m $config --add mtpt --node $NODE --path $MOUNT --mds mds1 --lov lov1 || exit 30 done echo diff --git a/lustre/utils/lctl.c b/lustre/utils/lctl.c index 75a2906..8620bf8 100644 --- a/lustre/utils/lctl.c +++ b/lustre/utils/lctl.c @@ -98,13 +98,17 @@ command_t cmdlist[] = { "add an entry to the portals routing table\n" "usage: add_route []"}, {"del_route", jt_ptl_del_route, 0, - "delete the route via the given gateway to the given targets from the portals routing table\n" + "delete route via gateway to targets from the portals routing table\n" "usage: del_route [] []"}, {"set_route", jt_ptl_notify_router, 0, - "enable/disable routes via the given gateway in the portals routing table\n" + "enable/disable routes via gateway in the portals routing table\n" "usage: set_route [