char *portals_debug_dumpstack(void)
{
- char *buf = stack_backtrace;
- buf[0] = '\0';
- return buf;
+ return "dump_stack\n";
}
#endif /* __arch_um__ */
+tbd Cluster File Systems, Inc. <info@clusterfs.com>
+ * version 1.2.6
+ * bug fixes
+ - avoid crash during MDS cleanup with OST shut down (2775)
+ - fix loi_list_lock/oig_lock inversion on interrupted IO (4136)
+ - don't use bad inodes on the MDS (3744)
+ - dynamic object preallocation to improve recovery speed (4236)
+ - don't hold spinlock over lock dumping or change debug flags (4401)
+ - don't zero obd_dev when it is force cleaned (3651)
+ - print grants to console if they go negative (4431)
+ - "lctl deactivate" will stop automatic recovery attempts (3406)
+ - look for existing locks in ldlm_handle_enqueue() (3764)
+ - don't resolve lock handle twice in recovery avoiding race (4401)
+
2004-08-24 Cluster File Systems, Inc. <info@clusterfs.com>
* version 1.2.5
* bug fixes
- replace some LBUG about llog ops with error handling (3841)
- don't match INVALID dentries from d_lookup and spin (3784)
- hold dcache_lock while marking dentries INVALID and hashing (4255)
+ - fix invalid assertion in ptlrpc_set_wait (3880)
* miscellania
- add libwrap support for the TCP acceptor (3996)
- add /proc/sys/portals/routes for non-root route listing (3994)
AC_INIT
AC_CANONICAL_SYSTEM
-AM_INIT_AUTOMAKE(lustre, 1.2.5)
+AM_INIT_AUTOMAKE(lustre, 1.2.5.4)
# AM_MAINTAINER_MODE
# Four main targets: lustre kernel modules, utilities, tests, and liblustre
struct list_head ns_unused_list; /* all root resources in ns */
int ns_nr_unused;
unsigned int ns_max_unused;
+ unsigned long ns_next_dump; /* next dump time */
spinlock_t ns_counter_lock;
__u64 ns_locks;
struct ldlm_lock *lock);
void ldlm_resource_unlink_lock(struct ldlm_lock *lock);
void ldlm_res2desc(struct ldlm_resource *res, struct ldlm_resource_desc *desc);
-void ldlm_dump_all_namespaces(void);
-void ldlm_namespace_dump(struct ldlm_namespace *);
-void ldlm_resource_dump(struct ldlm_resource *);
+void ldlm_dump_all_namespaces(int level);
+void ldlm_namespace_dump(int level, struct ldlm_namespace *);
+void ldlm_resource_dump(int level, struct ldlm_resource *);
int ldlm_lock_change_resource(struct ldlm_namespace *, struct ldlm_lock *,
struct ldlm_res_id);
struct obd_device *oscc_obd;
obd_id oscc_last_id;//last available pre-created object
obd_id oscc_next_id;// what object id to give out next
- int oscc_initial_create_count;
int oscc_grow_count;
- int oscc_kick_barrier;
struct osc_created *oscc_osccd;
struct obdo oscc_oa;
int oscc_flags;
struct filter_client_data *fed_fcd;
loff_t fed_lr_off;
int fed_lr_idx;
- unsigned long fed_dirty; /* in bytes */
- unsigned long fed_grant; /* in bytes */
- unsigned long fed_pending; /* bytes just being written */
+ long fed_dirty; /* in bytes */
+ long fed_grant; /* in bytes */
+ long fed_pending; /* bytes just being written */
};
struct obd_export {
#define FSFILT_OP_LINK 9
#define FSFILT_OP_CANCEL_UNLINK 10
+/* Report a filesystem operation that started at jiffies value @start and has
+ * taken 15 seconds or more: CWARN while less than @timeout / 2 seconds have
+ * elapsed, CERROR otherwise.  @msg is the operation name printed in the
+ * message.  All arguments are parenthesized so expressions may be passed;
+ * @start is evaluated more than once and must have no side effects. */
+#define fsfilt_check_slow(start, timeout, msg)                          \
+do {                                                                    \
+        if (time_before(jiffies, (start) + 15 * HZ))                    \
+                break;                                                  \
+        else if (time_before(jiffies, (start) + (timeout) / 2 * HZ))    \
+                CWARN("slow %s %lus\n", (msg),                          \
+                      (jiffies - (start)) / HZ);                        \
+        else                                                            \
+                CERROR("slow %s %lus\n", (msg),                         \
+                       (jiffies - (start)) / HZ);                       \
+} while (0)
+
static inline void *fsfilt_start_log(struct obd_device *obd,
struct inode *inode, int op,
struct obd_trans_info *oti, int logs)
LBUG();
}
}
- if (time_after(jiffies, now + 15 * HZ))
- CERROR("long journal start time %lus\n", (jiffies - now) / HZ);
+ fsfilt_check_slow(now, obd_timeout, "journal start");
return handle;
}
LBUG();
}
}
- if (time_after(jiffies, now + 15 * HZ))
- CERROR("long journal start time %lus\n", (jiffies - now) / HZ);
+ fsfilt_check_slow(now, obd_timeout, "journal start");
return handle;
}
int rc = obd->obd_fsops->fs_commit(inode, handle, force_sync);
CDEBUG(D_INFO, "committing handle %p\n", handle);
- if (time_after(jiffies, now + 15 * HZ))
- CERROR("long journal start time %lus\n", (jiffies - now) / HZ);
+ fsfilt_check_slow(now, obd_timeout, "journal start");
return rc;
}
int rc = obd->obd_fsops->fs_commit_async(inode, handle, wait_handle);
CDEBUG(D_INFO, "committing handle %p (async)\n", *wait_handle);
- if (time_after(jiffies, now + 15 * HZ))
- CERROR("long journal start time %lus\n", (jiffies - now) / HZ);
+ fsfilt_check_slow(now, obd_timeout, "journal start");
return rc;
}
unsigned long now = jiffies;
int rc = obd->obd_fsops->fs_commit_wait(inode, handle);
CDEBUG(D_INFO, "waiting for completion %p\n", handle);
- if (time_after(jiffies, now + 15 * HZ))
- CERROR("long journal start time %lus\n", (jiffies - now) / HZ);
+ fsfilt_check_slow(now, obd_timeout, "journal start");
return rc;
}
unsigned long now = jiffies;
int rc;
rc = obd->obd_fsops->fs_setattr(dentry, handle, iattr, do_trunc);
- if (time_after(jiffies, now + 15 * HZ))
- CERROR("long setattr time %lus\n", (jiffies - now) / HZ);
+ fsfilt_check_slow(now, obd_timeout, "setattr");
return rc;
}
obd_count o_nlink; /* brw: checksum */
obd_count o_generation;
obd_flag o_valid; /* hot fields in this obdo */
- obd_count o_misc;
+ obd_count o_misc; /* brw: o_dropped */
__u32 o_easize; /* epoch in ost writes */
char o_inline[OBD_INLINESZ]; /* fid in ost writes */
};
#define OBD_OBJECT_EOF 0xffffffffffffffffULL
+#define OST_MIN_PRECREATE 32
+#define OST_MAX_PRECREATE 20000
+
struct obd_ioobj {
obd_id ioo_id;
obd_gr ioo_gr;
int imp_invalid:1, imp_replayable:1,
imp_dlm_fake:1, imp_server_timeout:1,
imp_initial_recov:1, imp_force_verify:1,
- imp_pingable:1, imp_resend_replay:1;
+ imp_pingable:1, imp_resend_replay:1,
+ imp_deactive:1;
__u32 imp_connect_op;
};
+/* -*- mode: c; c-basic-offset: 8; indent-tabs-mode: nil; -*-
+ * vim:expandtab:shiftwidth=8:tabstop=8:
+ *
+ * Copyright (C) 2001 Cluster File Systems, Inc. <braam@clusterfs.com>
+ *
+ * This file is part of Lustre, http://www.lustre.org.
+ *
+ * Lustre is free software; you can redistribute it and/or
+ * modify it under the terms of version 2 of the GNU General Public
+ * License as published by the Free Software Foundation.
+ *
+ * Lustre is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with Lustre; if not, write to the Free Software
+ * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
+ *
+ * lustre VFS/process permission interface
+ */
+
#ifndef __LVFS_H__
#define __LVFS_H__
#include <linux/kp30.h>
-#define LL_FID_NAMELEN (16 + 1 + 8 + 1)
+#define LL_FID_NAMELEN (16 + 1 + 8 + 1)
#if defined __KERNEL__
#include <linux/lvfs_linux.h>
dchild = lookup_one_len(fid_name, dparent, fid_namelen);
up(&dparent->d_inode->i_sem);
+ if (IS_ERR(dchild) || dchild->d_inode == NULL)
+ return dchild;
+
+ if (is_bad_inode(dchild->d_inode)) {
+ CERROR("bad inode returned %lu/%u\n",
+ dchild->d_inode->i_ino, dchild->d_inode->i_generation);
+ dput(dchild);
+ dchild = ERR_PTR(-ENOENT);
+ }
return dchild;
}
* callees of this method are encouraged to abort their state
* in the oig. This may be called multiple times. */
void (*occ_interrupted)(struct oig_callback_context *occ);
+ int interrupted;
};
/* if we find more consumers this could be generalized */
#define OSC_MAX_RIF_DEFAULT 8
#define OSC_MAX_RIF_MAX 64
-#define OSC_MAX_DIRTY_DEFAULT 8
+#define OSC_MAX_DIRTY_DEFAULT 32
#define OSC_MAX_DIRTY_MB_MAX 512 /* totally arbitrary */
struct mdc_rpc_lock;
int obdo_cmp_md(struct obdo *dst, struct obdo *src, obd_flag compare);
void obdo_to_ioobj(struct obdo *oa, struct obd_ioobj *ioobj);
-static inline int obd_check_conn(struct lustre_handle *conn)
-{
- struct obd_device *obd;
- if (!conn) {
- CERROR("NULL conn\n");
- RETURN(-ENOTCONN);
- }
-
- obd = class_conn2obd(conn);
- if (!obd) {
- CERROR("NULL obd\n");
- RETURN(-ENODEV);
- }
-
- if (!obd->obd_attached) {
- CERROR("obd %d not attached\n", obd->obd_minor);
- RETURN(-ENODEV);
- }
-
- if (!obd->obd_set_up) {
- CERROR("obd %d not setup\n", obd->obd_minor);
- RETURN(-ENODEV);
- }
-
- if (!obd->obd_type) {
- CERROR("obd %d not typed\n", obd->obd_minor);
- RETURN(-ENODEV);
- }
-
- if (!obd->obd_type->typ_ops) {
- CERROR("obd_check_conn: obd %d no operations\n",
- obd->obd_minor);
- RETURN(-EOPNOTSUPP);
- }
- return 0;
-}
-
-
#define OBT(dev) (dev)->obd_type
#define OBP(dev, op) (dev)->obd_type->typ_ops->o_ ## op
#define CTXTP(ctxt, op) (ctxt)->loc_logops->lop_##op
-/* Ensure obd_setup: used for disconnect which might be called while
- an obd is stopping. */
-#define OBD_CHECK_SETUP(conn, exp) \
-do { \
- if (!(conn)) { \
- CERROR("NULL connection\n"); \
- RETURN(-EINVAL); \
- } \
- \
- exp = class_conn2export(conn); \
- if (!(exp)) { \
- CERROR("No export for conn "LPX64"\n", (conn)->cookie); \
- RETURN(-EINVAL); \
- } \
- \
- if (!(exp)->exp_obd->obd_set_up) { \
- CERROR("Device %d not setup\n", \
- (exp)->exp_obd->obd_minor); \
- class_export_put(exp); \
- RETURN(-EINVAL); \
- } \
-} while (0)
-
-/* Ensure obd_setup and !obd_stopping. */
-#define OBD_CHECK_ACTIVE(conn, exp) \
-do { \
- if (!(conn)) { \
- CERROR("NULL connection\n"); \
- RETURN(-EINVAL); \
- } \
- \
- exp = class_conn2export(conn); \
- if (!(exp)) { \
- CERROR("No export for conn "LPX64"\n", (conn)->cookie); \
- RETURN(-EINVAL); \
- } \
- \
- if (!(exp)->exp_obd->obd_set_up || (exp)->exp_obd->obd_stopping) { \
- CERROR("Device %d not setup\n", \
- (exp)->exp_obd->obd_minor); \
- class_export_put(exp); \
- RETURN(-EINVAL); \
- } \
-} while (0)
-
/* Ensure obd_setup: used for cleanup which must be called
while obd is stopping */
-#define OBD_CHECK_DEV_STOPPING(obd) \
+#define OBD_CHECK_DEV(obd) \
do { \
if (!(obd)) { \
CERROR("NULL device\n"); \
RETURN(-ENODEV); \
} \
- \
+} while (0)
+
+#define OBD_CHECK_DEV_STOPPING(obd) \
+do { \
+ OBD_CHECK_DEV(obd); \
if (!(obd)->obd_set_up) { \
CERROR("Device %d not setup\n", \
(obd)->obd_minor); \
/* ensure obd_setup and !obd_stopping */
#define OBD_CHECK_DEV_ACTIVE(obd) \
do { \
- if (!(obd)) { \
- CERROR("NULL device\n"); \
- RETURN(-ENODEV); \
- } \
- \
+ OBD_CHECK_DEV(obd); \
if (!(obd)->obd_set_up || (obd)->obd_stopping) { \
CERROR("Device %d not setup\n", \
(obd)->obd_minor); \
struct obd_import *imp,
enum obd_import_event event)
{
+ if (!obd) {
+ CERROR("NULL device\n");
+ EXIT;
+ return;
+ }
if (obd->obd_set_up && OBP(obd, import_event)) {
OBD_COUNTER_INCREMENT(obd, import_event);
OBP(obd, import_event)(obd, imp, event);
struct obd_device *watched,
int active)
{
+ OBD_CHECK_DEV(obd);
if (!obd->obd_set_up) {
CERROR("obd %s not set up\n", obd->obd_name);
return -EINVAL;
struct obd_device *observer)
{
ENTRY;
+ OBD_CHECK_DEV(obd);
if (obd->obd_observer && observer)
RETURN(-EALREADY);
obd->obd_observer = observer;
--- /dev/null
+--- ./drivers/addon/qla2200/qla2x00.h 2004-07-26 12:52:08.000000000 +0100
++++ ./drivers/addon/qla2200/qla2x00.h 2004-07-26 12:58:42.000000000 +0100
+@@ -3208,7 +3208,7 @@ void qla2x00_setup(char *s);
+ /* Kernel version specific template additions */
+
+ /* Number of segments 1 - 65535 */
+-#define SG_SEGMENTS 32 /* Cmd entry + 6 continuations */
++#define SG_SEGMENTS 512 /* Cmd entry + 6 continuations */
+
+ /*
+ * Scsi_Host_template (see hosts.h)
+@@ -3222,7 +3222,7 @@ void qla2x00_setup(char *s);
+ *
+ */
+ #if LINUX_VERSION_CODE >= KERNEL_VERSION(2,4,8)
+-#define TEMPLATE_MAX_SECTORS max_sectors: 512,
++#define TEMPLATE_MAX_SECTORS max_sectors: 2048,
+ #else
+ #define TEMPLATE_MAX_SECTORS
+ #endif
+--- ./include/linux/blkdev.h 2004-07-26 12:53:11.000000000 +0100
++++ ./include/linux/blkdev.h 2004-07-26 13:12:42.000000000 +0100
+@@ -255,9 +255,9 @@ extern int * max_segments[MAX_BLKDEV];
+
+ extern char * blkdev_varyio[MAX_BLKDEV];
+
+-#define MAX_SEGMENTS 128
++#define MAX_SEGMENTS 256
+ #define MAX_SECTORS 255
+-#define MAX_SUPERBH 32768 /* must fit info ->b_size right now */
++#define MAX_SUPERBH (1<<20)
+
+ /*
+ * bh abuse :/
--- /dev/null
+--- ./fs/ext3/ialloc.c.orig 2004-08-02 13:26:38.000000000 -0700
++++ ./fs/ext3/ialloc.c 2004-09-03 14:57:14.000000000 -0700
+@@ -328,21 +328,143 @@
+ * directories already is chosen.
+ *
+ * For other inodes, search forward from the parent directory's block
+- * group to find a free inode.
++ * group to find a free inode in a group with some free blocks.
+ */
++/* Directory placement: consider groups with at least the average number of
++ * free inodes; a group beats the current best only if it has no more dirs and
++ * more free blocks.  Returns the group (desc/bh via out params) or -1. */
++static int find_group_dir(struct super_block *sb, const struct inode *parent,
++			  struct ext3_group_desc **best_desc,
++			  struct buffer_head **best_bh)
++{
++	struct ext3_sb_info *sbi = EXT3_SB(sb);
++	int ngroups = sbi->s_groups_count;
++	int avefreei = le32_to_cpu(sbi->s_es->s_free_inodes_count) / ngroups;
++	struct ext3_group_desc *desc;
++	struct buffer_head *bh;
++	int group, best_group = -1, ndir_best = 999999999;
++
++	*best_desc = NULL;
++	*best_bh = NULL;
++
++	for (group = 0; group < ngroups; group++) {
++		desc = ext3_get_group_desc(sb, group, &bh);
++		if (!desc || !desc->bg_free_inodes_count)
++			continue;
++		if (le16_to_cpu(desc->bg_free_inodes_count) < avefreei)
++			continue;
++		if (le16_to_cpu(desc->bg_used_dirs_count) > ndir_best)
++			continue;
++		if (!*best_desc ||
++		    (le16_to_cpu(desc->bg_free_blocks_count) >
++		     le16_to_cpu((*best_desc)->bg_free_blocks_count))) {
++			*best_bh = bh;
++			*best_desc = desc;
++			best_group = group;
++			ndir_best = le16_to_cpu(desc->bg_used_dirs_count);
++		}
++	}
++
++	return best_group;
++}
++
++/* Choose a block group for a non-directory inode: the parent's group if it has
++ * free inodes and blocks, else a hashed probe for a group with free inodes and
++ * above-average free blocks, else a linear scan starting after s_last_group.
++ * Returns the group (descriptor and buffer via out params), or -1 if none. */
++static int find_group_other(struct super_block *sb, const struct inode *parent,
++			    struct ext3_group_desc **best_desc,
++			    struct buffer_head **best_bh)
++{
++	struct ext3_sb_info *sbi = EXT3_SB(sb);
++	int parent_group = EXT3_I(parent)->i_block_group;
++	int ngroups = sbi->s_groups_count;
++	int avefreeb;
++	struct ext3_group_desc *desc;
++	struct buffer_head *bh;
++	int group, i, best_group = -1;
++
++	*best_desc = NULL;
++	*best_bh = NULL;
++
++	/* Try to place the inode in its parent directory */
++	group = parent_group;
++	desc = ext3_get_group_desc(sb, group, &bh);
++	if (desc && le16_to_cpu(desc->bg_free_inodes_count) &&
++	    le16_to_cpu(desc->bg_free_blocks_count)) {
++		*best_desc = desc;
++		*best_bh = bh;
++		return group;
++	}
++
++	/*
++	 * We're going to place this inode in a different blockgroup from its
++	 * parent.  We want to cause files in a common directory to all land in
++	 * the same blockgroup if it has space.  But we want files which are
++	 * in a different directory which shares a blockgroup with our parent
++	 * to land in a different blockgroup.
++	 *
++	 * So add our directory's i_ino into the starting point for the hash.
++	 */
++	group = (group + parent->i_ino) % ngroups;
++
++	/* Average number of free blocks per group */
++	avefreeb = le32_to_cpu(sbi->s_es->s_free_blocks_count) / ngroups;
++
++	/* Use a quadratic hash to find a group with a free inode and more than
++	 * the average number of free blocks. */
++	for (i = 1; i < ngroups; i <<= 1) {
++		group += i;
++		if (group >= ngroups)
++			group -= ngroups;
++		desc = ext3_get_group_desc(sb, group, &bh);
++		if (!desc || !desc->bg_free_inodes_count)
++			continue;
++		if (le16_to_cpu(desc->bg_free_blocks_count) > avefreeb) {
++			*best_bh = bh;
++			*best_desc = desc;
++			return group;
++		}
++	}
++
++	/*
++	 * That failed: try linear search for a group with free inodes and
++	 * preferably free blocks, returning as soon as we find a good one.
++	 */
++	group = sbi->s_last_group;
++	for (i = 0; i < ngroups; i++) {
++		if (++group >= ngroups)
++			group = 0;
++		desc = ext3_get_group_desc(sb, group, &bh);
++		if (!desc || !desc->bg_free_inodes_count)
++			continue;
++		if (!*best_desc ||
++		    (le16_to_cpu(desc->bg_free_blocks_count) >
++		     le16_to_cpu((*best_desc)->bg_free_blocks_count))) {
++			*best_bh = bh;
++			*best_desc = desc;
++			best_group = group;
++			if (le16_to_cpu(desc->bg_free_blocks_count) >= avefreeb)
++				break;
++		}
++	}
++	sbi->s_last_group = best_group;
++
++	return best_group;
++}
++
+ struct inode * ext3_new_inode(handle_t *handle, const struct inode * dir,
+ int mode, unsigned long goal)
+ {
+ struct super_block * sb;
+ struct buffer_head * bh;
+ struct buffer_head * bh2;
+- int i, j, avefreei;
++ int i, j;
+ struct inode * inode;
+ int bitmap_nr;
+ struct ext3_inode_info *ei;
+ struct ext3_sb_info *sbi;
+ struct ext3_group_desc * gdp;
+- struct ext3_group_desc * tmp;
+ struct ext3_super_block * es;
+ struct ext3_iloc iloc;
+ int err = 0;
+@@ -396,72 +518,10 @@
+ }
+
+ repeat:
+- gdp = NULL;
+- i = 0;
+-
+- if (S_ISDIR(mode)) {
+- avefreei = le32_to_cpu(es->s_free_inodes_count) /
+- sbi->s_groups_count;
+- if (!gdp) {
+- for (j = 0; j < sbi->s_groups_count; j++) {
+- struct buffer_head *temp_buffer;
+- tmp = ext3_get_group_desc (sb, j, &temp_buffer);
+- if (tmp &&
+- le16_to_cpu(tmp->bg_free_inodes_count) &&
+- le16_to_cpu(tmp->bg_free_inodes_count) >=
+- avefreei) {
+- if (!gdp || (le16_to_cpu(tmp->bg_free_blocks_count) >
+- le16_to_cpu(gdp->bg_free_blocks_count))) {
+- i = j;
+- gdp = tmp;
+- bh2 = temp_buffer;
+- }
+- }
+- }
+- }
+- } else {
+- /*
+- * Try to place the inode in its parent directory
+- */
+- i = EXT3_I(dir)->i_block_group;
+- tmp = ext3_get_group_desc (sb, i, &bh2);
+- if (tmp && le16_to_cpu(tmp->bg_free_inodes_count))
+- gdp = tmp;
+- else
+- {
+- /*
+- * Use a quadratic hash to find a group with a
+- * free inode
+- */
+- for (j = 1; j < sbi->s_groups_count; j <<= 1) {
+- i += j;
+- if (i >= sbi->s_groups_count)
+- i -= sbi->s_groups_count;
+- tmp = ext3_get_group_desc (sb, i, &bh2);
+- if (tmp &&
+- le16_to_cpu(tmp->bg_free_inodes_count)) {
+- gdp = tmp;
+- break;
+- }
+- }
+- }
+- if (!gdp) {
+- /*
+- * That failed: try linear search for a free inode
+- */
+- i = EXT3_I(dir)->i_block_group + 1;
+- for (j = 2; j < sbi->s_groups_count; j++) {
+- if (++i >= sbi->s_groups_count)
+- i = 0;
+- tmp = ext3_get_group_desc (sb, i, &bh2);
+- if (tmp &&
+- le16_to_cpu(tmp->bg_free_inodes_count)) {
+- gdp = tmp;
+- break;
+- }
+- }
+- }
+- }
++ if (S_ISDIR(mode))
++ i = find_group_dir(sb, dir, &gdp, &bh2);
++ else
++ i = find_group_other(sb, dir, &gdp, &bh2);
+
+ err = -ENOSPC;
+ if (!gdp)
+--- linux/include/linux/ext3_fs_sb.h.orig 2004-08-26 13:28:53.000000000 -0600
++++ linux/include/linux/ext3_fs_sb.h 2004-08-31 11:04:27.000000000 -0600
+@@ -45,6 +45,7 @@ struct ext3_sb_info {
+ unsigned long s_gdb_count; /* Number of group descriptor blocks */
+ unsigned long s_desc_per_block; /* Number of group descriptors per block */
+ unsigned long s_groups_count; /* Number of groups in the fs */
++ unsigned long s_last_group; /* Last group used for inode allocation */
+ struct buffer_head * s_sbh; /* Buffer containing the super block */
+ struct ext3_super_block * s_es; /* Pointer to the super block in the buffer */
+ struct buffer_head ** s_group_desc;
--- /dev/null
+Index: lum/fs/ext3/ialloc.c
+===================================================================
+--- lum.orig/fs/ext3/ialloc.c 2004-08-26 13:14:35.000000000 -0600
++++ lum/fs/ext3/ialloc.c 2004-08-31 15:00:35.000000000 -0600
+@@ -327,8 +327,131 @@ int ext3_itable_block_used(struct super_
+ * directories already is chosen.
+ *
+ * For other inodes, search forward from the parent directory's block
+- * group to find a free inode.
++ * group to find a free inode in a group with some free blocks.
+ */
++/* Directory placement: consider groups with at least the average number of
++ * free inodes; a group beats the current best only if it has no more dirs and
++ * more free blocks.  Returns the group (desc/bh via out params) or -1. */
++static int find_group_dir(struct super_block *sb, const struct inode *parent,
++			  struct ext3_group_desc **best_desc,
++			  struct buffer_head **best_bh)
++{
++	struct ext3_sb_info *sbi = EXT3_SB(sb);
++	int ngroups = sbi->s_groups_count;
++	int avefreei = le32_to_cpu(sbi->s_es->s_free_inodes_count) / ngroups;
++	struct ext3_group_desc *desc;
++	struct buffer_head *bh;
++	int group, best_group = -1, ndir_best = 999999999;
++
++	*best_desc = NULL;
++	*best_bh = NULL;
++
++	for (group = 0; group < ngroups; group++) {
++		desc = ext3_get_group_desc(sb, group, &bh);
++		if (!desc || !desc->bg_free_inodes_count)
++			continue;
++		if (le16_to_cpu(desc->bg_free_inodes_count) < avefreei)
++			continue;
++		if (le16_to_cpu(desc->bg_used_dirs_count) > ndir_best)
++			continue;
++		if (!*best_desc ||
++		    (le16_to_cpu(desc->bg_free_blocks_count) >
++		     le16_to_cpu((*best_desc)->bg_free_blocks_count))) {
++			*best_bh = bh;
++			*best_desc = desc;
++			best_group = group;
++			ndir_best = le16_to_cpu(desc->bg_used_dirs_count);
++		}
++	}
++
++	return best_group;
++}
++
++/* Choose a block group for a non-directory inode: the parent's group if it has
++ * free inodes and blocks, else a hashed probe for a group with free inodes and
++ * above-average free blocks, else a linear scan starting after s_last_group.
++ * Returns the group (descriptor and buffer via out params), or -1 if none. */
++static int find_group_other(struct super_block *sb, const struct inode *parent,
++			    struct ext3_group_desc **best_desc,
++			    struct buffer_head **best_bh)
++{
++	struct ext3_sb_info *sbi = EXT3_SB(sb);
++	int parent_group = EXT3_I(parent)->i_block_group;
++	int ngroups = sbi->s_groups_count;
++	int avefreeb;
++	struct ext3_group_desc *desc;
++	struct buffer_head *bh;
++	int group, i, best_group = -1;
++
++	*best_desc = NULL;
++	*best_bh = NULL;
++
++	/* Try to place the inode in its parent directory */
++	group = parent_group;
++	desc = ext3_get_group_desc(sb, group, &bh);
++	if (desc && le16_to_cpu(desc->bg_free_inodes_count) &&
++	    le16_to_cpu(desc->bg_free_blocks_count)) {
++		*best_desc = desc;
++		*best_bh = bh;
++		return group;
++	}
++
++	/*
++	 * We're going to place this inode in a different blockgroup from its
++	 * parent.  We want to cause files in a common directory to all land in
++	 * the same blockgroup if it has space.  But we want files which are
++	 * in a different directory which shares a blockgroup with our parent
++	 * to land in a different blockgroup.
++	 *
++	 * So add our directory's i_ino into the starting point for the hash.
++	 */
++	group = (group + parent->i_ino) % ngroups;
++
++	/* Average number of free blocks per group */
++	avefreeb = le32_to_cpu(sbi->s_es->s_free_blocks_count) / ngroups;
++
++	/* Use a quadratic hash to find a group with a free inode and more than
++	 * the average number of free blocks. */
++	for (i = 1; i < ngroups; i <<= 1) {
++		group += i;
++		if (group >= ngroups)
++			group -= ngroups;
++		desc = ext3_get_group_desc(sb, group, &bh);
++		if (!desc || !desc->bg_free_inodes_count)
++			continue;
++		if (le16_to_cpu(desc->bg_free_blocks_count) > avefreeb) {
++			*best_bh = bh;
++			*best_desc = desc;
++			return group;
++		}
++	}
++
++	/*
++	 * That failed: try linear search for a group with free inodes and
++	 * preferably free blocks, returning as soon as we find a good one.
++	 */
++	group = sbi->s_last_group;
++	for (i = 0; i < ngroups; i++) {
++		if (++group >= ngroups)
++			group = 0;
++		desc = ext3_get_group_desc(sb, group, &bh);
++		if (!desc || !desc->bg_free_inodes_count)
++			continue;
++		if (!*best_desc ||
++		    (le16_to_cpu(desc->bg_free_blocks_count) >
++		     le16_to_cpu((*best_desc)->bg_free_blocks_count))) {
++			*best_bh = bh;
++			*best_desc = desc;
++			best_group = group;
++			if (le16_to_cpu(desc->bg_free_blocks_count) >= avefreeb)
++				break;
++		}
++	}
++	sbi->s_last_group = best_group;
++
++	return best_group;
++}
++
+ struct inode * ext3_new_inode (handle_t *handle,
+ const struct inode * dir, int mode,
+ unsigned long goal)
+@@ -336,11 +459,10 @@ struct inode * ext3_new_inode (handle_t
+ struct super_block * sb;
+ struct buffer_head * bh;
+ struct buffer_head * bh2;
+- int i, j, avefreei;
++ int i, j;
+ struct inode * inode;
+ int bitmap_nr;
+ struct ext3_group_desc * gdp;
+- struct ext3_group_desc * tmp;
+ struct ext3_super_block * es;
+ struct ext3_iloc iloc;
+ int err = 0;
+@@ -392,72 +514,10 @@ struct inode * ext3_new_inode (handle_t
+ }
+
+ repeat:
+- gdp = NULL;
+- i = 0;
+-
+- if (S_ISDIR(mode)) {
+- avefreei = le32_to_cpu(es->s_free_inodes_count) /
+- sb->u.ext3_sb.s_groups_count;
+- if (!gdp) {
+- for (j = 0; j < sb->u.ext3_sb.s_groups_count; j++) {
+- struct buffer_head *temp_buffer;
+- tmp = ext3_get_group_desc (sb, j, &temp_buffer);
+- if (tmp &&
+- le16_to_cpu(tmp->bg_free_inodes_count) &&
+- le16_to_cpu(tmp->bg_free_inodes_count) >=
+- avefreei) {
+- if (!gdp || (le16_to_cpu(tmp->bg_free_blocks_count) >
+- le16_to_cpu(gdp->bg_free_blocks_count))) {
+- i = j;
+- gdp = tmp;
+- bh2 = temp_buffer;
+- }
+- }
+- }
+- }
+- } else {
+- /*
+- * Try to place the inode in its parent directory
+- */
+- i = dir->u.ext3_i.i_block_group;
+- tmp = ext3_get_group_desc (sb, i, &bh2);
+- if (tmp && le16_to_cpu(tmp->bg_free_inodes_count))
+- gdp = tmp;
+- else
+- {
+- /*
+- * Use a quadratic hash to find a group with a
+- * free inode
+- */
+- for (j = 1; j < sb->u.ext3_sb.s_groups_count; j <<= 1) {
+- i += j;
+- if (i >= sb->u.ext3_sb.s_groups_count)
+- i -= sb->u.ext3_sb.s_groups_count;
+- tmp = ext3_get_group_desc (sb, i, &bh2);
+- if (tmp &&
+- le16_to_cpu(tmp->bg_free_inodes_count)) {
+- gdp = tmp;
+- break;
+- }
+- }
+- }
+- if (!gdp) {
+- /*
+- * That failed: try linear search for a free inode
+- */
+- i = dir->u.ext3_i.i_block_group + 1;
+- for (j = 2; j < sb->u.ext3_sb.s_groups_count; j++) {
+- if (++i >= sb->u.ext3_sb.s_groups_count)
+- i = 0;
+- tmp = ext3_get_group_desc (sb, i, &bh2);
+- if (tmp &&
+- le16_to_cpu(tmp->bg_free_inodes_count)) {
+- gdp = tmp;
+- break;
+- }
+- }
+- }
+- }
++ if (S_ISDIR(mode))
++ i = find_group_dir(sb, dir, &gdp, &bh2);
++ else
++ i = find_group_other(sb, dir, &gdp, &bh2);
+
+ err = -ENOSPC;
+ if (!gdp)
+Index: lum/include/linux/ext3_fs_sb.h
+===================================================================
+--- lum.orig/include/linux/ext3_fs_sb.h 2004-08-26 13:28:53.000000000 -0600
++++ lum/include/linux/ext3_fs_sb.h 2004-08-31 11:04:27.000000000 -0600
+@@ -45,6 +45,7 @@ struct ext3_sb_info {
+ unsigned long s_gdb_count; /* Number of group descriptor blocks */
+ unsigned long s_desc_per_block; /* Number of group descriptors per block */
+ unsigned long s_groups_count; /* Number of groups in the fs */
++ unsigned long s_last_group; /* Last group used for inode allocation */
+ struct buffer_head * s_sbh; /* Buffer containing the super block */
+ struct ext3_super_block * s_es; /* Pointer to the super block in the buffer */
+ struct buffer_head ** s_group_desc;
--- /dev/null
+Index: 69chaos/fs/ext3/namei.c
+===================================================================
+--- 69chaos.orig/fs/ext3/namei.c 2004-08-24 23:56:04.000000000 -0700
++++ 69chaos/fs/ext3/namei.c 2004-08-24 23:57:20.000000000 -0700
+@@ -1542,11 +1542,16 @@
+ static inline void ext3_inc_count(handle_t *handle, struct inode *inode)
+ {
+ inode->i_nlink++;
++ if (is_dx(inode) && inode->i_nlink > 1) {
++ if (inode->i_nlink >= 65000) /* limit is 16-bit i_links_count */
++ inode->i_nlink = 1;
++ }
+ }
+
+ static inline void ext3_dec_count(handle_t *handle, struct inode *inode)
+ {
+- inode->i_nlink--;
++ if (!S_ISDIR(inode->i_mode) || inode->i_nlink > 2)
++ inode->i_nlink--;
+ }
+
+ static int ext3_add_nondir(handle_t *handle,
+@@ -1650,7 +1655,7 @@
+ struct ext3_dir_entry_2 * de;
+ int err;
+
+- if (dir->i_nlink >= EXT3_LINK_MAX)
++ if (EXT3_DIR_LINK_MAXED(dir))
+ return -EMLINK;
+
+ handle = ext3_journal_start(dir, EXT3_DATA_TRANS_BLOCKS +
+@@ -1672,7 +1677,7 @@
+ inode->i_size = EXT3_I(inode)->i_disksize = inode->i_sb->s_blocksize;
+ dir_block = ext3_bread (handle, inode, 0, 1, &err);
+ if (!dir_block) {
+- inode->i_nlink--; /* is this nlink == 0? */
++ ext3_dec_count(handle, inode); /* is this nlink == 0? */
+ ext3_mark_inode_dirty(handle, inode);
+ iput (inode);
+ goto out_stop;
+@@ -1704,7 +1709,7 @@
+ iput (inode);
+ goto out_stop;
+ }
+- dir->i_nlink++;
++ ext3_inc_count(handle, dir);
+ ext3_update_dx_flag(dir);
+ ext3_mark_inode_dirty(handle, dir);
+ d_instantiate(dentry, inode);
+@@ -1765,10 +1770,11 @@
+ }
+ de = (struct ext3_dir_entry_2 *) bh->b_data;
+ }
+- if (!ext3_check_dir_entry ("empty_dir", inode, de, bh,
+- offset)) {
+- brelse (bh);
+- return 1;
++ if (!ext3_check_dir_entry("empty_dir", inode, de, bh, offset)) {
++ /* On error skip the de and offset to the next block. */
++ de = (void *)(bh->b_data + sb->s_blocksize);
++ offset = (offset | (sb->s_blocksize - 1)) + 1;
++ continue;
+ }
+ if (le32_to_cpu(de->inode)) {
+ brelse (bh);
+@@ -1960,14 +1966,14 @@
+ retval = ext3_delete_entry(handle, dir, de, bh);
+ if (retval)
+ goto end_rmdir;
+- if (inode->i_nlink != 2)
+- ext3_warning (inode->i_sb, "ext3_rmdir",
+- "empty directory has nlink!=2 (%d)",
+- inode->i_nlink);
++ if (!EXT3_DIR_LINK_EMPTY(inode))
++ ext3_warning(inode->i_sb, __FUNCTION__,
++ "empty directory has too many links (%d)",
++ inode->i_nlink);
+ inode->i_version = ++event;
+ inode->i_nlink = 0;
+ ext3_orphan_add(handle, inode);
+- dir->i_nlink--;
++ ext3_dec_count(handle, dir);
+ inode->i_ctime = dir->i_ctime = dir->i_mtime = CURRENT_TIME;
+ ext3_mark_inode_dirty(handle, inode);
+ ext3_update_dx_flag(dir);
+@@ -2019,7 +2025,7 @@
+ dir->i_ctime = dir->i_mtime = CURRENT_TIME;
+ ext3_update_dx_flag(dir);
+ ext3_mark_inode_dirty(handle, dir);
+- inode->i_nlink--;
++ ext3_dec_count(handle, inode);
+ if (!inode->i_nlink)
+ ext3_orphan_add(handle, inode);
+ inode->i_ctime = dir->i_ctime;
+@@ -2111,9 +2117,8 @@
+ if (S_ISDIR(inode->i_mode))
+ return -EPERM;
+
+- if (inode->i_nlink >= EXT3_LINK_MAX) {
++ if (EXT3_DIR_LINK_MAXED(inode))
+ return -EMLINK;
+- }
+
+ handle = ext3_journal_start(dir, EXT3_DATA_TRANS_BLOCKS +
+ EXT3_INDEX_EXTRA_TRANS_BLOCKS);
+@@ -2197,8 +2202,8 @@
+ if (le32_to_cpu(PARENT_INO(dir_bh->b_data)) != old_dir->i_ino)
+ goto end_rename;
+ retval = -EMLINK;
+- if (!new_inode && new_dir!=old_dir &&
+- new_dir->i_nlink >= EXT3_LINK_MAX)
++ if (!new_inode && new_dir != old_dir &&
++ EXT3_DIR_LINK_MAXED(new_dir))
+ goto end_rename;
+ }
+ if (!new_bh) {
+@@ -2256,7 +2261,7 @@
+ }
+
+ if (new_inode) {
+- new_inode->i_nlink--;
++ ext3_dec_count(handle, new_inode);
+ new_inode->i_ctime = CURRENT_TIME;
+ }
+ old_dir->i_ctime = old_dir->i_mtime = CURRENT_TIME;
+@@ -2267,11 +2272,11 @@
+ PARENT_INO(dir_bh->b_data) = le32_to_cpu(new_dir->i_ino);
+ BUFFER_TRACE(dir_bh, "call ext3_journal_dirty_metadata");
+ ext3_journal_dirty_metadata(handle, dir_bh);
+- old_dir->i_nlink--;
++ ext3_dec_count(handle, old_dir);
+ if (new_inode) {
+- new_inode->i_nlink--;
++ ext3_dec_count(handle, new_inode);
+ } else {
+- new_dir->i_nlink++;
++ ext3_inc_count(handle, new_dir);
+ ext3_update_dx_flag(new_dir);
+ ext3_mark_inode_dirty(handle, new_dir);
+ }
+Index: 69chaos/include/linux/ext3_fs.h
+===================================================================
+--- 69chaos.orig/include/linux/ext3_fs.h 2004-08-24 23:55:45.000000000 -0700
++++ 69chaos/include/linux/ext3_fs.h 2004-08-24 23:56:47.000000000 -0700
+@@ -44,7 +44,7 @@
+ /*
+ * Always enable hashed directories
+ */
+-#define CONFIG_EXT3_INDEX
++#define CONFIG_EXT3_INDEX 1
+
+ /*
+ * Debug code
+@@ -582,14 +582,15 @@
+ */
+
+ #ifdef CONFIG_EXT3_INDEX
+- #define is_dx(dir) (EXT3_HAS_COMPAT_FEATURE(dir->i_sb, \
+- EXT3_FEATURE_COMPAT_DIR_INDEX) && \
++#define is_dx(dir) (EXT3_HAS_COMPAT_FEATURE(dir->i_sb, \
++ EXT3_FEATURE_COMPAT_DIR_INDEX) && \
+ (EXT3_I(dir)->i_flags & EXT3_INDEX_FL))
+-#define EXT3_DIR_LINK_MAX(dir) (!is_dx(dir) && (dir)->i_nlink >= EXT3_LINK_MAX)
+-#define EXT3_DIR_LINK_EMPTY(dir) ((dir)->i_nlink == 2 || (dir)->i_nlink == 1)
++#define EXT3_DIR_LINK_MAXED(dir) (!is_dx(dir) && (dir)->i_nlink >=EXT3_LINK_MAX)
++#define EXT3_DIR_LINK_EMPTY(dir) ((dir)->i_nlink == 2 || \
++ (is_dx(dir) && (dir)->i_nlink == 1))
+ #else
+ #define is_dx(dir) 0
+-#define EXT3_DIR_LINK_MAX(dir) ((dir)->i_nlink >= EXT3_LINK_MAX)
++#define EXT3_DIR_LINK_MAXED(dir) ((dir)->i_nlink >= EXT3_LINK_MAX)
+ #define EXT3_DIR_LINK_EMPTY(dir) ((dir)->i_nlink == 2)
+ #endif
+
+ handle_t *handle = NULL;
+ int ret;
+ int orphan = 0;
-+ loff_t offset = blocknr << inode->i_blkbits; /* ugh */
++ loff_t offset = (loff_t)blocknr << inode->i_blkbits; /* ugh */
+ ssize_t count = iobuf->length; /* ditto */
+
+ if (rw == WRITE) {
+ handle_t *handle = NULL;
+ int ret;
+ int orphan = 0;
-+ loff_t offset = blocknr << inode->i_blkbits; /* ugh */
++ loff_t offset = (loff_t)blocknr << inode->i_blkbits; /* ugh */
+ ssize_t count = iobuf->length; /* ditto */
+
+ if (rw == WRITE) {
+ handle_t *handle = NULL;
+ int ret;
+ int orphan = 0;
-+ loff_t offset = blocknr << inode->i_blkbits; /* ugh */
++ loff_t offset = (loff_t)blocknr << inode->i_blkbits; /* ugh */
+ ssize_t count = iobuf->length; /* ditto */
+
+ if (rw == WRITE) {
+Version 40: >32000 subdirectories support for ext3 (b=3244)
Version 39: add EXPORT_SYMBOL(smp_num_siblings) to vanilla-2.4.24 (b=3966)
+ eebperf (unfragmented 1MB write/read support)
Version 38: drop dentry ref in ext3_add_link from open_connect_dentry (b=3266)
Version 37: fix htree rename-within-same-dir (b=3417), endianness (b=2447)
Version 36: don't dput dentry after error (b=2350), zero page->private (3119)
+++ /dev/null
-dev_read_only_hp_2.4.20.patch
-exports_2.4.20-rh-hp.patch
-kmem_cache_validate_hp.patch
-lustre_version.patch
-vfs_intent-2.4.20-hp.patch
-invalidate_show.patch
-export-truncate.patch
-iod-stock-24-exports_hp.patch
-ext-2.4-patch-1.patch
-ext-2.4-patch-2.patch
-ext-2.4-patch-3.patch
-ext-2.4-patch-4.patch
-linux-2.4.20-xattr-0.8.54-hp.patch
-ext3-2.4.20-fixes.patch
-ext3-2.4-ino_t.patch
-ext3-largefile.patch
-ext3-truncate_blocks.patch
-ext3-use-after-free.patch
-ext3-orphan_lock.patch
-ext3-delete_thread-2.4.20.patch
-ext3-noread-2.4.20.patch
-extN-wantedi.patch
-ext3-san-2.4.20.patch
-ext3-map_inode_page.patch
-ext3-error-export.patch
-iopen-2.4.20.patch
-tcp-zero-copy.patch
-socket-exports-vanilla.patch
-add_page_private.patch
-removepage-2.4.20.patch
-jbd-ctx_switch.patch
-jbd-flushtime.patch
-jbd-get_write_access.patch
-nfs_export_kernel-2.4.20-hp.patch
-ext3-ea-in-inode-2.4.20.patch
-bproc-patch-2.4.20
-ext3-truncate-buffer-head.patch
+++ /dev/null
-dev_read_only.patch
-exports.patch
-lustre_version.patch
-vfs_intent-2.4.18-18-chaos65.patch
-invalidate_show.patch
-iod-rmap-exports.patch
-export-truncate.patch
-htree-ext3-2.4.18.patch
-linux-2.4.18ea-0.8.26.patch
-ext3-2.4-ino_t.patch
-ext3-2.4.18-ino_sb_macro.patch
-ext3-orphan_lock.patch
-ext3-delete_thread-2.4.18.patch
-extN-misc-fixup.patch
-extN-noread.patch
-extN-wantedi.patch
-ext3-san-2.4.20.patch
-extN-2.4.18-ino_sb_fixup.patch
-ext3-map_inode_page_2.4.18.patch
-ext3-error-export.patch
-iopen-2.4.18.patch
-jbd-dont-account-blocks-twice.patch
-jbd-commit-tricks.patch
-ext3-o_direct-1-2.4.18-chaos.patch
-ext3-no-write-super-chaos.patch
-add_page_private.patch
-ext3-extents-2.4.18-chaos.patch
-ext3-extents-oflag-2.4.18-chaos.patch
-ext3-raw-lookup.patch
-nfs_export_kernel-2.4.18.patch
-ext3-ea-in-inode-2.4.18-chaos.patch
-listman-2.4.18.patch
-ext3-trusted_ea-2.4.18.patch
-gfp_memalloc-2.4.18-chaos.patch
-ext3-xattr-ptr-arith-fix.patch
-kernel_text_address-2.4.18-chaos.patch
-procfs-ndynamic-2.4.patch
-ext3-truncate-buffer-head.patch
-inode-max-readahead-2.4.24.patch
-dcache_refcount_debug.patch
+++ /dev/null
-dev_read_only.patch
-exports.patch
-kmem_cache_validate.patch
-lustre_version.patch
-vfs_intent-2.4.18-18-chaos65.patch
-invalidate_show.patch
-iod-rmap-exports.patch
-export-truncate.patch
-ext3-compat-2.4.18-chaos.patch
-ext3-htree.patch
-linux-2.4.18ea-0.8.26-2.patch
-ext3-2.4-ino_t.patch
-ext3-2.4.18-ino_sb_macro-2.patch
-ext3-orphan_lock.patch
-ext3-delete_thread-2.4.18-2.patch
-extN-misc-fixup.patch
-extN-noread.patch
-extN-wantedi.patch
-ext3-san-2.4.20.patch
-extN-2.4.18-ino_sb_fixup.patch
-ext3-map_inode_page_2.4.18.patch
-ext3-error-export.patch
-iopen-2.4.18-2.patch
-jbd-dont-account-blocks-twice.patch
-jbd-commit-tricks.patch
-ext3-o_direct-1-2.4.18-chaos.patch
-ext3-no-write-super-chaos.patch
-add_page_private.patch
-dynamic-locks-2.4.18-chaos.patch
-vfs-pdirops-2.4.18-chaos.patch
-ext3-pdirops-2.4.18-chaos.patch
-ext3-extents-2.4.18-chaos-pdirops.patch
-nfs_export_kernel-2.4.18.patch
-ext3-raw-lookup-pdirops.patch
-ext3-truncate-buffer-head.patch
ext3-truncate-buffer-head.patch
inode-max-readahead-2.4.24.patch
dcache_refcount_debug.patch
+blkdev_tunables-2.4.21-chaos.patch
+ext3-nlinks-2.4.21-chaos.patch
--- /dev/null
+configurable-x86-stack-2.4.21-chaos.patch
+dev_read_only_2.4.21-chaos.patch
+exports_2.4.19-suse.patch
+lustre_version.patch
+vfs_intent-2.4.21-rhel.patch
+invalidate_show-2.4.20-rh.patch
+iod-rmap-exports-2.4.21-chaos.patch
+export-truncate.patch
+ext3-htree-2.4.21-chaos.patch
+linux-2.4.21-xattr-0.8.54-chaos.patch
+ext3-ino_sb_macro-2.4.21-chaos.patch
+ext3-orphan_lock-2.4.22-rh.patch
+ext3-delete_thread-2.4.21-chaos.patch
+extN-misc-fixup.patch
+ext3-noread-2.4.21-chaos.patch
+extN-wantedi-2.4.21-chaos.patch
+ext3-san-2.4.20.patch
+extN-2.4.18-ino_sb_fixup.patch
+ext3-map_inode_page_2.4.18.patch
+ext3-error-export.patch
+iopen-2.4.21-chaos.patch
+tcp-zero-copy-2.4.21-chaos.patch
+jbd-dont-account-blocks-twice.patch
+jbd-commit-tricks.patch
+ext3-o_direct-2.4.21-chaos.patch
+ext3-no-write-super-chaos.patch
+add_page_private.patch
+ext3-raw-lookup.patch
+nfs_export_kernel-2.4.21-chaos.patch
+ext3-ea-in-inode-2.4.21-chaos.patch
+listman-2.4.21-chaos.patch
+gfp_memalloc-2.4.21-chaos.patch
+ext3-xattr-ptr-arith-fix.patch
+kernel_text_address-2.4.18-chaos.patch
+pagecache-lock-2.4.21-chaos.patch
+ext3-truncate-buffer-head.patch
+inode-max-readahead-2.4.24.patch
+dcache_refcount_debug.patch
+blkdev_tunables-2.4.21-chaos.patch
+ext3-nlinks-2.4.21-chaos.patch
+++ /dev/null
-configurable-x86-stack-2.4.20.patch
-uml-patch-2.4.20-6.patch
-uml-2.4.20-do_mmap_pgoff-fix.patch
-uml-2.4.20-fixes-1.patch
-uml_get_kmem_end_export.patch
-dev_read_only_2.4.20.patch
-exports_2.4.20.patch
-lustre_version.patch
-vfs_intent-2.4.20-vanilla.patch
-invalidate_show.patch
-export-truncate.patch
-iod-stock-24-exports.patch
-uml_check_get_page.patch
-uml_no_panic.patch
-ext-2.4-patch-1.patch
-ext-2.4-patch-2.patch
-ext-2.4-patch-3.patch
-ext-2.4-patch-4.patch
-linux-2.4.20-xattr-0.8.54.patch
-ext3-2.4.20-fixes.patch
-ext3-2.4-ino_t.patch
-ext3-largefile.patch
-ext3-truncate_blocks.patch
-ext3-unmount_sync.patch
-ext3-use-after-free.patch
-ext3-orphan_lock.patch
-ext3-noread-2.4.20.patch
-ext3-delete_thread-2.4.20.patch
-extN-wantedi.patch
-ext3-san-2.4.20.patch
-ext3-map_inode_page.patch
-ext3-error-export.patch
-iopen-2.4.20.patch
-tcp-zero-copy.patch
-jbd-dont-account-blocks-twice.patch
-jbd-commit-tricks.patch
-ext3-no-write-super.patch
-add_page_private.patch
-socket-exports-vanilla.patch
-removepage-2.4.20.patch
-jbd-ctx_switch.patch
-jbd-flushtime.patch
-jbd-get_write_access.patch
-nfs_export_kernel-2.4.20.patch
-ext3-raw-lookup.patch
-ext3-ea-in-inode-2.4.20.patch
-listman-2.4.20.patch
-ext3-trusted_ea-2.4.20.patch
-ext3-extents-2.4.20.patch
-ext3-extents-in-ea-2.4.20.patch
-ext3-extents-in-ea-ioctl-2.4.20.patch
-ext3-record-extents-ea.patch
-kernel_text_address-2.4.20-vanilla.patch
-ext3-xattr-ptr-arith-fix.patch
-gfp_memalloc-2.4.22.patch
-loop_device_get_info.patch
-ext3-snapfs-2.4.20.patch
+++ /dev/null
-configurable-x86-stack-2.4.19-pre1.patch
-dev_read_only_2.4.20.patch
-exports_2.4.19-pre1.patch
-lustre_version.patch
-vfs_intent-2.4.19-pre1.patch
-invalidate_show-2.4.19-pre1.patch
-export-truncate.patch
-iod-stock-24-exports.patch
-ext3-htree-2.4.19-pre1.patch
-linux-2.4.19-pre1-xattr-0.8.54.patch
-ext3-2.4.20-fixes.patch
-ext3-2.4-ino_t.patch
-ext3-largefile.patch
-ext3-truncate_blocks.patch
-ext3-unmount_sync.patch
-ext3-use-after-free-2.4.19-pre1.patch
-ext3-orphan_lock.patch
-ext3-noread-2.4.20.patch
-ext3-delete_thread-2.4.20.patch
-extN-wantedi.patch
-ext3-san-2.4.20.patch
-ext3-map_inode_page.patch
-ext3-error-export.patch
-iopen-2.4.20.patch
-tcp-zero-copy-2.4.19-pre1.patch
-jbd-dont-account-blocks-twice.patch
-jbd-commit-tricks.patch
-ext3-no-write-super.patch
-add_page_private-2.4.19-pre1.patch
-socket-exports-vanilla.patch
-removepage-2.4.20.patch
-jbd-ctx_switch.patch
-jbd-flushtime-2.4.19-suse.patch
-jbd-get_write_access.patch
-nfs_export_kernel-2.4.19-pre1.patch
-ext3-raw-lookup.patch
-ext3-ea-in-inode-2.4.20.patch
-listman-2.4.20.patch
-ext3-trusted_ea-2.4.20.patch
-kernel_text_address-2.4.19-pre1.patch
-jbd-2.4.19-pre1-jcberr.patch
-seq-private-2.4.19-pre1.patch
-kdev-2.4.19-pre1.patch
-resched-2.4.19-pre1.patch
-ext3-xattr-ptr-arith-fix.patch
-gfp_memalloc-2.4.22.patch
-vmalloc_to_page-2.4.19-pre1.patch
-ext3-truncate-buffer-head.patch
+++ /dev/null
-configurable-x86-stack-2.4.20.patch
-uml-patch-2.4.20-6.patch
-uml-2.4.20-do_mmap_pgoff-fix.patch
-uml-2.4.20-fixes-1.patch
-uml_get_kmem_end_export.patch
-dev_read_only_2.4.20.patch
-exports_2.4.20.patch
-lustre_version.patch
-vfs_intent-2.4.20-vanilla.patch
-invalidate_show.patch
-export-truncate.patch
-iod-stock-24-exports.patch
-uml_check_get_page.patch
-uml_no_panic.patch
-ext-2.4-patch-1.patch
-ext-2.4-patch-2.patch
-ext-2.4-patch-3.patch
-ext-2.4-patch-4.patch
-linux-2.4.20-xattr-0.8.54.patch
-ext3-2.4.20-fixes.patch
-ext3-2.4-ino_t.patch
-ext3-largefile.patch
-ext3-truncate_blocks.patch
-ext3-unmount_sync.patch
-ext3-use-after-free.patch
-ext3-orphan_lock.patch
-ext3-noread-2.4.20.patch
-ext3-delete_thread-2.4.20.patch
-extN-wantedi.patch
-ext3-san-2.4.20.patch
-ext3-map_inode_page.patch
-ext3-error-export.patch
-iopen-2.4.20.patch
-tcp-zero-copy.patch
-jbd-dont-account-blocks-twice.patch
-jbd-commit-tricks.patch
-ext3-no-write-super.patch
-add_page_private.patch
-socket-exports-vanilla.patch
-removepage-2.4.20.patch
-jbd-ctx_switch.patch
-jbd-flushtime.patch
-jbd-get_write_access.patch
-nfs_export_kernel-2.4.20.patch
-ext3-raw-lookup.patch
-ext3-ea-in-inode-2.4.20.patch
-listman-2.4.20.patch
-ext3-trusted_ea-2.4.20.patch
-kernel_text_address-2.4.20-vanilla.patch
-ext3-xattr-ptr-arith-fix.patch
-gfp_memalloc-2.4.22.patch
-procfs-ndynamic-2.4.patch
-linux-2.4.20-filemap.patch
-ext3-truncate-buffer-head.patch
+++ /dev/null
-configurable-x86-stack-2.4.20.patch
-dev_read_only_2.4.20-rh.patch
-exports_2.4.20-rh-hp.patch
-lustre_version.patch
-vfs_intent-2.4.20-vanilla.patch
-invalidate_show.patch
-export-truncate.patch
-iod-stock-exports-2.4.22.patch
-ext3-htree-2.4.22-rh.patch
-linux-2.4.22-xattr-0.8.54.patch
-ext3-orphan_lock-2.4.22-rh.patch
-ext3-noread-2.4.20.patch
-ext3-delete_thread-suse.patch
-extN-wantedi.patch
-ext3-san-2.4.20.patch
-ext3-map_inode_page.patch
-ext3-error-export.patch
-iopen-2.4.20.patch
-tcp-zero-copy-2.4.22-rh.patch
-jbd-dont-account-blocks-twice.patch
-jbd-commit-tricks.patch
-ext3-no-write-super-chaos.patch
-add_page_private.patch
-socket-exports-2.4.22-rh.patch
-nfs_export_kernel-2.4.22.patch
-ext3-raw-lookup.patch
-ext3-ea-in-inode-2.4.22-rh.patch
-listman-2.4.20.patch
-ext3-trusted_ea-2.4.20.patch
-kernel_text_address-2.4.22-vanilla.patch
-gfp_memalloc-2.4.22.patch
-ext3-xattr-ptr-arith-fix.patch
-3.5G-address-space-2.4.22-vanilla.patch
-procfs-ndynamic-2.4.patch
-ext3-truncate-buffer-head.patch
SERIES MNEMONIC COMMENT ARCH
-chaos-2.4.18 linux-chaos-2.4.18 LLNL 2.4.18 chaos ~65 i386
hp-pnnl-2.4.20 linux-2.4.20-hp4_pnnl1 same as vanilla but no uml ia64
-vanilla-2.4.20 linux-2.4.20 patch with uml-2.4.20-6 um
+vanilla-2.4.24 linux-2.4.24 patch with uml-2.4.24-6 um
chaos-2.4.21 linux-chaos-2.4.21 same as rh-2.4.21-15.EL i386
+rhel-2.4.21 linux-2.4.21-15.3EL same as chaos-2.4.21 i386
vanilla-2.4.24 linux-2.4.24 patch with uml-2.4.24-1 um
kgdb-2.5.73 linux-2.5.73 vanilla 2.5.73 with kgdb i386
-bproc-2.4.20-hp-pnnl linux-2.4.20-hp4_pnnl9 hp-pnnl + bproc i386
if (added)
ldlm_flock_destroy(req, mode, *flags);
- ldlm_resource_dump(res);
+ ldlm_resource_dump(D_OTHER, res);
RETURN(LDLM_ITER_CONTINUE);
}
cli->cl_dirty = 0;
cli->cl_avail_grant = 0;
+ /* FIXME: should limit this for the sum of all cl_dirty_max */
cli->cl_dirty_max = OSC_MAX_DIRTY_DEFAULT * 1024 * 1024;
+ if (cli->cl_dirty_max >> PAGE_SHIFT > num_physpages / 8)
+ cli->cl_dirty_max = num_physpages << (PAGE_SHIFT - 3);
INIT_LIST_HEAD(&cli->cl_cache_waiters);
INIT_LIST_HEAD(&cli->cl_loi_ready_list);
INIT_LIST_HEAD(&cli->cl_loi_write_list);
RETURN(rc);
}
+/*
+ * Search the locks held by @exp for one whose remote handle cookie matches
+ * remote_hdl->cookie.
+ *
+ * The export's led_held_locks list is walked with the namespace lock of the
+ * export's obd held.  On a match LDLM_LOCK_GET() is applied to the lock
+ * before the namespace lock is dropped (presumably taking a reference that
+ * the caller has to release -- confirm against the LDLM_LOCK_GET definition).
+ *
+ * Returns the matching lock, or NULL when no held lock carries that cookie.
+ */
+static struct ldlm_lock *
+find_existing_lock(struct obd_export *exp, struct lustre_handle *remote_hdl)
+{
+        struct obd_device *obd = exp->exp_obd;
+        struct ldlm_lock *found = NULL;
+        struct list_head *pos;
+
+        l_lock(&obd->obd_namespace->ns_lock);
+        list_for_each(pos, &exp->exp_ldlm_data.led_held_locks) {
+                struct ldlm_lock *candidate;
+
+                candidate = list_entry(pos, struct ldlm_lock, l_export_chain);
+                if (candidate->l_remote_handle.cookie != remote_hdl->cookie)
+                        continue;
+
+                /* First match wins; pin it while ns_lock is still held. */
+                LDLM_LOCK_GET(candidate);
+                found = candidate;
+                break;
+        }
+        l_unlock(&obd->obd_namespace->ns_lock);
+
+        return found;
+}
+
+
int ldlm_handle_enqueue(struct ptlrpc_request *req,
ldlm_completion_callback completion_callback,
ldlm_blocking_callback blocking_callback,
flags = dlm_req->lock_flags;
+ LASSERT(req->rq_export);
+
+ if (flags & LDLM_FL_REPLAY) {
+ lock = find_existing_lock(req->rq_export,
+ &dlm_req->lock_handle1);
+ if (lock != NULL) {
+ DEBUG_REQ(D_HA, req, "found existing lock cookie "LPX64,
+ lock->l_handle.h_cookie);
+ GOTO(existing_lock, rc = 0);
+ }
+
+ }
+
/* The lock's callback data might be set in the policy function */
lock = ldlm_lock_create(obddev->obd_namespace, &dlm_req->lock_handle2,
dlm_req->lock_desc.l_resource.lr_name,
sizeof(lock->l_remote_handle));
LDLM_DEBUG(lock, "server-side enqueue handler, new lock created");
- LASSERT(req->rq_export);
-
OBD_FAIL_TIMEOUT(OBD_FAIL_LDLM_ENQUEUE_BLOCKED, obd_timeout * 2);
l_lock(&lock->l_resource->lr_namespace->ns_lock);
if (req->rq_export->exp_failed) {
&lock->l_export->exp_ldlm_data.led_held_locks);
l_unlock(&lock->l_resource->lr_namespace->ns_lock);
+existing_lock:
+
if (flags & LDLM_FL_HAS_INTENT) {
/* In this case, the reply buffer is allocated deep in
* local_lock_enqueue by the policy function. */
if (!list_empty(&ldlm_namespace_list)) {
CERROR("ldlm still has namespaces; clean these up first.\n");
- ldlm_dump_all_namespaces();
+ ldlm_dump_all_namespaces(D_DLMTRACE);
RETURN(-EBUSY);
}
struct obd_device *obd;
if (lock->l_conn_export == NULL) {
- static unsigned long next_dump = 0;
+ static unsigned long next_dump = 0, last_dump = 0;
LDLM_ERROR(lock, "lock timed out; not entering recovery in "
"server code, just going back to sleep");
if (time_after(jiffies, next_dump)) {
- ldlm_namespace_dump(lock->l_resource->lr_namespace);
- if (next_dump == 0)
- portals_debug_dumplog();
+ last_dump = next_dump;
next_dump = jiffies + 300 * HZ;
+ ldlm_namespace_dump(D_DLMTRACE,
+ lock->l_resource->lr_namespace);
+ if (last_dump == 0)
+ portals_debug_dumplog();
}
RETURN(0);
}
kmem_cache_t *ldlm_resource_slab, *ldlm_lock_slab;
-spinlock_t ldlm_namespace_lock = SPIN_LOCK_UNLOCKED;
+DECLARE_MUTEX(ldlm_namespace_lock);
struct list_head ldlm_namespace_list = LIST_HEAD_INIT(ldlm_namespace_list);
struct proc_dir_entry *ldlm_type_proc_dir = NULL;
struct proc_dir_entry *ldlm_ns_proc_dir = NULL;
static int ldlm_proc_dump_ns(struct file *file, const char *buffer,
unsigned long count, void *data)
{
- ldlm_dump_all_namespaces();
+ ldlm_dump_all_namespaces(D_DLMTRACE);
RETURN(count);
}
ns->ns_nr_unused = 0;
ns->ns_max_unused = LDLM_DEFAULT_LRU_SIZE;
- spin_lock(&ldlm_namespace_lock);
+ down(&ldlm_namespace_lock);
list_add(&ns->ns_list_chain, &ldlm_namespace_list);
- spin_unlock(&ldlm_namespace_lock);
+ up(&ldlm_namespace_lock);
#ifdef __KERNEL__
ldlm_proc_namespace(ns);
#endif
CERROR("Resource refcount nonzero (%d) after "
"lock cleanup; forcing cleanup.\n",
atomic_read(&res->lr_refcount));
- ldlm_resource_dump(res);
+ ldlm_resource_dump(D_ERROR, res);
atomic_set(&res->lr_refcount, 1);
ldlm_resource_putref(res);
}
if (!ns)
RETURN(ELDLM_OK);
- spin_lock(&ldlm_namespace_lock);
+ down(&ldlm_namespace_lock);
list_del(&ns->ns_list_chain);
-
- spin_unlock(&ldlm_namespace_lock);
+ up(&ldlm_namespace_lock);
/* At shutdown time, don't call the cancellation callback */
ldlm_namespace_cleanup(ns, 0);
struct ldlm_resource *res;
OBD_SLAB_ALLOC(res, ldlm_resource_slab, SLAB_NOFS, sizeof *res);
- if (res == NULL) {
- LBUG();
+ if (res == NULL)
return NULL;
- }
+
memset(res, 0, sizeof(*res));
INIT_LIST_HEAD(&res->lr_children);
"type: %d", type);
res = ldlm_resource_new();
- if (!res) {
- LBUG();
+ if (!res)
RETURN(NULL);
- }
spin_lock(&ns->ns_counter_lock);
ns->ns_resources++;
}
}
- if (create)
+ if (create) {
res = ldlm_resource_add(ns, parent, name, type);
- else
+ if (res == NULL)
+ GOTO(out, NULL);
+ } else {
res = NULL;
+ }
if (create && ns->ns_lvbo && ns->ns_lvbo->lvbo_init) {
int rc;
CERROR("lvbo_init failed for resource "LPU64": rc %d\n",
name.name[0], rc);
} else {
+out:
l_unlock(&ns->ns_lock);
}
}
if (!list_empty(&res->lr_granted)) {
- ldlm_resource_dump(res);
+ ldlm_resource_dump(D_ERROR, res);
LBUG();
}
if (!list_empty(&res->lr_converting)) {
- ldlm_resource_dump(res);
+ ldlm_resource_dump(D_ERROR, res);
LBUG();
}
if (!list_empty(&res->lr_waiting)) {
- ldlm_resource_dump(res);
+ ldlm_resource_dump(D_ERROR, res);
LBUG();
}
if (!list_empty(&res->lr_children)) {
- ldlm_resource_dump(res);
+ ldlm_resource_dump(D_ERROR, res);
LBUG();
}
{
l_lock(&res->lr_namespace->ns_lock);
- ldlm_resource_dump(res);
+ ldlm_resource_dump(D_OTHER, res);
CDEBUG(D_OTHER, "About to add this lock:\n");
ldlm_lock_dump(D_OTHER, lock, 0);
memcpy(&desc->lr_name, &res->lr_name, sizeof(desc->lr_name));
}
-void ldlm_dump_all_namespaces(void)
+void ldlm_dump_all_namespaces(int level)
{
struct list_head *tmp;
- spin_lock(&ldlm_namespace_lock);
+ down(&ldlm_namespace_lock);
list_for_each(tmp, &ldlm_namespace_list) {
struct ldlm_namespace *ns;
ns = list_entry(tmp, struct ldlm_namespace, ns_list_chain);
- ldlm_namespace_dump(ns);
+ ldlm_namespace_dump(level, ns);
}
- spin_unlock(&ldlm_namespace_lock);
+ up(&ldlm_namespace_lock);
}
-void ldlm_namespace_dump(struct ldlm_namespace *ns)
+void ldlm_namespace_dump(int level, struct ldlm_namespace *ns)
{
struct list_head *tmp;
- unsigned int debug_save = portal_debug;
- portal_debug |= D_OTHER;
- l_lock(&ns->ns_lock);
- CDEBUG(D_OTHER, "--- Namespace: %s (rc: %d, client: %d)\n", ns->ns_name,
+ CDEBUG(level, "--- Namespace: %s (rc: %d, client: %d)\n", ns->ns_name,
ns->ns_refcount, ns->ns_client);
- list_for_each(tmp, &ns->ns_root_list) {
- struct ldlm_resource *res;
- res = list_entry(tmp, struct ldlm_resource, lr_childof);
+ l_lock(&ns->ns_lock);
+ if (time_after(jiffies, ns->ns_next_dump)) {
+ list_for_each(tmp, &ns->ns_root_list) {
+ struct ldlm_resource *res;
+ res = list_entry(tmp, struct ldlm_resource, lr_childof);
- /* Once we have resources with children, this should really dump
- * them recursively. */
- ldlm_resource_dump(res);
+ /* Once we have resources with children, this should
+ * really dump them recursively. */
+ ldlm_resource_dump(level, res);
+ }
+ ns->ns_next_dump = jiffies + 10 * HZ;
}
l_unlock(&ns->ns_lock);
- portal_debug = debug_save;
}
-void ldlm_resource_dump(struct ldlm_resource *res)
+void ldlm_resource_dump(int level, struct ldlm_resource *res)
{
struct list_head *tmp;
int pos;
if (RES_NAME_SIZE != 4)
LBUG();
- CDEBUG(D_OTHER, "--- Resource: %p ("LPU64"/"LPU64"/"LPU64"/"LPU64
+ CDEBUG(level, "--- Resource: %p ("LPU64"/"LPU64"/"LPU64"/"LPU64
") (rc: %d)\n", res, res->lr_name.name[0], res->lr_name.name[1],
res->lr_name.name[2], res->lr_name.name[3],
atomic_read(&res->lr_refcount));
if (!list_empty(&res->lr_granted)) {
pos = 0;
- CDEBUG(D_OTHER, "Granted locks:\n");
+ CDEBUG(level, "Granted locks:\n");
list_for_each(tmp, &res->lr_granted) {
struct ldlm_lock *lock;
lock = list_entry(tmp, struct ldlm_lock, l_res_link);
- ldlm_lock_dump(D_OTHER, lock, ++pos);
+ ldlm_lock_dump(level, lock, ++pos);
}
}
if (!list_empty(&res->lr_converting)) {
pos = 0;
- CDEBUG(D_OTHER, "Converting locks:\n");
+ CDEBUG(level, "Converting locks:\n");
list_for_each(tmp, &res->lr_converting) {
struct ldlm_lock *lock;
lock = list_entry(tmp, struct ldlm_lock, l_res_link);
- ldlm_lock_dump(D_OTHER, lock, ++pos);
+ ldlm_lock_dump(level, lock, ++pos);
}
}
if (!list_empty(&res->lr_waiting)) {
pos = 0;
- CDEBUG(D_OTHER, "Waiting locks:\n");
+ CDEBUG(level, "Waiting locks:\n");
list_for_each(tmp, &res->lr_waiting) {
struct ldlm_lock *lock;
lock = list_entry(tmp, struct ldlm_lock, l_res_link);
- ldlm_lock_dump(D_OTHER, lock, ++pos);
+ ldlm_lock_dump(level, lock, ++pos);
}
}
}
LBUG();
flags = 0;
- lock1 = ldlm_lock_create(ns, NULL, res_id, LDLM_EXTENT, LCK_PR, NULL,
- 0);
+ lock1 = ldlm_lock_create(ns, NULL, res_id, LDLM_EXTENT, LCK_PR, NULL,0);
if (lock1 == NULL)
LBUG();
err = ldlm_lock_enqueue(ns, lock1, &ext1, sizeof(ext1), &flags, NULL,
LBUG();
flags = 0;
- lock2 = ldlm_lock_create(ns, NULL, res_id, LDLM_EXTENT, LCK_PR,
- NULL, 0);
+ lock2 = ldlm_lock_create(ns, NULL, res_id, LDLM_EXTENT, LCK_PR, NULL,0);
err = ldlm_lock_enqueue(ns, lock2, &ext2, sizeof(ext2), &flags, NULL,
NULL);
if (err != ELDLM_OK)
struct dentry **de = icbd->icbd_childp;
struct inode *parent = icbd->icbd_parent;
struct ll_sb_info *sbi = ll_i2sbi(parent);
- struct dentry *dentry = *de;
struct inode *inode = NULL;
int rc;
ENTRY;
rc = ll_prep_inode(sbi->ll_osc_exp, &inode, request, offset,
- dentry->d_sb);
+ (*de)->d_sb);
if (rc)
RETURN(rc);
}
}
- dentry = *de = ll_find_alias(inode, dentry);
+ *de = ll_find_alias(inode, *de);
} else {
ENTRY;
spin_lock(&dcache_lock);
- ll_d_add(dentry, inode);
+ ll_d_add(*de, inode);
spin_unlock(&dcache_lock);
}
- ll_set_dd(dentry);
- dentry->d_op = &ll_d_ops;
+ ll_set_dd(*de);
+ (*de)->d_op = &ll_d_ops;
RETURN(0);
}
if (dentry->d_name.len > EXT3_NAME_LEN)
RETURN(ERR_PTR(-ENAMETOOLONG));
- CDEBUG(D_VFSTRACE, "VFS Op:name=%s,dir=%lu/%u(%p),intent=%s\n",
- dentry->d_name.name, parent->i_ino, parent->i_generation,
- parent, LL_IT2STR(it));
+ /* Print at most d_name.len bytes of the name: "%.*s" is a precision,
+  * whereas "%*s" would only be a minimum field width. */
+ CDEBUG(D_VFSTRACE, "VFS Op:name=%.*s,dir=%lu/%u(%p),intent=%s\n",
+ dentry->d_name.len, dentry->d_name.name, parent->i_ino,
+ parent->i_generation, parent, LL_IT2STR(it));
if (d_mountpoint(dentry))
CERROR("Tell Peter, lookup on mtpt, it %s\n", LL_IT2STR(it));
int rc = 0;
ENTRY;
- CDEBUG(D_VFSTRACE, "VFS Op:name=%s,dir=%lu/%u(%p),intent=%s\n",
- dentry->d_name.name, dir->i_ino, dir->i_generation, dir,
- LL_IT2STR(it));
+ /* Print at most d_name.len bytes of the name: "%.*s" is a precision,
+  * whereas "%*s" would only be a minimum field width. */
+ CDEBUG(D_VFSTRACE, "VFS Op:name=%.*s,dir=%lu/%u(%p),intent=%s\n",
+ dentry->d_name.len, dentry->d_name.name, dir->i_ino,
+ dir->i_generation, dir, LL_IT2STR(it));
rc = it_open_error(DISP_OPEN_CREATE, it);
if (rc)
{
struct ptlrpc_request *request = NULL;
struct inode *dir = nd->dentry->d_inode;
- const char *name = nd->last.name;
- int len = nd->last.len;
struct ll_sb_info *sbi = ll_i2sbi(dir);
struct mdc_op_data op_data;
int err = -EMLINK;
ENTRY;
CDEBUG(D_VFSTRACE, "VFS Op:name=%s,dir=%lu/%u(%p)\n",
- name, dir->i_ino, dir->i_generation, dir);
+ nd->last.name, dir->i_ino, dir->i_generation, dir);
if (dir->i_nlink >= EXT3_LINK_MAX)
RETURN(err);
case S_IFBLK:
case S_IFIFO:
case S_IFSOCK:
- ll_prepare_mdc_op_data(&op_data, dir, NULL, name, len, 0);
+ ll_prepare_mdc_op_data(&op_data, dir, NULL, nd->last.name,
+ nd->last.len, 0);
err = mdc_create(sbi->ll_mdc_exp, &op_data, NULL, 0, mode,
current->fsuid, current->fsgid,
rdev, &request);
RETURN(err);
}
-static int ll_mknod(struct inode *dir, struct dentry *child, int mode,
+static int ll_mknod(struct inode *dir, struct dentry *dchild, int mode,
ll_dev_t rdev)
{
struct ptlrpc_request *request = NULL;
struct inode *inode = NULL;
- const char *name = child->d_name.name;
- int len = child->d_name.len;
struct ll_sb_info *sbi = ll_i2sbi(dir);
struct mdc_op_data op_data;
int err = -EMLINK;
ENTRY;
- CDEBUG(D_VFSTRACE, "VFS Op:name=%s,dir=%lu/%u(%p)\n",
- name, dir->i_ino, dir->i_generation, dir);
+ /* Print at most d_name.len bytes of the name: "%.*s" is a precision,
+  * whereas "%*s" would only be a minimum field width. */
+ CDEBUG(D_VFSTRACE, "VFS Op:name=%.*s,dir=%lu/%u(%p)\n",
+ dchild->d_name.len, dchild->d_name.name,
+ dir->i_ino, dir->i_generation, dir);
if (dir->i_nlink >= EXT3_LINK_MAX)
RETURN(err);
case S_IFBLK:
case S_IFIFO:
case S_IFSOCK:
- ll_prepare_mdc_op_data(&op_data, dir, NULL, name, len, 0);
+ ll_prepare_mdc_op_data(&op_data, dir, NULL, dchild->d_name.name,
+ dchild->d_name.len, 0);
err = mdc_create(sbi->ll_mdc_exp, &op_data, NULL, 0, mode,
current->fsuid, current->fsgid,
rdev, &request);
ll_update_times(request, 0, dir);
err = ll_prep_inode(sbi->ll_osc_exp, &inode, request, 0,
- child->d_sb);
+ dchild->d_sb);
if (err)
GOTO(out_err, err);
break;
RETURN(-EINVAL);
}
- d_instantiate(child, inode);
+ d_instantiate(dchild, inode);
out_err:
ptlrpc_req_finished(request);
RETURN(err);
static int ll_symlink_raw(struct nameidata *nd, const char *tgt)
{
struct inode *dir = nd->dentry->d_inode;
- const char *name = nd->last.name;
- int len = nd->last.len;
struct ptlrpc_request *request = NULL;
struct ll_sb_info *sbi = ll_i2sbi(dir);
struct mdc_op_data op_data;
ENTRY;
CDEBUG(D_VFSTRACE, "VFS Op:name=%s,dir=%lu/%u(%p),target=%s\n",
- name, dir->i_ino, dir->i_generation, dir, tgt);
+ nd->last.name, dir->i_ino, dir->i_generation, dir, tgt);
if (dir->i_nlink >= EXT3_LINK_MAX)
RETURN(err);
- ll_prepare_mdc_op_data(&op_data, dir, NULL, name, len, 0);
+ ll_prepare_mdc_op_data(&op_data, dir, NULL, nd->last.name,
+ nd->last.len, 0);
err = mdc_create(sbi->ll_mdc_exp, &op_data,
tgt, strlen(tgt) + 1, S_IFLNK | S_IRWXUGO,
current->fsuid, current->fsgid, 0, &request);
{
struct inode *src = srcnd->dentry->d_inode;
struct inode *dir = tgtnd->dentry->d_inode;
- const char *name = tgtnd->last.name;
- int len = tgtnd->last.len;
struct ptlrpc_request *request = NULL;
struct mdc_op_data op_data;
int err;
struct ll_sb_info *sbi = ll_i2sbi(dir);
ENTRY;
- CDEBUG(D_VFSTRACE, "VFS Op:inode=%lu/%u(%p),dir=%lu/%u(%p),target=%s\n",
- src->i_ino, src->i_generation, src,
- dir->i_ino, dir->i_generation, dir, name);
+ CDEBUG(D_VFSTRACE, "VFS Op:name=%s inode=%lu/%u(%p), dir=%lu/%u(%p), "
+ "target=%s\n", srcnd->last.name, src->i_ino, src->i_generation,
+ src, dir->i_ino, dir->i_generation, dir, tgtnd->last.name);
- ll_prepare_mdc_op_data(&op_data, src, dir, name, len, 0);
+ ll_prepare_mdc_op_data(&op_data, src, dir, tgtnd->last.name,
+ tgtnd->last.len, 0);
err = mdc_link(sbi->ll_mdc_exp, &op_data, &request);
if (err == 0)
ll_update_times(request, 0, dir);
static int ll_mkdir_raw(struct nameidata *nd, int mode)
{
struct inode *dir = nd->dentry->d_inode;
- const char *name = nd->last.name;
- int len = nd->last.len;
struct ptlrpc_request *request = NULL;
struct ll_sb_info *sbi = ll_i2sbi(dir);
struct mdc_op_data op_data;
int err = -EMLINK;
ENTRY;
CDEBUG(D_VFSTRACE, "VFS Op:name=%s,dir=%lu/%u(%p)\n",
- name, dir->i_ino, dir->i_generation, dir);
+ nd->last.name, dir->i_ino, dir->i_generation, dir);
if (dir->i_nlink >= EXT3_LINK_MAX)
RETURN(err);
mode = (mode & (S_IRWXUGO|S_ISVTX) & ~current->fs->umask) | S_IFDIR;
- ll_prepare_mdc_op_data(&op_data, dir, NULL, name, len, 0);
+ ll_prepare_mdc_op_data(&op_data, dir, NULL, nd->last.name,
+ nd->last.len, 0);
err = mdc_create(sbi->ll_mdc_exp, &op_data, NULL, 0, mode,
current->fsuid, current->fsgid, 0, &request);
if (err == 0)
static int ll_rmdir_raw(struct nameidata *nd)
{
struct inode *dir = nd->dentry->d_inode;
- const char *name = nd->last.name;
- int len = nd->last.len;
struct ptlrpc_request *request = NULL;
struct mdc_op_data op_data;
int rc;
ENTRY;
CDEBUG(D_VFSTRACE, "VFS Op:name=%s,dir=%lu/%u(%p)\n",
- name, dir->i_ino, dir->i_generation, dir);
+ nd->last.name, dir->i_ino, dir->i_generation, dir);
- ll_prepare_mdc_op_data(&op_data, dir, NULL, name, len, S_IFDIR);
+ ll_prepare_mdc_op_data(&op_data, dir, NULL, nd->last.name,
+ nd->last.len, S_IFDIR);
rc = mdc_unlink(ll_i2sbi(dir)->ll_mdc_exp, &op_data, &request);
if (rc == 0)
ll_update_times(request, 0, dir);
static int ll_unlink_raw(struct nameidata *nd)
{
struct inode *dir = nd->dentry->d_inode;
- const char *name = nd->last.name;
- int len = nd->last.len;
struct ptlrpc_request *request = NULL;
struct mdc_op_data op_data;
int rc;
ENTRY;
CDEBUG(D_VFSTRACE, "VFS Op:name=%s,dir=%lu/%u(%p)\n",
- name, dir->i_ino, dir->i_generation, dir);
+ nd->last.name, dir->i_ino, dir->i_generation, dir);
- ll_prepare_mdc_op_data(&op_data, dir, NULL, name, len, 0);
+ ll_prepare_mdc_op_data(&op_data, dir, NULL, nd->last.name,
+ nd->last.len, 0);
rc = mdc_unlink(ll_i2sbi(dir)->ll_mdc_exp, &op_data, &request);
if (rc)
GOTO(out, rc);
RETURN(rc);
}
-static int ll_rename_raw(struct nameidata *oldnd, struct nameidata *newnd)
+static int ll_rename_raw(struct nameidata *srcnd, struct nameidata *tgtnd)
{
- struct inode *src = oldnd->dentry->d_inode;
- struct inode *tgt = newnd->dentry->d_inode;
- const char *oldname = oldnd->last.name;
- int oldlen = oldnd->last.len;
- const char *newname = newnd->last.name;
- int newlen = newnd->last.len;
+ struct inode *src = srcnd->dentry->d_inode;
+ struct inode *tgt = tgtnd->dentry->d_inode;
struct ptlrpc_request *request = NULL;
struct ll_sb_info *sbi = ll_i2sbi(src);
struct mdc_op_data op_data;
int err;
ENTRY;
CDEBUG(D_VFSTRACE, "VFS Op:oldname=%s,src_dir=%lu/%u(%p),newname=%s,"
- "tgt_dir=%lu/%u(%p)\n", oldname, src->i_ino, src->i_generation,
- src, newname, tgt->i_ino, tgt->i_generation, tgt);
+ "tgt_dir=%lu/%u(%p)\n",
+ srcnd->last.name, src->i_ino, src->i_generation, src,
+ tgtnd->last.name, tgt->i_ino, tgt->i_generation, tgt);
ll_prepare_mdc_op_data(&op_data, src, tgt, NULL, 0, 0);
err = mdc_rename(sbi->ll_mdc_exp, &op_data,
- oldname, oldlen, newname, newlen, &request);
+ srcnd->last.name, srcnd->last.len,
+ tgtnd->last.name, tgtnd->last.len, &request);
if (!err) {
ll_update_times(request, 0, src);
ll_update_times(request, 0, tgt);
if (rc) {
if (rc != -ENOENT)
CERROR("inode %lu: rc = %d\n", inode->i_ino, rc);
- RETURN(rc);
+ GOTO (failed, rc);
}
body = lustre_msg_buf ((*request)->rq_repmsg, 0, sizeof (*body));
failed:
ptlrpc_req_finished (*request);
- RETURN (-EPROTO);
+ RETURN (rc);
}
static int ll_readlink(struct dentry *dentry, char *buffer, int buflen)
down(&lli->lli_open_sem);
rc = ll_readlink_internal(inode, &request, &symname);
up(&lli->lli_open_sem);
- if (rc)
+ if (rc) {
+ path_release(nd); /* Kernel assumes that ->follow_link()
+ releases nameidata on error */
GOTO(out, rc);
+ }
rc = vfs_follow_link(nd, symname);
ptlrpc_req_finished(request);
{
struct obd_device *obd = class_exp2obd(exp);
struct lov_obd *lov = &obd->u.lov;
+ struct obd_export *osc_exp;
int rc, i;
ENTRY;
if (lov->refcount != 0)
goto out_local;
+ spin_lock(&lov->lov_lock);
for (i = 0; i < lov->desc.ld_tgt_count; i++) {
if (lov->tgts[i].ltd_exp == NULL)
continue;
+ osc_exp = lov->tgts[i].ltd_exp;
+ lov->tgts[i].ltd_exp = NULL;
+
if (obd->obd_no_recov) {
/* Pass it on to our clients.
* XXX This should be an argument to disconnect,
* XXX not a back-door flag on the OBD. Ah well.
*/
struct obd_device *osc_obd;
- osc_obd = class_exp2obd(lov->tgts[i].ltd_exp);
+ osc_obd = class_exp2obd(osc_exp);
if (osc_obd)
osc_obd->obd_no_recov = 1;
}
- obd_register_observer(lov->tgts[i].ltd_exp->exp_obd, NULL);
+ obd_register_observer(osc_exp->exp_obd, NULL);
- rc = obd_disconnect(lov->tgts[i].ltd_exp, flags);
+ spin_unlock(&lov->lov_lock);
+ rc = obd_disconnect(osc_exp, flags);
+ spin_lock(&lov->lov_lock);
if (rc) {
if (lov->tgts[i].active) {
CERROR("Target %s disconnect error %d\n",
lov->desc.ld_active_tgt_count--;
lov->tgts[i].active = 0;
}
- lov->tgts[i].ltd_exp = NULL;
}
+ spin_unlock(&lov->lov_lock);
out_local:
rc = class_disconnect(exp, 0);
spin_lock(&lov->lov_lock);
for (i = 0, tgt = lov->tgts; i < lov->desc.ld_tgt_count; i++, tgt++) {
+ if (tgt->ltd_exp == NULL)
+ continue;
+
CDEBUG(D_INFO, "lov idx %d is %s conn "LPX64"\n",
i, tgt->uuid.uuid, tgt->ltd_exp->exp_handle.h_cookie);
if (strncmp(uuid->uuid, tgt->uuid.uuid, sizeof uuid->uuid) == 0)
char submd_buf[sizeof(struct lov_stripe_md) + sizeof(struct lov_oinfo)];
struct lov_stripe_md *submd = (void *)submd_buf;
ldlm_error_t rc;
- int i, save_flags = *flags;
+ int i, save_flags = *flags, all_skipped = 1;
ENTRY;
if (lsm_bad_magic(lsm))
continue;
}
+ all_skipped = 0;
+
/* XXX LOV STACKING: submd should be from the subobj */
submd->lsm_object_id = loi->loi_id;
submd->lsm_stripe_count = 0;
}
}
}
+ if (all_skipped)
+ GOTO(out_lockh, rc = -EIO);
+
if (lsm->lsm_stripe_count > 1)
lov_llh_put(lov_lockh);
RETURN(ELDLM_OK);
}
}
+out_lockh:
if (lsm->lsm_stripe_count > 1) {
lov_llh_destroy(lov_lockh);
lov_llh_put(lov_lockh);
len, karg, uarg);
if (err) {
if (lov->tgts[i].active) {
- CERROR("error: iocontrol OSC %s on OST"
- "idx %d: err = %d\n",
- lov->tgts[i].uuid.uuid, i, err);
+ CERROR("error: iocontrol OSC %s on OST "
+ "idx %d cmd %x: err = %d\n",
+ lov->tgts[i].uuid.uuid, i,
+ cmd, err);
if (!rc)
rc = err;
}
/* We could just return 1 immediately, but since we should only
* be called in revalidate_it if we already have a lock, let's
* verify that. */
- struct ldlm_res_id res_id ={.name = {cfid->id,
- cfid->generation}};
+ struct ldlm_res_id res_id = { .name = { cfid->id,
+ cfid->generation}};
struct lustre_handle lockh;
int mode = LCK_PR;
if (!inode)
RETURN(ERR_PTR(-ENOENT));
- if (is_bad_inode(inode)) {
- CERROR("bad inode returned %lu/%u\n",
- inode->i_ino, inode->i_generation);
- dput(result);
- RETURN(ERR_PTR(-ENOENT));
- }
-
if (generation && inode->i_generation != generation) {
/* we didn't find the right inode.. */
CERROR("bad inode %lu, link: %lu ct: %d or generation %u/%u\n",
/* If you change this message, be sure to update
* replay_single:test_46 */
- CERROR("force closing client file handle for %*s (%s:%lu)\n",
+ CDEBUG(D_INODE, "force closing file handle for %*s (%s:%lu)\n",
dentry->d_name.len, dentry->d_name.name,
ll_bdevname(dentry->d_inode->i_sb, btmp),
dentry->d_inode->i_ino);
if (lock)
down(&inode->i_sem);
rc = fsfilt_get_md(obd, inode, md, *size);
- if (lock)
- up(&inode->i_sem);
if (rc < 0) {
CERROR("Error %d reading eadata for ino %lu\n",
*size = rc;
}
}
+ if (lock)
+ up(&inode->i_sem);
RETURN (rc);
}
struct ldlm_resource *res;
DEBUG_REQ(D_DLMTRACE, req, "resent, not enqueuing new locks");
granted_lock = ldlm_handle2lock(child_lockh);
- LASSERT(granted_lock);
+ LASSERTF(granted_lock != NULL, LPU64"/%u lockh "LPX64"\n",
+ body->fid1.id, body->fid1.generation,
+ child_lockh->cookie);
+
res = granted_lock->l_resource;
child_fid.id = res->lr_name.name[0];
static void fixup_handle_for_resent_req(struct ptlrpc_request *req,
struct ldlm_lock *new_lock,
+ struct ldlm_lock **old_lock,
struct lustre_handle *lockh)
{
struct obd_export *exp = req->rq_export;
continue;
if (lock->l_remote_handle.cookie == remote_hdl.cookie) {
lockh->cookie = lock->l_handle.h_cookie;
+ LDLM_DEBUG(lock, "restoring lock cookie");
DEBUG_REQ(D_HA, req, "restoring lock cookie "LPX64,
lockh->cookie);
+ if (old_lock)
+ *old_lock = LDLM_LOCK_GET(lock);
l_unlock(&obd->obd_namespace->ns_lock);
return;
}
struct mds_obd *mds = &req->rq_export->exp_obd->u.mds;
struct ldlm_reply *rep;
struct lustre_handle lockh = { 0 };
- struct ldlm_lock *new_lock;
+ struct ldlm_lock *new_lock = NULL;
int rc, offset = 2, repsize[4] = {sizeof(struct ldlm_reply),
sizeof(struct mds_body),
mds->mds_max_mdsize,
rep = lustre_msg_buf(req->rq_repmsg, 0, sizeof (*rep));
intent_set_disposition(rep, DISP_IT_EXECD);
- fixup_handle_for_resent_req(req, lock, &lockh);
/* execute policy */
switch ((long)it->opc) {
case IT_OPEN:
case IT_CREAT|IT_OPEN:
+ fixup_handle_for_resent_req(req, lock, NULL, &lockh);
/* XXX swab here to assert that an mds_open reint
* packet is following */
rep->lock_policy_res2 = mds_reint(req, offset, &lockh);
case IT_GETATTR:
case IT_LOOKUP:
case IT_READDIR:
+ fixup_handle_for_resent_req(req, lock, &new_lock, &lockh);
rep->lock_policy_res2 = mds_getattr_name(offset, req, &lockh);
/* FIXME: LDLM can set req->rq_status. MDS sets
policy_res{1,2} with disposition and status.
* drop it below anyways because lock replay is done separately by the
* client afterwards. For regular RPCs we want to give the new lock to
* the client instead of whatever lock it was about to get. */
- new_lock = ldlm_handle2lock(&lockh);
+ if (new_lock == NULL)
+ new_lock = ldlm_handle2lock(&lockh);
if (new_lock == NULL && (flags & LDLM_FL_INTENT_ONLY))
RETURN(0);
- LASSERT(new_lock != NULL);
+ LASSERTF(new_lock != NULL, "op "LPX64" lockh "LPX64"\n",
+ it->opc, lockh.cookie);
/* If we've already given this lock to a client once, then we should
* have no readers or writers. Otherwise, we should have one reader
CERROR("cannot lookup __iopen__ directory: rc = %d\n", rc);
GOTO(err_pop, rc);
}
- if (!dentry->d_inode) {
+
+ mds->mds_fid_de = dentry;
+ if (!dentry->d_inode || is_bad_inode(dentry->d_inode)) {
rc = -ENOENT;
CERROR("__iopen__ directory has no inode? rc = %d\n", rc);
GOTO(err_fid, rc);
}
- mds->mds_fid_de = dentry;
dentry = simple_mkdir(current->fs->pwd, "PENDING", 0777, 1);
if (IS_ERR(dentry)) {
ENTRY;
rec = lustre_swab_reqbuf (req, offset, sizeof (*rec),
- lustre_swab_mds_rec_unlink);
+ lustre_swab_mds_rec_rename);
if (rec == NULL)
RETURN(-EFAULT);
struct mds_update_record *rec,struct ldlm_reply *rep)
{
struct mds_obd *mds = mds_req2mds(req);
- struct inode *pending_dir = mds->mds_pending_dir->d_inode;
struct dentry *dchild;
char fidname[LL_FID_NAMELEN];
int fidlen = 0, rc;
void *handle = NULL;
ENTRY;
- down(&pending_dir->i_sem);
fidlen = ll_fid2str(fidname, fid->id, fid->generation);
- dchild = lookup_one_len(fidname, mds->mds_pending_dir, fidlen);
+ dchild = ll_lookup_one_len(fidname, mds->mds_pending_dir, fidlen);
if (IS_ERR(dchild)) {
- up(&pending_dir->i_sem);
rc = PTR_ERR(dchild);
CERROR("error looking up %s in PENDING: rc = %d\n",fidname, rc);
RETURN(rc);
}
if (dchild->d_inode != NULL) {
- up(&pending_dir->i_sem);
mds_inode_set_orphan(dchild->d_inode);
mds_pack_inode2fid(&body->fid1, dchild->d_inode);
mds_pack_inode2body(body, dchild->d_inode);
fidname);
goto open;
}
- dput(dchild);
- up(&pending_dir->i_sem);
+ l_dput(dchild);
/* We didn't find it in PENDING so it isn't an orphan. See
* if it was a regular inode that was previously created. */
GOTO(cleanup, rc);
}
+ cleanup_phase = 2; /* child dentry */
inode = (*dchildp)->d_inode;
- if (inode != NULL)
+ if (inode != NULL) {
+ if (is_bad_inode(inode)) {
+ CERROR("bad inode returned %lu/%u\n",
+ inode->i_ino, inode->i_generation);
+ GOTO(cleanup, rc = -ENOENT);
+ }
inode = igrab(inode);
+ }
if (inode == NULL)
goto retry_locks;
} else if (S_ISREG(new_inode->i_mode)) {
mds_pack_inode2fid(&body->fid1, new_inode);
mds_pack_inode2body(body, new_inode);
- mds_pack_md(obd, req->rq_repmsg, 1, body, new_inode, MDS_PACK_MD_LOCK);
+ mds_pack_md(obd, req->rq_repmsg, 1, body, new_inode,
+ MDS_PACK_MD_LOCK);
}
}
struct l_linux_dirent *dirent, *n;
struct list_head dentry_list;
char d_name[LL_FID_NAMELEN];
- __u64 i = 0;
- int rc = 0, item = 0, namlen;
+ unsigned long inum;
+ int i = 0, rc = 0, item = 0, namlen;
ENTRY;
push_ctxt(&saved, &obd->obd_ctxt, NULL);
GOTO(err_out, rc);
list_for_each_entry_safe(dirent, n, &dentry_list, lld_list) {
- i ++;
+ i++;
list_del(&dirent->lld_list);
namlen = strlen(dirent->lld_name);
LASSERT(sizeof(d_name) >= namlen + 1);
strcpy(d_name, dirent->lld_name);
+ inum = dirent->lld_ino;
OBD_FREE(dirent, sizeof(*dirent));
- CDEBUG(D_INODE, "entry "LPU64" of PENDING DIR: %s\n",
- i, d_name);
+ CDEBUG(D_INODE, "entry %d of PENDING DIR: %s\n", i, d_name);
if (((namlen == 1) && !strcmp(d_name, ".")) ||
- ((namlen == 2) && !strcmp(d_name, ".."))) {
+ ((namlen == 2) && !strcmp(d_name, "..")) || inum == 0)
continue;
- }
down(&pending_dir->i_sem);
dchild = lookup_one_len(d_name, mds->mds_pending_dir, namlen);
int minor = obd->obd_minor;
spin_lock(&obd_dev_lock);
- memset(obd, 0, sizeof(*obd));
+ obd->obd_type = NULL;
+ //memset(obd, 0, sizeof(*obd));
obd->obd_minor = minor;
spin_unlock(&obd_dev_lock);
}
static void interrupted_oig(void *data)
{
struct obd_io_group *oig = data;
- struct list_head *pos;
struct oig_callback_context *occ;
unsigned long flags;
spin_lock_irqsave(&oig->oig_lock, flags);
- list_for_each(pos, &oig->oig_occ_list) {
- occ = list_entry(pos, struct oig_callback_context,
- occ_oig_item);
+ /* Call occ_interrupted() on every entry of oig_occ_list that has not
+ * already been flagged as interrupted.
+ *
+ * We need to restart the processing each time we drop the lock, as
+ * it is possible that other threads called oig_complete_one() and
+ * removed an entry elsewhere in the list while the lock was dropped.
+ * We need to drop the lock because osc_ap_completion() calls
+ * oig_complete_one(), which re-takes this lock, and because of a
+ * lock ordering issue.
+ *
+ * Each entry is flagged before the lock is released, so a restarted
+ * scan skips it; the loop ends once a full pass over the list finds
+ * no unflagged entry. */
+restart:
+ list_for_each_entry(occ, &oig->oig_occ_list, occ_oig_item) {
+ if (occ->interrupted)
+ continue;
+ occ->interrupted = 1;
+ spin_unlock_irqrestore(&oig->oig_lock, flags);
occ->occ_interrupted(occ);
+ spin_lock_irqsave(&oig->oig_lock, flags);
+ goto restart;
}
spin_unlock_irqrestore(&oig->oig_lock, flags);
}
return 0;
}
-int lprocfs_write_u64_helper(const char *buffer, unsigned long count,
- __u64 *val)
+int lprocfs_write_u64_helper(const char *buffer, unsigned long count,__u64 *val)
{
char kernbuf[22], *end;
kernbuf[count] = '\0';
- *val = simple_strtoull(kernbuf, &end, 0);
+ if (kernbuf[0] == '-')
+ *val = -simple_strtoull(kernbuf + 1, &end, 0);
+ else
+ *val = simple_strtoull(kernbuf, &end, 0);
if (kernbuf == end)
return -EINVAL;
if (filp->f_dentry->d_inode->i_size == 0) {
if (i == 0 && filter->fo_fsd->fsd_unused != 0) {
/* OST conversion, remove sometime post 1.0 */
- filter->fo_last_objids[i] =
+ filter->fo_last_objids[0] =
le64_to_cpu(filter->fo_fsd->fsd_unused);
CWARN("saving old objid "LPU64" to LAST_ID\n",
- filter->fo_last_objids[i]);
- rc = filter_update_last_objid(obd, 0, 1);
- if (rc)
- GOTO(cleanup, rc);
+ filter->fo_last_objids[0]);
} else {
filter->fo_last_objids[i] = FILTER_INIT_OBJID;
}
+ rc = filter_update_last_objid(obd, i, 1);
+ if (rc)
+ GOTO(cleanup, rc);
continue;
}
return dparent;
rc = filter_lock_dentry(obd, dparent);
- if (time_after(jiffies, now + 15 * HZ))
- CERROR("slow parent lock %lus\n", (jiffies - now) / HZ);
+ fsfilt_check_slow(now, obd_timeout, "parent lock");
return rc ? ERR_PTR(rc) : dparent;
}
obd_size maxsize = obd->obd_osfs.os_blocks * obd->obd_osfs.os_bsize;
obd_size tot_dirty = 0, tot_pending = 0, tot_granted = 0;
obd_size fo_tot_dirty, fo_tot_pending, fo_tot_granted;
+ int level = D_CACHE;
if (list_empty(&obd->obd_exports))
return;
spin_lock(&obd->obd_dev_lock);
list_for_each_entry(exp, &obd->obd_exports, exp_obd_chain) {
fed = &exp->exp_filter_data;
- LASSERTF(fed->fed_grant + fed->fed_pending <= maxsize,
- "cli %s/%p %lu+%lu > "LPU64"\n",
- exp->exp_client_uuid.uuid, exp,
- fed->fed_grant, fed->fed_pending, maxsize);
- LASSERTF(fed->fed_dirty <= maxsize, "cli %s/%p %lu > "LPU64"\n",
- exp->exp_client_uuid.uuid, exp,fed->fed_dirty,maxsize);
- CDEBUG(D_CACHE,"%s: cli %s/%p dirty %lu pend %lu grant %lu\n",
+ if (fed->fed_grant < 0 || fed->fed_pending < 0 ||
+ fed->fed_dirty < 0)
+ level = D_ERROR;
+ if (maxsize > 0) { /* we may not have done a statfs yet */
+ LASSERTF(fed->fed_grant + fed->fed_pending <= maxsize,
+ "cli %s/%p %ld+%ld > "LPU64"\n",
+ exp->exp_client_uuid.uuid, exp,
+ fed->fed_grant, fed->fed_pending, maxsize);
+ LASSERTF(fed->fed_dirty <= maxsize,
+ "cli %s/%p %ld > "LPU64"\n",
+ exp->exp_client_uuid.uuid, exp,
+ fed->fed_dirty, maxsize);
+ }
+ CDEBUG(level, "%s: cli %s/%p dirty %ld pend %ld grant %ld\n",
obd->obd_name, exp->exp_client_uuid.uuid, exp,
fed->fed_dirty, fed->fed_pending, fed->fed_grant);
tot_granted += fed->fed_grant + fed->fed_pending;
struct obd_device *obd = exp->exp_obd;
struct filter_obd *filter = &obd->u.filter;
struct filter_export_data *fed = &exp->exp_filter_data;
+ int level = D_CACHE;
spin_lock(&obd->obd_osfs_lock);
spin_lock(&exp->exp_obd->obd_dev_lock);
list_del_init(&exp->exp_obd_chain);
spin_unlock(&exp->exp_obd->obd_dev_lock);
- CDEBUG(D_CACHE, "%s: cli %s/%p dirty %lu pend %lu grant %lu\n",
+ if (fed->fed_dirty < 0 || fed->fed_grant < 0 || fed->fed_pending < 0)
+ level = D_ERROR;
+ CDEBUG(level, "%s: cli %s/%p dirty %ld pend %ld grant %ld\n",
obd->obd_name, exp->exp_client_uuid.uuid, exp,
fed->fed_dirty, fed->fed_pending, fed->fed_grant);
LASSERTF(filter->fo_tot_granted >= fed->fed_grant,
- "%s: tot_granted "LPU64" cli %s/%p fed_grant %lu\n",
+ "%s: tot_granted "LPU64" cli %s/%p fed_grant %ld\n",
obd->obd_name, filter->fo_tot_granted,
exp->exp_client_uuid.uuid, exp, fed->fed_grant);
filter->fo_tot_granted -= fed->fed_grant;
LASSERTF(exp->exp_obd->u.filter.fo_tot_pending >= fed->fed_pending,
- "%s: tot_pending "LPU64" cli %s/%p fed_pending %lu\n",
+ "%s: tot_pending "LPU64" cli %s/%p fed_pending %ld\n",
obd->obd_name, filter->fo_tot_pending,
exp->exp_client_uuid.uuid, exp, fed->fed_pending);
LASSERTF(filter->fo_tot_dirty >= fed->fed_dirty,
- "%s: tot_dirty "LPU64" cli %s/%p fed_dirty %lu\n",
+ "%s: tot_dirty "LPU64" cli %s/%p fed_dirty %ld\n",
obd->obd_name, filter->fo_tot_dirty,
exp->exp_client_uuid.uuid, exp, fed->fed_dirty);
filter->fo_tot_dirty -= fed->fed_dirty;
struct obd_device *obd = exp->exp_obd;
unsigned long irqflags;
struct llog_ctxt *ctxt;
- int rc;
+ int rc, err;
ENTRY;
LASSERT(exp);
/* flush any remaining cancel messages out to the target */
ctxt = llog_get_context(obd, LLOG_UNLINK_REPL_CTXT);
- llog_sync(ctxt, exp);
+ err = llog_sync(ctxt, exp);
+ if (err)
+ CERROR("error flushing logs to MDS: rc %d\n", err);
class_export_put(exp);
RETURN(rc);
(oa->o_flags & OBD_FL_DELORPHAN)) {
if (diff >= 0)
RETURN(diff);
- if (-diff > 10000) { /* XXX make this smarter */
+ if (-diff > OST_MAX_PRECREATE) {
CERROR("ignoring bogus orphan destroy request: obdid "
LPU64" last_id "LPU64"\n",
oa->o_id, filter_last_id(filter, oa));
(group != 0 || oa->o_id == 0))
RETURN(1);
- LASSERT(diff >= 0);
+ LASSERTF(diff >= 0, LPU64" - "LPU64" = %d\n", oa->o_id,
+ filter_last_id(filter, oa), diff);
RETURN(diff);
}
}
* already exists
*/
if (recreate_obj) {
- CERROR("%s: Serious error: recreating obj %*s "
- "but obj already exists \n",
+ CERROR("%s: recreating existing object %*s?\n",
obd->obd_name, dchild->d_name.len,
dchild->d_name.name);
- LBUG();
} else {
CERROR("%s: Serious error: objid %*s already "
"exists; is this filesystem corrupt?\n",
dchild = filter_fid2dentry(obd, dparent, group, oa->o_id);
if (IS_ERR(dchild))
- GOTO(cleanup, rc = -ENOENT);
+ GOTO(cleanup, rc = PTR_ERR(dchild));
cleanup_phase = 2;
if (dchild->d_inode == NULL) {
LPROC_FILTER_LAST,
};
-#define FILTER_MAX_CACHE_SIZE (32 * 1024 * 1024) /* was OBD_OBJECT_EOF */
+//#define FILTER_MAX_CACHE_SIZE (32 * 1024 * 1024) /* was OBD_OBJECT_EOF */
+#define FILTER_MAX_CACHE_SIZE OBD_OBJECT_EOF
/* filter.c */
void f_dput(struct dentry *);
fso[i].fso_bufcnt = o->ioo_bufcnt;
}
- if (time_after(jiffies, now + 15 * HZ))
- CERROR("slow preprw_read setup %lus\n", (jiffies - now) / HZ);
- else
- CDEBUG(D_INFO, "preprw_read setup: %lu jiffies\n",
- (jiffies - now));
+ fsfilt_check_slow(now, obd_timeout, "preprw_read setup");
for (i = 0, o = obj, rnb = nb, lnb = res; i < objcount; i++, o++) {
dentry = fso[i].fso_dentry;
}
}
- if (time_after(jiffies, now + 15 * HZ))
- CERROR("slow start_page_read %lus\n", (jiffies - now) / HZ);
- else
- CDEBUG(D_INFO, "start_page_read: %lu jiffies\n",
- (jiffies - now));
+ fsfilt_check_slow(now, obd_timeout, "start_page_read");
lprocfs_counter_add(obd->obd_stats, LPROC_FILTER_READ_BYTES, tot_bytes);
while (lnb-- > res) {
}
}
- if (time_after(jiffies, now + 15 * HZ))
- CERROR("slow finish_page_read %lus\n", (jiffies - now) / HZ);
- else
- CDEBUG(D_INFO, "finish_page_read: %lu jiffies\n",
- (jiffies - now));
+ fsfilt_check_slow(now, obd_timeout, "finish_page_read");
filter_tally_read(&exp->exp_obd->u.filter, res, niocount);
fso.fso_dentry = dentry;
fso.fso_bufcnt = obj->ioo_bufcnt;
- if (time_after(jiffies, now + 15 * HZ))
- CERROR("slow preprw_write setup %lus\n", (jiffies - now) / HZ);
- else
- CDEBUG(D_INFO, "preprw_write setup: %lu jiffies\n",
- (jiffies - now));
+ fsfilt_check_slow(now, obd_timeout, "preprw_write setup");
spin_lock(&exp->exp_obd->obd_osfs_lock);
if (oa)
tot_bytes += lnb->len;
}
- if (time_after(jiffies, now + 15 * HZ))
- CERROR("slow start_page_write %lus\n", (jiffies - now) / HZ);
- else
- CDEBUG(D_INFO, "start_page_write: %lu jiffies\n",
- (jiffies - now));
+ fsfilt_check_slow(now, obd_timeout, "start_page_write");
lprocfs_counter_add(exp->exp_obd->obd_stats, LPROC_FILTER_WRITE_BYTES,
tot_bytes);
rc = fsync_inode_data_buffers(inode);
if (rc == 0)
rc = filemap_fdatawait(inode->i_mapping);
- if (rc < 0)
- GOTO(cleanup, rc);
+ if (rc < 0) {
+ /* We can race with truncate_complete_page() in the call to
+ * filter_clear_page_cache(). This is OK, because it also
+ * waits on IO completion already, but the truncate confuses
+ * the buffer_uptodate() in fsync_inode_data_buffers().
+ * The only dirty pages in the page cache on an inode should
+ * be from partial page truncates.
+ * If there is a real IO error here we'll hit it below. */
+ CDEBUG(D_WARNING, "error flushing page cache: rc %d\n", rc);
+ //GOTO(cleanup, rc);
+ }
rc = brw_kiovec(WRITE, 1, &iobuf, inode->i_dev, iobuf->blocks,
1 << inode->i_blkbits);
GOTO(cleanup, rc);
}
- if (time_after(jiffies, now + 15 * HZ))
- CERROR("slow brw_start %lus\n", (jiffies - now) / HZ);
+ fsfilt_check_slow(now, obd_timeout, "brw_start");
iattr_from_obdo(&iattr,oa,OBD_MD_FLATIME|OBD_MD_FLMTIME|OBD_MD_FLCTIME);
/* filter_direct_io drops i_sem */
if (rc == 0)
obdo_from_inode(oa, inode, FILTER_VALID_FLAGS);
- if (time_after(jiffies, now + 15 * HZ))
- CERROR("slow direct_io %lus\n", (jiffies - now) / HZ);
+ fsfilt_check_slow(now, obd_timeout, "direct_io");
err = fsfilt_commit_wait(obd, inode, wait_handle);
if (err)
rc = err;
if (obd_sync_filter)
LASSERT(oti->oti_transno <= obd->obd_last_committed);
- if (time_after(jiffies, now + 15 * HZ))
- CERROR("slow commitrw commit %lus\n", (jiffies - now) / HZ);
+ fsfilt_check_slow(now, obd_timeout, "commitrw commit");
cleanup:
filter_grant_commit(exp, niocount, res);
GOTO(cleanup, rc);
}
- if (time_after(jiffies, now + 15 * HZ))
- CERROR("slow brw_start %lus\n", (jiffies - now) / HZ);
+ fsfilt_check_slow(now, obd_timeout, "brw_start");
iattr_from_obdo(&iattr,oa,OBD_MD_FLATIME|OBD_MD_FLMTIME|OBD_MD_FLCTIME);
for (i = 0, lnb = res; i < obj->ioo_bufcnt; i++, lnb++) {
}
up(&inode->i_sem);
- if (time_after(jiffies, now + 15 * HZ))
- CERROR("slow direct_io %lus\n", (jiffies - now) / HZ);
+ fsfilt_check_slow(now, obd_timeout, "direct_io");
rc = filter_finish_transno(exp, oti, rc);
if (obd_sync_filter)
LASSERT(oti->oti_transno <= obd->obd_last_committed);
- if (time_after(jiffies, now + 15 * HZ))
- CERROR("slow commitrw commit %lus\n", (jiffies - now) / HZ);
+ fsfilt_check_slow(now, obd_timeout, "commitrw commit");
cleanup:
filter_grant_commit(exp, niocount, res);
if (rc)
return rc;
- if (val < 0 || val > OSC_MAX_DIRTY_MB_MAX)
+ if (val < 0 || val > OSC_MAX_DIRTY_MB_MAX || val > num_physpages / 4)
return -ERANGE;
spin_lock(&cli->cl_loi_list_lock);
return rc;
}
-int osc_rd_create_low_wm(char *page, char **start, off_t off, int count,
- int *eof, void *data)
-{
- struct obd_device *obd = data;
-
- if (obd == NULL)
- return 0;
-
- return snprintf(page, count, "%d\n",
- obd->u.cli.cl_oscc.oscc_kick_barrier);
-}
-
-int osc_wr_create_low_wm(struct file *file, const char *buffer,
- unsigned long count, void *data)
-{
- struct obd_device *obd = data;
- int val, rc;
-
- if (obd == NULL)
- return 0;
-
- rc = lprocfs_write_helper(buffer, count, &val);
- if (rc)
- return rc;
-
- if (val < 0)
- return -ERANGE;
-
- spin_lock(&obd->obd_dev_lock);
- obd->u.cli.cl_oscc.oscc_kick_barrier = val;
- spin_unlock(&obd->obd_dev_lock);
-
- return count;
-}
-
int osc_rd_create_count(char *page, char **start, off_t off, int count,
int *eof, void *data)
{
if (val < 0)
return -ERANGE;
+ if (val > OST_MAX_PRECREATE)
+ return -ERANGE;
obd->u.cli.cl_oscc.oscc_grow_count = val;
{ "max_dirty_mb", osc_rd_max_dirty_mb, osc_wr_max_dirty_mb, 0 },
{ "cur_dirty_bytes", osc_rd_cur_dirty_bytes, 0, 0 },
{ "cur_grant_bytes", osc_rd_cur_grant_bytes, 0, 0 },
- {"create_low_watermark", osc_rd_create_low_wm, osc_wr_create_low_wm, 0},
{ "create_count", osc_rd_create_count, osc_wr_create_count, 0 },
{ "prealloc_next_id", osc_rd_prealloc_next_id, 0, 0 },
{ "prealloc_last_id", osc_rd_prealloc_last_id, 0, 0 },
spin_unlock(&oscc->oscc_lock);
} else if (rc != 0 && rc != -EIO) {
DEBUG_REQ(D_ERROR, req,
- "unknown rc %d from async create: failing oscc",
- rc);
+ "unknown rc %d from async create: failing oscc", rc);
oscc->oscc_flags |= OSCC_FLAG_RECOVERING;
+ oscc->oscc_grow_count = OST_MIN_PRECREATE;
spin_unlock(&oscc->oscc_lock);
ptlrpc_fail_import(req->rq_import, req->rq_import_generation);
} else {
+ if (rc == 0)
+ oscc->oscc_flags &= ~OSCC_FLAG_LOW;
spin_unlock(&oscc->oscc_lock);
}
ENTRY;
spin_lock(&oscc->oscc_lock);
+ if (oscc->oscc_grow_count < OST_MAX_PRECREATE &&
+ !(oscc->oscc_flags & (OSCC_FLAG_LOW | OSCC_FLAG_RECOVERING)) &&
+ (__s64)(oscc->oscc_last_id - oscc->oscc_next_id) <=
+ (oscc->oscc_grow_count / 4 + 1)) {
+ oscc->oscc_flags |= OSCC_FLAG_LOW;
+ oscc->oscc_grow_count *= 2;
+ }
+
+ if (oscc->oscc_grow_count > OST_MAX_PRECREATE)
+ oscc->oscc_grow_count = OST_MAX_PRECREATE;
+
if (oscc->oscc_flags & OSCC_FLAG_CREATING ||
oscc->oscc_flags & OSCC_FLAG_RECOVERING) {
spin_unlock(&oscc->oscc_lock);
int rc = 0;
ENTRY;
- if (oscc_has_objects(oscc, oscc->oscc_kick_barrier))
+ if (oscc_has_objects(oscc, oscc->oscc_grow_count / 2))
RETURN(0);
if (!wait)
RETURN(osc_real_create(exp, oa, ea, oti));
}
- /* this is the special case where create removes orphans */
- if ((oa->o_valid & OBD_MD_FLFLAGS) &&
- oa->o_flags == OBD_FL_DELORPHAN) {
+ /* this is the special case where create removes orphans */
+ if ((oa->o_valid & OBD_MD_FLFLAGS) &&
+ oa->o_flags == OBD_FL_DELORPHAN) {
spin_lock(&oscc->oscc_lock);
if (oscc->oscc_flags & OSCC_FLAG_SYNC_IN_PROGRESS) {
spin_unlock(&oscc->oscc_lock);
init_waitqueue_head(&oscc->oscc_waitq);
spin_lock_init(&oscc->oscc_lock);
oscc->oscc_obd = obd;
- oscc->oscc_kick_barrier = 100;
- oscc->oscc_grow_count = 2000;
- oscc->oscc_initial_create_count = 2000;
+ oscc->oscc_grow_count = OST_MIN_PRECREATE;
oscc->oscc_next_id = 2;
oscc->oscc_last_id = 1;
enum async_flags oap_async_flags;
unsigned long oap_interrupted:1;
- struct obd_io_group *oap_oig;
+ struct obd_io_group *oap_oig;
struct oig_callback_context oap_occ;
struct ptlrpc_request *oap_request;
struct client_obd *oap_cli;
struct lov_oinfo *oap_loi;
struct obd_async_page_ops *oap_caller_ops;
- void *oap_caller_data;
+ void *oap_caller_data;
};
struct osc_cache_waiter {
int ocw_rc;
};
-#define OSCC_FLAG_RECOVERING 1
-#define OSCC_FLAG_CREATING 2
-#define OSCC_FLAG_NOSPC 4 /* can't create more objects on this OST */
-#define OSCC_FLAG_SYNC_IN_PROGRESS 8 /* only allow one thread to sync */
+#define OSCC_FLAG_RECOVERING 0x01
+#define OSCC_FLAG_CREATING 0x02
+#define OSCC_FLAG_NOSPC 0x04 /* can't create more objects on OST */
+#define OSCC_FLAG_SYNC_IN_PROGRESS 0x08 /* only allow one thread to sync */
+#define OSCC_FLAG_LOW 0x10
int osc_create(struct obd_export *exp, struct obdo *oa,
struct lov_stripe_md **ea, struct obd_trans_info *oti);
char *portals_debug_dumpstack(void)
{
- char *buf = stack_backtrace;
- buf[0] = '\0';
- return buf;
+ return "dump_stack\n"; /* constant string literal; callers must not write to it */
}
#endif /* __arch_um__ */
initial_connect = 1;
} else {
committed_before_reconnect = imp->imp_peer_committed_transno;;
-
}
-
spin_unlock_irqrestore(&imp->imp_lock, flags);
if (new_uuid) {
spin_unlock_irqrestore(&imp->imp_lock, flags);
if (imp->imp_next_ping <= this_ping || force) {
- if (level == LUSTRE_IMP_DISCON) {
+ if (level == LUSTRE_IMP_DISCON &&
+ !imp->imp_deactive) {
/* wait at least a timeout before
trying recovery again. */
imp->imp_next_ping = jiffies +
* requests. */
if (!active) {
ptlrpc_invalidate_import(imp, 0);
+ imp->imp_deactive = 1;
}
/* When activating, mark import valid, and attempt recovery */
if (active) {
+ imp->imp_deactive = 0;
CDEBUG(D_HA, "setting import %s VALID\n",
imp->imp_target_uuid.uuid);
rc = ptlrpc_recover_import(imp, NULL);
%attr(-, root, root) /usr/bin/lstripe
%attr(-, root, root) /usr/bin/mcreate
%attr(-, root, root) /usr/bin/munlink
-%attr(-, root, root) /usr/bin/mkdirmany
%attr(-, root, root) /usr/lib/lustre/python
%attr(-, root, root) /usr/lib/lustre/examples
echo "done"
echo -n "Recording conflicts in $CONFLICTS ..."
-if $CVS update | grep '^C' > $CONFLICTS; then
+# awk exits 0 whether or not a line matched, so make it report a match
+# explicitly: the "then" branch must run only when a conflict was listed.
+if $CVS update | awk '/^C/ { print $2; found = 1 } END { exit !found }' > $CONFLICTS; then
echo "Conflicts found, fix before committing."
cat $CONFLICTS
else
openfilleddirunlink
copy_attr
rename_many
+memhog
+rmdirmany
#include <fcntl.h>
#include <unistd.h>
#include <errno.h>
+
#define TEST_MINOR 120
#define TEST_MAJOR 25
exit(1);
}
+/* UMKA: These helpers are inlined here, instead of being taken from the
+   appropriate header, to avoid linking against libc symbols that are
+   not present in every libc version.
+
+   Currently this is the case, as the UML image contains RedHat 9 and
+   developers use something newer (Fedora, etc.).
+
+   They are "static inline" so that each is a complete definition that is
+   private to this file: a plain "inline" definition provides no external
+   definition under the C99 and later inline rules, and so can fail to
+   link whenever a call is not actually inlined. */
+
+/* Major number: its low 12 bits come from bits 8-19 of __dev and its
+   remaining bits from bits 44-63 of __dev. */
+static inline unsigned int
+__gnu_dev_major (unsigned long long int __dev)
+{
+        return ((__dev >> 8) & 0xfff) |
+               ((unsigned int) (__dev >> 32) & ~0xfff);
+}
+
+/* Minor number: its low 8 bits come from bits 0-7 of __dev and its
+   remaining bits from bits 20-43 of __dev. */
+static inline unsigned int
+__gnu_dev_minor (unsigned long long int __dev)
+{
+        return (__dev & 0xff) | ((unsigned int) (__dev >> 12) & ~0xff);
+}
+
+/* Pack a major/minor pair into a device number; the inverse of the two
+   extractors above. */
+static inline unsigned long long int
+__gnu_dev_makedev (unsigned int __major, unsigned int __minor)
+{
+        return ((__minor & 0xff) | ((__major & 0xfff) << 8)
+                | (((unsigned long long int) (__minor & ~0xff)) << 12)
+                | (((unsigned long long int) (__major & ~0xfff)) << 32));
+}
+
+#define __minor(dev) __gnu_dev_minor(dev)
+#define __major(dev) __gnu_dev_major(dev)
+#define __makedev(maj, min) __gnu_dev_makedev(maj, min)
+
int main( int argc, char **argv)
{
char *prog = argv[0];
char *filename = argv[1];
int rc;
struct stat st;
- dev_t device = makedev(TEST_MAJOR, TEST_MINOR);
+ dev_t device = __makedev(TEST_MAJOR, TEST_MINOR);
if (argc != 2)
usage(prog);
unlink(filename);
+
/* First try block devices */
rc = mknod(filename, 0700 | S_IFBLK, device);
if ( rc < 0 ) {
prog, filename, errno, strerror(errno));
return 3;
}
+
if ( st.st_rdev != device) {
- fprintf(stderr, "%s: created device other than requested: (%d,%d) instead of (%d,%d)\n", prog, major(st.st_rdev),minor(st.st_rdev),major(device),minor(device));
+ fprintf(stderr, "%s: created device other than requested: (%u,%u) instead of (%u,%u)\n",
+ prog, __major(st.st_rdev),__minor(st.st_rdev),__major(device),__minor(device));
return 4;
}
if (!S_ISBLK(st.st_mode)) {
return 8;
}
if ( st.st_rdev != device) {
- fprintf(stderr, "%s: created device other than requested: (%d,%d) instead of (%d,%d)\n", prog, major(st.st_rdev),minor(st.st_rdev),major(device),minor(device));
+ fprintf(stderr, "%s: created device other than requested: (%u,%u) instead of (%u,%u)\n",
+ prog, __major(st.st_rdev),__minor(st.st_rdev),__major(device),__minor(device));
return 9;
}
if (!S_ISCHR(st.st_mode)) {
#!/bin/sh
set -evx
-MOUNT=${MNT:-/mnt/lustre}
+MOUNT=${MOUNT:-/mnt/lustre}
DIR=${DIR:-$MOUNT}
SRC=${SRC:-`dirname $0`/../..}
export CC=${CC:-gcc}
mount_client $MOUNT
check_mount || return 41
cleanup || return $?
- fi
+ fi
echo "change the mode of $MDSDEV/OBJECTS,LOGS,PENDING to 555"
[ -d $TMPMTPT ] || mkdir -p $TMPMTPT
UNLINKMANY=${UNLINKMANY:-unlinkmany}
LCTL=${LCTL:-lctl}
-MOUNT1=${MOUNT1:-/mnt/lustre}
+MOUNT1=${MOUNT1:-/mnt/lustre1}
MOUNT2=${MOUNT2:-/mnt/lustre2}
DIR=${DIR:-$MOUNT1}
DIR2=${DIR2:-$MOUNT2}
FSTYPE=${FSTYPE:-ext3}
MOUNT=${MOUNT:-/mnt/lustre}
MOUNT2=${MOUNT2:-${MOUNT}2}
-NETWORKTYPE=${NETWORKTYPE:-tcp}
+NETTYPE=${NETTYPE:-tcp}
OSTCOUNT=${OSTCOUNT:-5}
# OSTDEVN will still override the device for OST N
# create nodes
${LMC} --add node --node localhost || exit 10
-${LMC} --add net --node localhost --nid `hostname` --nettype $NETWORKTYPE || exit 11
-${LMC} --add net --node client --nid '*' --nettype $NETWORKTYPE || exit 12
+${LMC} --add net --node localhost --nid `hostname` --nettype $NETTYPE || exit 11
+${LMC} --add net --node client --nid '*' --nettype $NETTYPE || exit 12
# configure mds server
${LMC} --format --add mds --node localhost --mds mds1 --fstype $FSTYPE --dev $MDSDEV --size $MDSSIZE || exit 20
MDSNODE=${MDSNODE:-mdev6}
OSTNODE=${OSTNODE:-mdev7}
CLIENT=${CLIENT:-mdev8}
-NETWORKTYPE=${NETWORKTYPE:-tcp}
+NETTYPE=${NETTYPE:-tcp}
MOUNTPT=${MOUNTPT:-/mnt/lustre}
CONFIG=${CONFIG:-recovery-cleanup.xml}
MDSDEV=${MDSDEV:-/tmp/mds1-`hostname`}
make_config() {
rm -f $CONFIG
for NODE in $CLIENT $MDSNODE $OSTNODE; do
- lmc -m $CONFIG --add net --node $NODE --nid `h2$NETWORKTYPE $NODE` \
- --nettype $NETWORKTYPE || exit 4
+ lmc -m $CONFIG --add net --node $NODE --nid `h2$NETTYPE $NODE` \
+ --nettype $NETTYPE || exit 4
done
lmc -m $CONFIG --add mds --node $MDSNODE --mds mds1 --fstype $FSTYPE \
--dev $MDSDEV --size $MDSSIZE || exit 5
make_config() {
rm -f $XMLCONFIG
add_mds mds --dev $MDSDEV --size $MDSSIZE
- add_lov lov1 mds --stripe_sz $STRIPE_BYTES\
+ add_lov lov1 mds --stripe_sz $STRIPE_BYTES \
--stripe_cnt $STRIPES_PER_OBJ --stripe_pattern 0
add_ost ost --lov lov1 --dev $OSTDEV --size $OSTSIZE
add_ost ost2 --lov lov1 --dev ${OSTDEV}-2 --size $OSTSIZE
# client will get evicted here
sysctl -w lustre.fail_loc=0x80000503
do_facet client cp /etc/termcap $DIR/$tfile
- sysctl -w lustre.fail_loc=0
sleep $TIMEOUT
+ sysctl -w lustre.fail_loc=0
+ do_facet client "df $DIR"
# expect cmp to fail
do_facet client "cmp /etc/termcap $DIR/$tfile" && return 1
do_facet client "rm $DIR/$tfile" || return 2
add_mdsfailover mds --dev $MDSDEV --size $MDSSIZE
fi
- add_lov lov1 mds --stripe_sz $STRIPE_BYTES\
+ add_lov lov1 mds --stripe_sz $STRIPE_BYTES \
--stripe_cnt $STRIPES_PER_OBJ --stripe_pattern 0
add_ost ost --lov lov1 --dev $OSTDEV --size $OSTSIZE --failover
add_ost ost2 --lov lov1 --dev ${OSTDEV}-2 --size $OSTSIZE --failover
run_test 4 "Fail OST during read, with verification"
test_5() {
- FREE=`df -h $DIR | tail -n 1 | awk '{ print $3 }'`
+ FREE=`df -P -h $DIR | tail -n 1 | awk '{ print $3 }'`
case $FREE in
*T|*G) FREE=1G;;
esac
add_mdsfailover mds --dev $MDSDEV --size $MDSSIZE
fi
- add_lov lov1 mds --stripe_sz $STRIPE_BYTES\
+ add_lov lov1 mds --stripe_sz $STRIPE_BYTES \
--stripe_cnt $STRIPES_PER_OBJ --stripe_pattern 0
add_ost ost --lov lov1 --dev $OSTDEV --size $OSTSIZE
add_ost ost2 --lov lov1 --dev ${OSTDEV}-2 --size $OSTSIZE
# give multiop a chance to open
sleep 1
mds_evict_client
- df $MOUNT || df $MOUNT || return 1
+ df $MOUNT || sleep 1 && df $MOUNT || return 1
kill -USR1 $pid1
kill -USR1 $pid2
sleep 1
# test MDS recovery after ost failure
test_42() {
- blocks=`df $MOUNT | tail -n 1 | awk '{ print $1 }'`
+ blocks=`df -P $MOUNT | tail -n 1 | awk '{ print $2 }'`
createmany -o $DIR/$tfile-%d 800
replay_barrier ost
unlinkmany $DIR/$tfile-%d 0 400
facet_failover ost
# osc is evicted, fs is smaller
- blocks_after=`df $MOUNT | tail -n 1 | awk '{ print $1 }'`
+ blocks_after=`df -P $MOUNT | tail -n 1 | awk '{ print $2 }'`
[ $blocks_after -lt $blocks ] || return 1
echo wait for MDS to timeout and recover
sleep $((TIMEOUT * 2))
run_test 42 "recovery after ost failure"
# b=2530
-# directory orphans can't be unlinked from PENDING directory
+# timeout in MDS/OST recovery RPC will LBUG MDS
test_43() {
replay_barrier mds
}
run_test 48 "MDS->OSC failure during precreate cleanup (2824)"
-test_49() {
+test_50() {
local osc_dev=`$LCTL device_list | \
awk '(/ost_svc_mds_svc/){print $4}' `
$LCTL --device %$osc_dev recover && $LCTL --device %$osc_dev recover
# give the mds_lov_sync threads a chance to run
sleep 5
}
-run_test 49 "Double OSC recovery, don't LASSERT"
+run_test 50 "Double OSC recovery, don't LASSERT (3812)"
# b3764 timed out lock replay
test_52() {
#endif
static const char usage[] =
-"Usage: %s -u user_id [-g grp_id ] [ -G ] command\n"
+"Usage: %s -u user_id [-g grp_id ] [ -G[gid0,gid1,...] ] command\n"
" -u user_id switch to UID user_id\n"
" -g grp_id switch to GID grp_id\n"
" -G[gid0,gid1,...] set supplementary groups\n";
export TESTGROUP=${TESTGROUP:-"correctness"}
export LUSTRE_TAG=${LUSTRE_TAG:-`cat $PWD/CVS/Tag | cut -c 2-`}
export TESTARCH=${TESTARCH:-`uname -r`}
-export NETWORKTYPE=${NETWORKTYPE:-"tcp"}
+export NETTYPE=${NETTYPE:-"tcp"}
export MACHINENAME=${MACHINENAME:-`hostname`}
usage() {
test_25a() {
echo '== symlink sanity ============================================='
+
mkdir $DIR/d25
ln -s d25 $DIR/s25
touch $DIR/s25/foo || error
}
run_test 51 "special situations: split htree with empty entry =="
+# Directory count shared by test_51b and test_51c; it is set at file scope
+# so that test_51c removes as many entries as test_51b ended up creating.
+export NUMTEST=70000
test_51b() {
- NUMTEST=70000
- check_kernel_version 40 || NUMTEST=31000
NUMFREE=`df -i -P $DIR | tail -n 1 | awk '{ print $4 }'`
- [ $NUMFREE -lt $NUMTEST ] && \
- echo "skipping test 51b, not enough free inodes($NUMFREE)" && \
+ # Skip outright when fewer than 21000 inodes are free.
+ [ $NUMFREE -lt 21000 ] && \
+ echo "skipping test 51b, not enough free inodes ($NUMFREE)" && \
return
+
+ # Use the smaller count when check_kernel_version 40 fails, then shrink
+ # it to NUMFREE - 50 if there are still fewer free inodes than that.
+ check_kernel_version 40 || NUMTEST=31000
+ [ $NUMFREE -lt $NUMTEST ] && NUMTEST=$(($NUMFREE - 50))
+
mkdir -p $DIR/d51b
(cd $DIR/d51b; mkdirmany t $NUMTEST)
}
run_test 51b "mkdir .../t-0 --- .../t-70000 ===================="
test_51c() {
- NUMTEST=70000
- check_kernel_version 40 || NUMTEST=31000
- NUMFREE=`df -i -P $DIR | tail -n 1 | awk '{ print $4 }'`
- [ $NUMFREE -lt $NUMTEST ] && echo "skipping test 51c" && return
- mkdir -p $DIR/d51b
+ # Remove the t-* directories under $DIR/d51b; skip when that directory
+ # does not exist.
+ [ ! -d $DIR/d51b ] && echo "skipping test 51c: $DIR/d51b missing" && \
+ return
+
(cd $DIR/d51b; rmdirmany t $NUMTEST)
}
run_test 51c "rmdir .../t-0 --- .../t-70000 ===================="
set -e
ONLY=${ONLY:-"$*"}
-# bug number for skipped test: 1768 3192
-ALWAYS_EXCEPT=${ALWAYS_EXCEPT:-"4 14b"}
+# bug number for skipped test: 1768 3192 3192
+ALWAYS_EXCEPT=${ALWAYS_EXCEPT:-"4 14b 14c"}
# UPDATE THE COMMENT ABOVE WITH BUG NUMBERS WHEN CHANGING ALWAYS_EXCEPT!
[ "$ALWAYS_EXCEPT$EXCEPT" ] && echo "Skipping tests: $ALWAYS_EXCEPT $EXCEPT"
lctl mark "$*" 2> /dev/null || true
}
+# Run a command under strace, logging when it starts and finishes.
+# The trace is written to $TMP/<first word of the command>.strace.
+# Returns the exit status of the strace invocation (also left in $RC).
+trace() {
+        log "STARTING: $*"
+        strace -o $TMP/$1.strace -ttt $*
+        RC=$?
+        log "FINISHED: $*: rc $RC"
+        return $RC
+}
+TRACE=${TRACE:-""}
+
run_one() {
if ! mount | grep -q $DIR1; then
$START
fi
- log "== test $1: $2"
+ log "== test $1: $2 `date +%H:%M:%S`"
export TESTNAME=test_$1
test_$1 || error "test_$1: exit with rc=$?"
unset TESTNAME
start $facet
do_facet $facet lctl --device %${facet}_svc abort_recovery
df $MOUNT || echo "first df failed: $?"
+ sleep 1
df $MOUNT || error "post-failover df: $?"
}
# Pretty tests run faster.
equals_msg $testnum: $message
- log "== test $1: $2"
+ log "== test $testnum: $message =========== `date +%H:%M:%S`"
test_${testnum} || error "test_$testnum failed with $?"
}
#etc
OSTSIZE=${OSTSIZE:-100000}
STRIPECNT=${STRIPECNT:-1}
-STRIPESZ=${STRIPESZ:-$((1024 * 1024))}
+STRIPE_BYTES=${STRIPE_BYTES:-$((1024 * 1024))}
OSDTYPE=${OSDTYPE:-obdfilter}
OSTFAILOVER=${OSTFAILOVER:-}
+MOUNT=${MOUNT:-/mnt/lustre}
FSTYPE=${FSTYPE:-ext3}
NETTYPE=${NETTYPE:-tcp}
${LMC} -m $config --add mds --format --node $MDSNODE --mds mds1 --fstype $FSTYPE --dev $MDSDEV --size $MDSSIZE ||exit 10
# configure ost
-${LMC} -m $config --add lov --lov lov1 --mds mds1 --stripe_sz $STRIPESZ --stripe_cnt $STRIPECNT --stripe_pattern 0 || exit 20
+${LMC} -m $config --add lov --lov lov1 --mds mds1 --stripe_sz $STRIPE_BYTES --stripe_cnt $STRIPECNT --stripe_pattern 0 || exit 20
COUNT=1
echo -n "adding OST on:"
for NODE in $OSTNODES; do
echo; echo -n "adding CLIENT on:"
for NODE in $CLIENTS; do
echo -n " $NODE"
- ${LMC} -m $config --add mtpt --node $NODE --path /mnt/lustre --mds mds1 --lov lov1 || exit 30
+ ${LMC} -m $config --add mtpt --node $NODE --path $MOUNT --mds mds1 --lov lov1 || exit 30
done
echo
"add an entry to the portals routing table\n"
"usage: add_route <gateway> <target> [<target>]"},
{"del_route", jt_ptl_del_route, 0,
- "delete the route via the given gateway to the given targets from the portals routing table\n"
+ "delete route via gateway to targets from the portals routing table\n"
"usage: del_route <gateway> [<target>] [<target>]"},
{"set_route", jt_ptl_notify_router, 0,
- "enable/disable routes via the given gateway in the portals routing table\n"
+ "enable/disable routes via gateway in the portals routing table\n"
"usage: set_route <gateway> <up/down> [<time>]"},
- {"route_list", jt_ptl_print_routes, 0, "print the portals routing table\n"
+ {"route_list", jt_ptl_print_routes, 0,
+ "print the portals routing table, same as show_route\n"
"usage: route_list"},
+ {"show_route", jt_ptl_print_routes, 0,
+ "print the portals routing table, same as route_list\n"
+ "usage: show_route"},
{"recv_mem", jt_ptl_rxmem, 0, "set socket receive buffer size, "
"if size is omited the current size is reported.\n"
"usage: recv_mem [size]"},
"usage: detach"},
{"lov_setup", jt_lcfg_lov_setup, 0,
"write setup an lov device\n"
- "usage: lov_setconfig lov-uuid stripe-count stripe-size offset pattern UUID1 [UUID2 ...]"},
+ "usage: lov_setup lov-uuid stripe-count stripe-size offset pattern UUID1 [UUID2 ...]"},
{"lov_getconfig", jt_obd_lov_getconfig, 0,
"read lov configuration from an mds device\n"
"usage: lov_getconfig lov-uuid"},
"debug daemon control and dump to a file\n"
"usage: debug_daemon [start file <#MB>|stop|pause|continue]"},
{"debug_kernel", jt_dbg_debug_kernel, 0,
- "get debug buffer and dump to a file\n"
+ "get debug buffer and dump to a file, same as dk\n"
"usage: debug_kernel [file] [raw]"},
{"dk", jt_dbg_debug_kernel, 0,
- "get debug buffer and dump to a file\n"
+ "get debug buffer and dump to a file, same as debug_kernel\n"
"usage: dk [file] [raw]"},
+ /* "debug_file" and "df" are two names for the jt_dbg_debug_file handler */
{"debug_file", jt_dbg_debug_file, 0,
- "read debug buffer from input and dump to output\n"
+ "read debug buffer from input and dump to output, same as df\n"
"usage: debug_file <input> [output] [raw]"},
+ {"df", jt_dbg_debug_file, 0,
+ "read debug buffer from input and dump to output, same as debug_file\n"
+ "usage: df <input> [output] [raw]"},
{"clear", jt_dbg_clear_debug_buf, 0, "clear kernel debug buffer\n"
"usage: clear"},
{"mark", jt_dbg_mark_debug_buf, 0,"insert marker text in kernel debug buffer\n"
if (argc != 2)
return CMD_HELP;
- obd_types[1] = obd_type1;
- obd_types[2] = obd_type2;
+ obd_types[0] = obd_type1;
+ obd_types[1] = obd_type2;
if (strcmp(argv[1], "osts") == 0) {
strcpy(obd_types[0], "osc");
char rawbuf[OBD_MAX_IOCTL_BUFFER];
char *bufl = rawbuf;
char *bufp = buf;
- int max = sizeof(rawbuf);
- struct obd_ioctl_data datal;
+ struct obd_ioctl_data datal = { 0, };
struct obd_statfs osfs_buffer;
while(bufp[0] == ' ')
datal.ioc_inlbuf1 = obd_name;
datal.ioc_inllen1 = strlen(obd_name) + 1;
- obd_ioctl_pack(&datal,&bufl,max);
+ rc = obd_ioctl_pack(&datal, &bufl, OBD_MAX_IOCTL_BUFFER);
+ if (rc) {
+ fprintf(stderr, "internal buffer error packing\n");
+ break;
+ }
rc = ioctl(dirfd(opendir(dir)), OBD_IOC_PING,
bufl);
lmd->lmd_nal = ptl_name2nal(opteq + 1);
} else if(!strcmp(opt, "cluster_id")) {
if (ptl_parse_nid(&cluster_id, opteq+1) != 0) {
- fprintf (stderr, "%s: can't parse NID "
- "%s\n", progname, opteq+1);
+ fprintf(stderr, "%s: can't parse NID "
+ "%s\n", progname, opteq+1);
return (-1);
}
lmd_cluster_id = cluster_id;
parse_route(opteq, opttgts);
} else if (!strcmp(opt, "local_nid")) {
if (ptl_parse_nid(&nid, opteq + 1) != 0) {
- fprintf (stderr, "%s: "
- "can't parse NID %s\n",
- progname,
- opteq+1);
+ fprintf(stderr, "%s: "
+ "can't parse NID %s\n",
+ progname,
+ opteq+1);
return (-1);
}
lmd->lmd_local_nid = nid;
} else if (!strcmp(opt, "server_nid")) {
if (ptl_parse_nid(&nid, opteq + 1) != 0) {
- fprintf (stderr, "%s: "
- "can't parse NID %s\n",
- progname, opteq + 1);
+ fprintf(stderr, "%s: "
+ "can't parse NID %s\n",
+ progname, opteq + 1);
return (-1);
}
lmd->lmd_server_nid = nid;
} else if (!strcmp(opt, "port")) {
lmd->lmd_port = val;
+ } else {
+ fprintf(stderr, "%s: unknown option '%s'\n",
+ progname, opt);
+ return (-1);
}
} else {
val = 1;
/* XXX ClusterID?
* XXX PtlGetId() will be safer if portals is loaded and
* initialised correctly at this time... */
- char buf[256];
+ char buf[256], *ptr = buf;
ptl_nid_t nid;
int rc;
} while (rc != 0 && pfiles[++i] != NULL);
if (rc != 0) {
- fprintf(stderr, "%s: can't read Elan ID from /proc\n",
- progname);
-
- return -1;
+ rc = gethostname(buf, sizeof(buf) - 1);
+ if (rc == 0) {
+ char *tmp = ptr;
+ while ((*tmp >= 'a' && *tmp <= 'z') ||
+ (*tmp >= 'A' && *tmp <= 'Z'))
+ tmp++;
+ ptr = strsep(&tmp, ".");
+ } else {
+ fprintf(stderr,
+ "%s: can't read Elan ID from /proc\n",
+ progname);
+ return -1;
+ }
}
}
- if (ptl_parse_nid (&nid, buf) != 0) {
+ if (ptl_parse_nid (&nid, ptr) != 0) {
fprintf (stderr, "%s: can't parse NID %s\n", progname, buf);
return (-1);
}
progname, hostname);
return (-1);
}
- } else if (lmd->lmd_nal == QSWNAL) {
+ } else if (lmd->lmd_nal == QSWNAL &&lmd->lmd_server_nid == PTL_NID_ANY){
char buf[64];
rc = sscanf(hostname, "%*[^0-9]%63[0-9]", buf);
if (rc != 1) {