+ memset(last->e_name, 0, esize);
+ memcpy(last->e_name, name, last->e_name_len);
+ if (start + offs + value_len > end)
-+ printk("ALERT at %s:%d: 0x%p + %d + %d > 0x%p\n",
-+ __FILE__, __LINE__, start, offs,
-+ value_len, end);
++ printk("ALERT at %s:%d: 0x%p + %d + %zd > 0x%p\n",
++ __FILE__, __LINE__, start, offs,
++ value_len, end);
+ memcpy(start + offs, value, value_len);
+ last = EXT3_XATTR_NEXT(last);
+ *((__u32 *) last) = 0;
+}
+
+int ext3_ext_ioctl(struct inode *inode, struct file *filp, unsigned int cmd,
-+ unsigned long arg)
++ unsigned long arg)
+{
+ int err = 0;
+
extern int ext3_forget(handle_t *, int, struct inode *, struct buffer_head *, int);
extern struct buffer_head * ext3_getblk (handle_t *, struct inode *, long, int, int *);
extern struct buffer_head * ext3_bread (handle_t *, struct inode *, int, int, int *);
-@@ -802,6 +809,14 @@
+@@ -802,6 +809,16 @@
extern struct inode_operations ext3_symlink_inode_operations;
extern struct inode_operations ext3_fast_symlink_inode_operations;
+extern void ext3_ext_init(struct super_block *);
+extern void ext3_ext_release(struct super_block *);
+extern void ext3_extents_initialize_blockmap(handle_t *, struct inode *);
++extern int ext3_ext_ioctl(struct inode *inode, struct file *filp,
++ unsigned int cmd, unsigned long arg);
#endif /* __KERNEL__ */
===================================================================
--- linux-2.6.5-sles9.orig/fs/ext3/mballoc.c 2003-01-30 13:24:37.000000000 +0300
+++ linux-2.6.5-sles9/fs/ext3/mballoc.c 2004-11-09 02:34:25.181340632 +0300
-@@ -0,0 +1,1428 @@
+@@ -0,0 +1,1441 @@
+/*
+ * Copyright (c) 2003, Cluster File Systems, Inc, info@clusterfs.com
+ * Written by Alex Tomas <alex@clusterfs.com>
+void ext3_free_blocks_old(handle_t *, struct inode *, unsigned long, unsigned long);
+int ext3_new_block_old(handle_t *, struct inode *, unsigned long, int *);
+int ext3_mb_reserve_blocks(struct super_block *, int);
-+void ext3_mb_release_blocks(struct super_block *, int);
+void ext3_mb_poll_new_transaction(struct super_block *, handle_t *);
+void ext3_mb_free_committed_blocks(struct super_block *);
+
+#define mb_correct_addr_and_bit(bit,addr) \
+{ \
-+ if ((unsigned) addr & 1) { \
++ if ((unsigned long)addr & 1) { \
+ bit += 8; \
+ addr--; \
+ } \
-+ if ((unsigned) addr & 2) { \
++ if ((unsigned long)addr & 2) { \
+ bit += 16; \
+ addr--; \
+ addr--; \
+{
+ struct ext3_sb_info *sbi = EXT3_SB(sb);
+
-+ J_ASSERT(sbi->s_buddy_blocks[group].bb_bitmap);
-+ J_ASSERT(sbi->s_buddy_blocks[group].bb_buddy);
++ J_ASSERT(sbi->s_buddy_blocks[group]->bb_bitmap);
++ J_ASSERT(sbi->s_buddy_blocks[group]->bb_buddy);
+
+ /* load bitmap */
-+ e3b->bd_bh = sb_getblk(sb, sbi->s_buddy_blocks[group].bb_bitmap);
++ e3b->bd_bh = sb_getblk(sb, sbi->s_buddy_blocks[group]->bb_bitmap);
+ if (e3b->bd_bh == NULL) {
+ ext3_error(sb, "ext3_mb_load_desc",
+ "can't get block for buddy bitmap\n");
+ J_ASSERT(buffer_uptodate(e3b->bd_bh));
+
+ /* load buddy */
-+ e3b->bd_bh2 = sb_getblk(sb, sbi->s_buddy_blocks[group].bb_buddy);
++ e3b->bd_bh2 = sb_getblk(sb, sbi->s_buddy_blocks[group]->bb_buddy);
+ if (e3b->bd_bh2 == NULL) {
+ ext3_error(sb, "ext3_mb_load_desc",
+ "can't get block for buddy bitmap\n");
+ e3b->bd_bitmap = e3b->bd_bh->b_data;
+ e3b->bd_buddy = e3b->bd_bh2->b_data;
+ e3b->bd_blkbits = sb->s_blocksize_bits;
-+ e3b->bd_bd = sbi->s_buddy_blocks + group;
++ e3b->bd_bd = sbi->s_buddy_blocks[group];
+ e3b->bd_sb = sb;
+
+ return 0;
+static inline void
+ext3_lock_group(struct super_block *sb, int group)
+{
-+ spin_lock(&EXT3_SB(sb)->s_buddy_blocks[group].bb_lock);
++ spin_lock(&EXT3_SB(sb)->s_buddy_blocks[group]->bb_lock);
+}
+
+static inline void
+ext3_unlock_group(struct super_block *sb, int group)
+{
-+ spin_unlock(&EXT3_SB(sb)->s_buddy_blocks[group].bb_lock);
++ spin_unlock(&EXT3_SB(sb)->s_buddy_blocks[group]->bb_lock);
+}
+
+static int mb_find_order_for_block(struct ext3_buddy *e3b, int block)
+}
+
+int ext3_mb_new_blocks(handle_t *handle, struct inode *inode,
-+ unsigned long goal, int *len, int flags, int *errp)
++ unsigned long goal, int *len, int flags, int *errp)
+{
+ struct buffer_head *bitmap_bh = NULL;
+ struct ext3_allocation_context ac;
+ goto out2;
+ }
+
-+ /* loop over the blocks, nad create buddies for free ones */
++ /* loop over the blocks, and create buddies for free ones */
+ for (i = 0; i < sb->s_blocksize * 8; i++) {
+ if (!mb_test_bit(i, (void *) bh->b_data)) {
+ mb_free_blocks(&e3b, i, 1);
+
+#define MB_CREDITS \
+ (EXT3_DATA_TRANS_BLOCKS + 3 + EXT3_INDEX_EXTRA_TRANS_BLOCKS + \
-+ + 2 * EXT3_SINGLEDATA_TRANS_BLOCKS)
++ 2 * EXT3_SINGLEDATA_TRANS_BLOCKS)
+
+int ext3_mb_init_backend(struct super_block *sb)
+{
+ tid_t target;
+ int err, i;
+
-+ sbi->s_buddy_blocks = kmalloc(sizeof(struct ext3_buddy_group_blocks) *
++ sbi->s_buddy_blocks = kmalloc(sizeof(struct ext3_buddy_group_blocks *) *
+ sbi->s_groups_count, GFP_KERNEL);
+ if (sbi->s_buddy_blocks == NULL) {
-+ printk("can't allocate mem for buddy maps\n");
++ printk("EXT3-fs: can't allocate mem for buddy maps\n");
+ return -ENOMEM;
+ }
+ memset(sbi->s_buddy_blocks, 0,
-+ sizeof(struct ext3_buddy_group_blocks) * sbi->s_groups_count);
++ sizeof(struct ext3_buddy_group_blocks *) * sbi->s_groups_count);
+ sbi->s_buddy = NULL;
+
+ down(&root->i_sem);
+ strlen(EXT3_BUDDY_FILE));
+ if (IS_ERR(db)) {
+ err = PTR_ERR(db);
-+ printk("can't lookup buddy file: %d\n", err);
++ printk("EXT3-fs: can't lookup buddy file: %d\n", err);
+ goto out;
+ }
+
+ struct buffer_head *bh = NULL;
+ handle_t *handle;
+
++ sbi->s_buddy_blocks[i] =
++ kmalloc(sizeof(struct ext3_buddy_group_blocks),
++ GFP_KERNEL);
++ if (sbi->s_buddy_blocks[i] == NULL) {
++ printk("EXT3-fs: can't allocate mem for buddy\n");
++ err = -ENOMEM;
++ goto out2;
++ }
++
+ handle = ext3_journal_start(sbi->s_buddy, MB_CREDITS);
+ if (IS_ERR(handle)) {
+ err = PTR_ERR(handle);
+ printk("can't get block for buddy bitmap: %d\n", err);
+ goto out2;
+ }
-+ sbi->s_buddy_blocks[i].bb_bitmap = bh->b_blocknr;
++ sbi->s_buddy_blocks[i]->bb_bitmap = bh->b_blocknr;
+ brelse(bh);
+
+ /* allocate block for buddy */
+ printk("can't get block for buddy: %d\n", err);
+ goto out2;
+ }
-+ sbi->s_buddy_blocks[i].bb_buddy = bh->b_blocknr;
++ sbi->s_buddy_blocks[i]->bb_buddy = bh->b_blocknr;
+ brelse(bh);
+ ext3_journal_stop(handle);
-+ spin_lock_init(&sbi->s_buddy_blocks[i].bb_lock);
-+ sbi->s_buddy_blocks[i].bb_md_cur = NULL;
-+ sbi->s_buddy_blocks[i].bb_tid = 0;
++ spin_lock_init(&sbi->s_buddy_blocks[i]->bb_lock);
++ sbi->s_buddy_blocks[i]->bb_md_cur = NULL;
++ sbi->s_buddy_blocks[i]->bb_tid = 0;
+ }
+
+ if (journal_start_commit(sbi->s_journal, &target))
+int ext3_mb_release(struct super_block *sb)
+{
+ struct ext3_sb_info *sbi = EXT3_SB(sb);
++ int i;
+
+ if (!test_opt(sb, MBALLOC))
+ return 0;
+ spin_unlock(&sbi->s_md_lock);
+ ext3_mb_free_committed_blocks(sb);
+
-+ if (sbi->s_buddy_blocks)
++ if (sbi->s_buddy_blocks) {
++ for (i = 0; i < sbi->s_groups_count; i++)
++ if (sbi->s_buddy_blocks[i])
++ kfree(sbi->s_buddy_blocks[i]);
+ kfree(sbi->s_buddy_blocks);
++ }
+ if (sbi->s_buddy)
+ iput(sbi->s_buddy);
+ if (sbi->s_blocks_reserved)
+
+ /* init file for buddy data */
+ clear_opt(EXT3_SB(sb)->s_mount_opt, MBALLOC);
-+ ext3_mb_init_backend(sb);
++ if (ext3_mb_init_backend(sb))
++ return 0;
+
+ es = EXT3_SB(sb)->s_es;
+ for (i = 0; i < EXT3_SB(sb)->s_groups_count; i++)
+ ext3_mb_free_blocks(handle, inode, block, count, metadata);
+ return;
+}
-+
Index: linux-2.6.5-sles9/fs/ext3/super.c
===================================================================
--- linux-2.6.5-sles9.orig/fs/ext3/super.c 2004-11-09 02:23:21.597220752 +0300
extern unsigned long ext3_count_free_blocks (struct super_block *);
extern void ext3_check_blocks_bitmap (struct super_block *);
extern struct ext3_group_desc * ext3_get_group_desc(struct super_block * sb,
+@@ -743,6 +746,13 @@
+ extern int ext3_ioctl (struct inode *, struct file *, unsigned int,
+ unsigned long);
+
++/* mballoc.c */
++extern int ext3_mb_init(struct super_block *sb);
++extern int ext3_mb_new_blocks(handle_t *handle, struct inode *inode,
++ unsigned long goal,int *len, int flags,int *errp);
++extern int ext3_mb_release(struct super_block *sb);
++extern void ext3_mb_release_blocks(struct super_block *, int);
++
+ /* namei.c */
+ extern int ext3_orphan_add(handle_t *, struct inode *);
+ extern int ext3_orphan_del(handle_t *, struct inode *);
Index: linux-2.6.5-sles9/include/linux/ext3_fs_sb.h
===================================================================
--- linux-2.6.5-sles9.orig/include/linux/ext3_fs_sb.h 2004-11-09 02:20:51.598024096 +0300
#endif
+
+ /* for buddy allocator */
-+ struct ext3_buddy_group_blocks *s_buddy_blocks;
++ struct ext3_buddy_group_blocks **s_buddy_blocks;
+ struct inode *s_buddy;
+ long s_blocks_reserved;
+ spinlock_t s_reserve_lock;
- don't try to complete elan receive that already failed (4012)
- free RPC server reply state on error (5406)
- clean up thread from ptlrpc_start_thread() on error (5160)
+ - readahead could read extra page into cache that wasn't ejected (5388)
+ - prevent races in class_attach/setup/cleanup/detach (5260)
+ - don't dereference de->d_inode after l_dput of de (5458)
+ - use "int" for stripe value returned from lock_to_stripe (5544)
+ - mballoc allocation and error-checking fixes in 2.6 (5504)
+ - block device patches to fix I/O request sizes in 2.6 (5482)
+ - protect KMS changes with its own lock, not i_sem (5492, 5624, 3453)
+ - look up hostnames for IB nals (5602)
* miscellania
- service request history (4965)
- put {ll,lov,osc}_async_page structs in a single slab (4699)
- create an "evict_client" /proc entry on OSTs, like the MDS has
- - fix mount usage message, return errors per mount(8)
+ - fix mount usage message, return errors per mount(8) (5168)
- change grep [] to grep "[]" in tests so they work in more UMLs
- fix ppc64/x86_64 spec to use %{_libdir} instead of /usr/lib (5389)
+ - remove ancient LOV_MAGIC_V0 EA support (5047)
+ - add "disk I/Os in flight" and "I/O req time" stats in obdfilter
2004-11-23 Cluster File Systems, Inc. <info@clusterfs.com>
* version 1.4.0
tbd Cluster File Systems, Inc. <info@clusterfs.com>
* version 1.2.9
+ * bug fixes
- send OST transaction number in read/write reply to free req (4966)
- don't ASSERT in ptl_send_rpc() if we run out of memory (5119)
- lock /proc/sys/portals/routes internal state, avoiding oops (4827)
AC_ARG_ENABLE([liblustre],
AC_HELP_STRING([--disable-liblustre],
[disable building of Lustre library]),
- [],[enable_liblustre=$enable_libsysio])
+ [],[enable_liblustre="no"])
AC_MSG_RESULT([$enable_liblustre])
+# only build sysio if liblustre is built
+with_sysio="$enable_liblustre"
AC_MSG_CHECKING([whether to build mpitests])
AC_ARG_ENABLE([mpitests],
# other configure checks
#
AC_DEFUN([LC_CONFIGURE],
-[# include/liblustre.h
-AC_CHECK_HEADERS([asm/page.h sys/user.h stdint.h])
+[LC_CONFIG_OBD_BUFFER_SIZE
-LC_CONFIG_OBD_BUFFER_SIZE
+# include/liblustre.h
+AC_CHECK_HEADERS([asm/page.h sys/user.h stdint.h])
# liblustre/llite_lib.h
AC_CHECK_HEADERS([xtio.h file.h])
-m4_define([LUSTRE_VERSION],[1.4.0.3])
+m4_define([LUSTRE_VERSION],[1.4.0.5])
OP; \
}} while(0)
-#define LL_CDEBUG_PAGE(mask, page, fmt, arg...) \
- CDEBUG(mask, "page %p map %p index %lu flags %lx count %u priv %0lx: " \
- fmt, page, page->mapping, page->index, page->flags, \
+#define LL_CDEBUG_PAGE(mask, page, fmt, arg...) \
+ CDEBUG(mask, "page %p map %p index %lu flags %lx count %u priv %0lx: "\
+ fmt, page, page->mapping, page->index, (long)page->flags, \
page_count(page), page->private, ## arg)
/* lib/debug.c */
int dump_ioo(struct obd_ioobj *nb);
int dump_req(struct ptlrpc_request *req);
int dump_obdo(struct obdo *oa);
+void dump_lsm(int level, struct lov_stripe_md *lsm);
int block_debug_setup(void *addr, int len, __u64 off, __u64 id);
int block_debug_check(char *who, void *addr, int len, __u64 off, __u64 id);
#endif
#define _LUSTRE_IDL_H_
#ifdef __KERNEL__
-# include <linux/ioctl.h>
# include <asm/types.h>
# include <linux/types.h>
-# include <linux/list.h>
-# include <linux/string.h> /* for strncpy, below */
# include <linux/fs.h> /* to check for FMODE_EXEC, lest we redefine */
#else
#ifdef __CYGWIN__
# include <asm/types.h>
# include <stdint.h>
#endif
-# include <portals/list.h>
-# include <string.h>
#endif
/* Defn's shared with user-space. */
typedef uint32_t obd_flag;
typedef uint32_t obd_count;
-#define OBD_FL_INLINEDATA (0x00000001)
-#define OBD_FL_OBDMDEXISTS (0x00000002)
#define OBD_FL_DELORPHAN (0x00000004) /* if set in o_flags delete orphans */
-#define OBD_FL_NORPC (0x00000008) // if set in o_flags set in OSC not OST
-#define OBD_FL_IDONLY (0x00000010) // if set in o_flags only adjust obj id
#define OBD_FL_RECREATE_OBJS (0x00000020) // recreate missing obj
#define OBD_FL_DEBUG_CHECK (0x00000040) /* echo client/server debug check */
struct lov_ost_data_v1 lmm_objects[0]; /* per-stripe data */
};
-#define LOV_MAGIC_V0 0x0BD00BD0
-
-struct lov_ost_data_v0 { /* per-stripe data structure (little-endian)*/
- __u64 l_object_id; /* OST object ID */
-};
-
-struct lov_mds_md_v0 { /* LOV EA mds/wire data (little-endian) */
- __u32 lmm_magic; /* magic number = LOV_MAGIC_V0 */
- __u64 lmm_object_id; /* LOV object ID */
- __u32 lmm_stripe_size; /* size of the stripe in bytes (not RAID1) */
- __u32 lmm_stripe_offset; /* starting stripe offset in lmm_objects */
- __u16 lmm_stripe_count; /* number of stipes in use for this object */
- __u16 lmm_ost_count; /* how many OST idx are in this LOV md */
- struct lov_ost_data_v0 lmm_objects[0];
-} __attribute__((packed));
-
#define OBD_MD_FLALL (0xffffffff)
#define OBD_MD_FLID (0x00000001) /* object ID */
#define OBD_MD_FLATIME (0x00000002) /* access time */
__u64 lli_io_epoch;
unsigned long lli_flags;
- /* this lock protects s_d_w and p_w_ll */
+ /* this lock protects s_d_w, p_w_ll, and the KMS */
spinlock_t lli_lock;
int lli_send_done_writing;
struct list_head lli_pending_write_llaps;
* considered full when less than ?_MAXREQSIZE is left in them.
*/
-#define LDLM_NUM_THREADS min(smp_num_cpus * smp_num_cpus * 8, 64)
+#define LDLM_NUM_THREADS min((int)(smp_num_cpus * smp_num_cpus * 8), 64)
#define LDLM_NBUFS 64
#define LDLM_BUFSIZE (8 * 1024)
#define LDLM_MAXREQSIZE (5 * 1024)
wait_queue_head_t *set_wakeup_ptr;
struct list_head set_requests;
set_interpreter_func set_interpret; /* completion callback */
- void *set_arg; /* completion context */
+ void *set_arg; /* completion context */
/* locked so that any old caller can communicate requests to
* the set holder who can then fold them into the lock-free set */
spinlock_t set_new_req_lock;
* callees of this method are encouraged to abort their state
* in the oig. This may be called multiple times. */
void (*occ_interrupted)(struct oig_callback_context *occ);
- unsigned interrupted:1;
+ unsigned int interrupted:1;
};
/* if we find more consumers this could be generalized */
struct semaphore fo_alloc_lock;
+ int fo_r_in_flight; /* protected by fo_objidlock */
+ int fo_w_in_flight; /* protected by fo_objidlock */
+
struct obd_histogram fo_r_pages;
struct obd_histogram fo_w_pages;
+ struct obd_histogram fo_read_rpc_hist;
+ struct obd_histogram fo_write_rpc_hist;
+ struct obd_histogram fo_r_io_time;
+ struct obd_histogram fo_w_io_time;
struct obd_histogram fo_r_discont_pages;
struct obd_histogram fo_w_discont_pages;
struct obd_histogram fo_r_discont_blocks;
struct obd_histogram fo_w_discont_blocks;
+ struct obd_histogram fo_r_disk_iosize;
+ struct obd_histogram fo_w_disk_iosize;
};
struct mds_server_data;
struct obd_histogram cl_write_rpc_hist;
struct obd_histogram cl_read_page_hist;
struct obd_histogram cl_write_page_hist;
+ struct obd_histogram cl_read_offset_hist;
+ struct obd_histogram cl_write_offset_hist;
struct mdc_rpc_lock *cl_rpc_lock;
struct mdc_rpc_lock *cl_setattr_lock;
int obd_minor;
unsigned int obd_attached:1, obd_set_up:1, obd_recovering:1,
obd_abort_recovery:1, obd_replayable:1, obd_no_transno:1,
- obd_no_recov:1, obd_stopping:1;
+ obd_no_recov:1, obd_stopping:1, obd_starting:1;
atomic_t obd_refcount;
wait_queue_head_t obd_refcount_waitq;
struct proc_dir_entry *obd_proc_entry;
int class_register_type(struct obd_ops *ops, struct lprocfs_vars *, char *nm);
int class_unregister_type(char *nm);
-struct obd_device *class_newdev(struct obd_type *type);
+struct obd_device *class_newdev(struct obd_type *type, char *name);
void class_release_dev(struct obd_device *obd);
int class_name2dev(char *name);
return sizeof(struct lov_stripe_md) + stripes*sizeof(struct lov_oinfo);
}
-static inline int lov_mds_md_v0_size(int stripes)
-{
- return sizeof(struct lov_mds_md_v0) +
- stripes * sizeof(struct lov_ost_data_v0);
-}
-
#define lov_mds_md_size(stripes) lov_mds_md_v1_size(stripes)
static inline int lov_mds_md_v1_size(int stripes)
{
#define OBD_FAIL_OST_BRW_PAUSE_BULK 0x214
#define OBD_FAIL_OST_ENOSPC 0x215
#define OBD_FAIL_OST_EROFS 0x216
+#define OBD_FAIL_OST_ENOENT 0x217
#define OBD_FAIL_LDLM 0x300
#define OBD_FAIL_LDLM_NAMESPACE_NEW 0x301
#define OBD_FAIL_CHECK_ONCE(id) \
({ int _ret_ = 0; \
if (OBD_FAIL_CHECK(id)) { \
- CERROR("obd_fail_loc=%x\n", id); \
+ CERROR("*** obd_fail_loc=%x ***\n", id); \
obd_fail_loc |= OBD_FAILED; \
if ((id) & OBD_FAIL_ONCE) \
obd_fail_loc |= OBD_FAIL_ONCE; \
# SCSI support type (disk, tape, CD-ROM)
#
CONFIG_BLK_DEV_SD=m
+CONFIG_SD_IOSTATS=y
CONFIG_CHR_DEV_ST=m
CONFIG_CHR_DEV_OSST=m
CONFIG_BLK_DEV_SR=m
CONFIG_RCFS_FS=m
CONFIG_CKRM_TYPE_TASKCLASS=y
CONFIG_CKRM_RES_NUMTASKS=m
+CONFIG_CKRM_CPU_SCHEDULE=y
+# CONFIG_CKRM_CPU_SCHEDULE_AT_BOOT is not set
CONFIG_CKRM_TYPE_SOCKETCLASS=y
CONFIG_CKRM_RBCE=m
CONFIG_CKRM_CRBCE=m
CONFIG_HOTPLUG_PCI_FAKE=m
CONFIG_HOTPLUG_PCI_AMD=m
CONFIG_HOTPLUG_PCI_ACPI=m
+# CONFIG_HOTPLUG_PCI_ACPI_IBM is not set
CONFIG_HOTPLUG_PCI_CPCI=y
CONFIG_HOTPLUG_PCI_CPCI_ZT5550=m
CONFIG_HOTPLUG_PCI_CPCI_GENERIC=m
# SCSI support type (disk, tape, CD-ROM)
#
CONFIG_BLK_DEV_SD=m
+CONFIG_SD_IOSTATS=y
CONFIG_CHR_DEV_ST=m
CONFIG_CHR_DEV_OSST=m
CONFIG_BLK_DEV_SR=m
# CONFIG_KEYBOARD_LKKBD is not set
CONFIG_KEYBOARD_XTKBD=m
CONFIG_KEYBOARD_NEWTON=m
+# CONFIG_KEYBOARD_POSFILTER is not set
CONFIG_INPUT_MOUSE=y
CONFIG_MOUSE_PS2=y
CONFIG_MOUSE_SERIAL=m
CONFIG_RCFS_FS=m
CONFIG_CKRM_TYPE_TASKCLASS=y
CONFIG_CKRM_RES_NUMTASKS=m
+CONFIG_CKRM_CPU_SCHEDULE=y
+# CONFIG_CKRM_CPU_SCHEDULE_AT_BOOT is not set
CONFIG_CKRM_TYPE_SOCKETCLASS=y
CONFIG_CKRM_RBCE=m
CONFIG_CKRM_CRBCE=m
CONFIG_HOTPLUG_PCI_FAKE=m
CONFIG_HOTPLUG_PCI_AMD=m
CONFIG_HOTPLUG_PCI_ACPI=m
+# CONFIG_HOTPLUG_PCI_ACPI_IBM is not set
CONFIG_HOTPLUG_PCI_CPCI=y
CONFIG_HOTPLUG_PCI_CPCI_ZT5550=m
CONFIG_HOTPLUG_PCI_CPCI_GENERIC=m
# SCSI support type (disk, tape, CD-ROM)
#
CONFIG_BLK_DEV_SD=m
+CONFIG_SD_IOSTATS=y
CONFIG_CHR_DEV_ST=m
CONFIG_CHR_DEV_OSST=m
CONFIG_BLK_DEV_SR=m
# CONFIG_KEYBOARD_LKKBD is not set
CONFIG_KEYBOARD_XTKBD=m
CONFIG_KEYBOARD_NEWTON=m
+# CONFIG_KEYBOARD_POSFILTER is not set
CONFIG_INPUT_MOUSE=y
CONFIG_MOUSE_PS2=y
CONFIG_MOUSE_SERIAL=m
--- /dev/null
+Index: linux-2.6.5-SLES9_SP1_BRANCH_2004111114454891_lustre.1.4.0-phik/include/scsi/scsi_host.h
+===================================================================
+--- linux-2.6.5-SLES9_SP1_BRANCH_2004111114454891_lustre.1.4.0-phik.orig/include/scsi/scsi_host.h 2005-01-07 04:23:12.344880136 -0800
++++ linux-2.6.5-SLES9_SP1_BRANCH_2004111114454891_lustre.1.4.0-phik/include/scsi/scsi_host.h 2005-01-07 04:23:33.338688592 -0800
+@@ -24,7 +24,7 @@
+ * used in one scatter-gather request.
+ */
+ #define SG_NONE 0
+-#define SG_ALL 0xff
++#define SG_ALL 256
+
+
+ #define DISABLE_CLUSTERING 0
+Index: linux-2.6.5-SLES9_SP1_BRANCH_2004111114454891_lustre.1.4.0-phik/include/linux/blkdev.h
+===================================================================
+--- linux-2.6.5-SLES9_SP1_BRANCH_2004111114454891_lustre.1.4.0-phik.orig/include/linux/blkdev.h 2004-11-11 07:28:28.000000000 -0800
++++ linux-2.6.5-SLES9_SP1_BRANCH_2004111114454891_lustre.1.4.0-phik/include/linux/blkdev.h 2005-01-07 04:24:33.819494112 -0800
+@@ -647,8 +647,8 @@
+ extern void blk_rq_prep_restart(struct request *);
+ extern int blkdev_issue_flush(struct block_device *, sector_t *);
+
+-#define MAX_PHYS_SEGMENTS 128
+-#define MAX_HW_SEGMENTS 128
++#define MAX_PHYS_SEGMENTS 256
++#define MAX_HW_SEGMENTS 256
+ #define MAX_SECTORS 255
+
+ #define MAX_SEGMENT_SIZE 65536
--- /dev/null
+diff -urN clean/arch/mips/kernel/irixelf.c linux-2.6.5-SLES9_SP1_BRANCH_91/arch/mips/kernel/irixelf.c
+--- clean/arch/mips/kernel/irixelf.c 2005-01-09 12:25:26.000000000 -0500
++++ linux-2.6.5-SLES9_SP1_BRANCH_91/arch/mips/kernel/irixelf.c 2005-01-19 19:29:59.909824951 -0500
+@@ -128,7 +128,7 @@
+ end = PAGE_ALIGN(end);
+ if (end <= start)
+ return;
+- do_brk(start, end - start);
++ do_brk_locked(start, end - start);
+ }
+
+
+diff -urN clean/arch/x86_64/ia32/ia32_aout.c linux-2.6.5-SLES9_SP1_BRANCH_91/arch/x86_64/ia32/ia32_aout.c
+--- clean/arch/x86_64/ia32/ia32_aout.c 2005-01-09 12:25:33.000000000 -0500
++++ linux-2.6.5-SLES9_SP1_BRANCH_91/arch/x86_64/ia32/ia32_aout.c 2005-01-19 19:30:50.255145196 -0500
+@@ -114,7 +114,7 @@
+ start = PAGE_ALIGN(start);
+ end = PAGE_ALIGN(end);
+ if (end > start) {
+- unsigned long addr = do_brk(start, end - start);
++ unsigned long addr = do_brk_locked(start, end - start);
+ if (BAD_ADDR(addr))
+ return addr;
+ }
+@@ -327,7 +327,7 @@
+ pos = 32;
+ map_size = ex.a_text+ex.a_data;
+
+- error = do_brk(text_addr & PAGE_MASK, map_size);
++ error = do_brk_locked(text_addr & PAGE_MASK, map_size);
+ if (error != (text_addr & PAGE_MASK)) {
+ send_sig(SIGKILL, current, 0);
+ return error;
+@@ -363,7 +363,7 @@
+
+ if (!bprm->file->f_op->mmap||((fd_offset & ~PAGE_MASK) != 0)) {
+ loff_t pos = fd_offset;
+- do_brk(N_TXTADDR(ex), ex.a_text+ex.a_data);
++ do_brk_locked(N_TXTADDR(ex), ex.a_text+ex.a_data);
+ bprm->file->f_op->read(bprm->file,(char *)N_TXTADDR(ex),
+ ex.a_text+ex.a_data, &pos);
+ flush_icache_range((unsigned long) N_TXTADDR(ex),
+@@ -476,7 +476,7 @@
+ }
+ #endif
+
+- do_brk(start_addr, ex.a_text + ex.a_data + ex.a_bss);
++ do_brk_locked(start_addr, ex.a_text + ex.a_data + ex.a_bss);
+
+ file->f_op->read(file, (char *)start_addr,
+ ex.a_text + ex.a_data, &pos);
+@@ -500,7 +500,7 @@
+ len = PAGE_ALIGN(ex.a_text + ex.a_data);
+ bss = ex.a_text + ex.a_data + ex.a_bss;
+ if (bss > len) {
+- error = do_brk(start_addr + len, bss - len);
++ error = do_brk_locked(start_addr + len, bss - len);
+ retval = error;
+ if (error != start_addr + len)
+ goto out;
+diff -urN clean/fs/binfmt_aout.c linux-2.6.5-SLES9_SP1_BRANCH_91/fs/binfmt_aout.c
+--- clean/fs/binfmt_aout.c 2005-01-09 12:25:33.000000000 -0500
++++ linux-2.6.5-SLES9_SP1_BRANCH_91/fs/binfmt_aout.c 2005-01-19 19:31:40.480490745 -0500
+@@ -51,7 +51,7 @@
+ start = PAGE_ALIGN(start);
+ end = PAGE_ALIGN(end);
+ if (end > start) {
+- unsigned long addr = do_brk(start, end - start);
++ unsigned long addr = do_brk_locked(start, end - start);
+ if (BAD_ADDR(addr))
+ return addr;
+ }
+@@ -323,10 +323,10 @@
+ loff_t pos = fd_offset;
+ /* Fuck me plenty... */
+ /* <AOL></AOL> */
+- error = do_brk(N_TXTADDR(ex), ex.a_text);
++ error = do_brk_locked(N_TXTADDR(ex), ex.a_text);
+ bprm->file->f_op->read(bprm->file, (char *) N_TXTADDR(ex),
+ ex.a_text, &pos);
+- error = do_brk(N_DATADDR(ex), ex.a_data);
++ error = do_brk_locked(N_DATADDR(ex), ex.a_data);
+ bprm->file->f_op->read(bprm->file, (char *) N_DATADDR(ex),
+ ex.a_data, &pos);
+ goto beyond_if;
+@@ -347,7 +347,7 @@
+ map_size = ex.a_text+ex.a_data;
+ #endif
+
+- error = do_brk(text_addr & PAGE_MASK, map_size);
++ error = do_brk_locked(text_addr & PAGE_MASK, map_size);
+ if (error != (text_addr & PAGE_MASK)) {
+ send_sig(SIGKILL, current, 0);
+ return error;
+@@ -381,7 +381,7 @@
+
+ if (!bprm->file->f_op->mmap||((fd_offset & ~PAGE_MASK) != 0)) {
+ loff_t pos = fd_offset;
+- do_brk(N_TXTADDR(ex), ex.a_text+ex.a_data);
++ do_brk_locked(N_TXTADDR(ex), ex.a_text+ex.a_data);
+ bprm->file->f_op->read(bprm->file,(char *)N_TXTADDR(ex),
+ ex.a_text+ex.a_data, &pos);
+ flush_icache_range((unsigned long) N_TXTADDR(ex),
+@@ -486,7 +486,7 @@
+ error_time = jiffies;
+ }
+
+- do_brk(start_addr, ex.a_text + ex.a_data + ex.a_bss);
++ do_brk_locked(start_addr, ex.a_text + ex.a_data + ex.a_bss);
+
+ file->f_op->read(file, (char *)start_addr,
+ ex.a_text + ex.a_data, &pos);
+@@ -510,7 +510,7 @@
+ len = PAGE_ALIGN(ex.a_text + ex.a_data);
+ bss = ex.a_text + ex.a_data + ex.a_bss;
+ if (bss > len) {
+- error = do_brk(start_addr + len, bss - len);
++ error = do_brk_locked(start_addr + len, bss - len);
+ retval = error;
+ if (error != start_addr + len)
+ goto out;
+diff -urN clean/fs/binfmt_elf.c linux-2.6.5-SLES9_SP1_BRANCH_91/fs/binfmt_elf.c
+--- clean/fs/binfmt_elf.c 2005-01-09 12:25:33.000000000 -0500
++++ linux-2.6.5-SLES9_SP1_BRANCH_91/fs/binfmt_elf.c 2005-01-19 19:29:59.914823791 -0500
+@@ -88,7 +88,7 @@
+ start = ELF_PAGEALIGN(start);
+ end = ELF_PAGEALIGN(end);
+ if (end > start) {
+- unsigned long addr = do_brk(start, end - start);
++ unsigned long addr = do_brk_locked(start, end - start);
+ if (BAD_ADDR(addr))
+ return addr;
+ }
+@@ -406,7 +406,7 @@
+
+ /* Map the last of the bss segment */
+ if (last_bss > elf_bss) {
+- error = do_brk(elf_bss, last_bss - elf_bss);
++ error = do_brk_locked(elf_bss, last_bss - elf_bss);
+ if (BAD_ADDR(error))
+ goto out_close;
+ }
+@@ -446,7 +446,7 @@
+ goto out;
+ }
+
+- do_brk(0, text_data);
++ do_brk_locked(0, text_data);
+ if (!interpreter->f_op || !interpreter->f_op->read)
+ goto out;
+ if (interpreter->f_op->read(interpreter, addr, text_data, &offset) < 0)
+@@ -454,7 +454,7 @@
+ flush_icache_range((unsigned long)addr,
+ (unsigned long)addr + text_data);
+
+- do_brk(ELF_PAGESTART(text_data + ELF_MIN_ALIGN - 1),
++ do_brk_locked(ELF_PAGESTART(text_data + ELF_MIN_ALIGN - 1),
+ interp_ex->a_bss);
+ elf_entry = interp_ex->a_entry;
+
+@@ -1006,7 +1006,7 @@
+ len = ELF_PAGESTART(elf_phdata->p_filesz + elf_phdata->p_vaddr + ELF_MIN_ALIGN - 1);
+ bss = elf_phdata->p_memsz + elf_phdata->p_vaddr;
+ if (bss > len)
+- do_brk(len, bss - len);
++ do_brk_locked(len, bss - len);
+ error = 0;
+
+ out_free_ph:
+diff -urN clean/include/linux/mm.h linux-2.6.5-SLES9_SP1_BRANCH_91/include/linux/mm.h
+--- clean/include/linux/mm.h 2005-01-09 12:25:34.000000000 -0500
++++ linux-2.6.5-SLES9_SP1_BRANCH_91/include/linux/mm.h 2005-01-19 19:29:59.915823559 -0500
+@@ -821,6 +821,7 @@
+ extern int do_munmap(struct mm_struct *, unsigned long, size_t);
+
+ extern unsigned long do_brk(unsigned long, unsigned long);
++extern unsigned long do_brk_locked(unsigned long, unsigned long);
+
+ /* vma merging helpers */
+ static inline void
+diff -urN clean/mm/mmap.c linux-2.6.5-SLES9_SP1_BRANCH_91/mm/mmap.c
+--- clean/mm/mmap.c 2005-01-09 12:25:34.000000000 -0500
++++ linux-2.6.5-SLES9_SP1_BRANCH_91/mm/mmap.c 2005-01-19 19:29:59.917823095 -0500
+@@ -1658,6 +1658,20 @@
+
+ EXPORT_SYMBOL(do_brk);
+
++/* Locking version of do_brk(): takes current->mm->mmap_sem for write. */
++unsigned long do_brk_locked(unsigned long addr, unsigned long len)
++{
++ unsigned long ret;
++
++ down_write(&current->mm->mmap_sem);
++ ret = do_brk(addr, len);
++ up_write(&current->mm->mmap_sem);
++
++ return ret;
++}
++
++EXPORT_SYMBOL(do_brk_locked);
++
+ /* Release all mmaps. */
+ void exit_mmap(struct mm_struct *mm)
+ {
+diff -urN clean/mm/nommu.c linux-2.6.5-SLES9_SP1_BRANCH_91/mm/nommu.c
+--- clean/mm/nommu.c 2005-01-09 12:25:27.000000000 -0500
++++ linux-2.6.5-SLES9_SP1_BRANCH_91/mm/nommu.c 2005-01-19 19:29:59.918822864 -0500
+@@ -227,6 +227,11 @@
+ return audit_lresult(mm->brk = brk);
+ }
+
++unsigned long do_brk_locked(unsigned long addr, unsigned long len)
++{
++ return -ENOMEM; /* stub in mm/nommu.c: unconditionally reports failure */
++}
++
+ /*
+ * Combine the mmap "prot" and "flags" argument into one "vm_flags" used
+ * internally. Essentially, translate the "PROT_xxx" and "MAP_xxx" bits
+ memset(last->e_name, 0, esize);
+ memcpy(last->e_name, name, last->e_name_len);
+ if (start + offs + value_len > end)
-+ printk("ALERT at %s:%d: 0x%p + %d + %d > 0x%p\n",
-+ __FILE__, __LINE__, start, offs,
-+ value_len, end);
++ printk("ALERT at %s:%d: 0x%p + %d + %zd > 0x%p\n",
++ __FILE__, __LINE__, start, offs,
++ value_len, end);
+ memcpy(start + offs, value, value_len);
+ last = EXT3_XATTR_NEXT(last);
+ *((__u32 *) last) = 0;
+}
+
+int ext3_ext_ioctl(struct inode *inode, struct file *filp, unsigned int cmd,
-+ unsigned long arg)
++ unsigned long arg)
+{
+ int err = 0;
+
extern int ext3_forget(handle_t *, int, struct inode *, struct buffer_head *, int);
extern struct buffer_head * ext3_getblk (handle_t *, struct inode *, long, int, int *);
extern struct buffer_head * ext3_bread (handle_t *, struct inode *, int, int, int *);
-@@ -770,6 +779,14 @@
+@@ -770,6 +779,16 @@
extern struct inode_operations ext3_symlink_inode_operations;
extern struct inode_operations ext3_fast_symlink_inode_operations;
+/* extents.c */
+extern int ext3_ext_writepage_trans_blocks(struct inode *, int);
+extern int ext3_ext_get_block(handle_t *, struct inode *, long,
-+ struct buffer_head *, int);
++ struct buffer_head *, int);
+extern void ext3_ext_truncate(struct inode *);
+extern void ext3_ext_init(struct super_block *);
+extern void ext3_ext_release(struct super_block *);
+extern void ext3_extents_initialize_blockmap(handle_t *, struct inode *);
++extern int ext3_ext_ioctl(struct inode *inode, struct file *filp,
++ unsigned int cmd, unsigned long arg);
#endif /* __KERNEL__ */
+}
+
+int ext3_ext_ioctl(struct inode *inode, struct file *filp, unsigned int cmd,
-+ unsigned long arg)
++ unsigned long arg)
+{
+ int err = 0;
+
extern int ext3_forget(handle_t *, int, struct inode *, struct buffer_head *, int);
extern struct buffer_head * ext3_getblk (handle_t *, struct inode *, long, int, int *);
extern struct buffer_head * ext3_bread (handle_t *, struct inode *, int, int, int *);
-@@ -770,6 +780,14 @@
+@@ -770,6 +780,16 @@
extern struct inode_operations ext3_symlink_inode_operations;
extern struct inode_operations ext3_fast_symlink_inode_operations;
+extern void ext3_ext_init(struct super_block *);
+extern void ext3_ext_release(struct super_block *);
+extern void ext3_extents_initialize_blockmap(handle_t *, struct inode *);
++extern int ext3_ext_ioctl(struct inode *inode, struct file *filp,
++ unsigned int cmd, unsigned long arg);
#endif /* __KERNEL__ */
+}
+
+int ext3_ext_ioctl(struct inode *inode, struct file *filp, unsigned int cmd,
-+ unsigned long arg)
++ unsigned long arg)
+{
+ int err = 0;
+
extern int ext3_forget(handle_t *, int, struct inode *, struct buffer_head *, int);
extern struct buffer_head * ext3_getblk (handle_t *, struct inode *, long, int, int *);
extern struct buffer_head * ext3_bread (handle_t *, struct inode *, int, int, int *);
-@@ -770,6 +779,14 @@
+@@ -770,6 +779,16 @@
extern struct inode_operations ext3_symlink_inode_operations;
extern struct inode_operations ext3_fast_symlink_inode_operations;
+extern void ext3_ext_init(struct super_block *);
+extern void ext3_ext_release(struct super_block *);
+extern void ext3_extents_initialize_blockmap(handle_t *, struct inode *);
++extern int ext3_ext_ioctl(struct inode *inode, struct file *filp,
++ unsigned int cmd, unsigned long arg);
#endif /* __KERNEL__ */
+}
+
+int ext3_ext_ioctl(struct inode *inode, struct file *filp, unsigned int cmd,
-+ unsigned long arg)
++ unsigned long arg)
+{
+ int err = 0;
+
extern int ext3_forget(handle_t *, int, struct inode *, struct buffer_head *, int);
extern struct buffer_head * ext3_getblk (handle_t *, struct inode *, long, int, int *);
extern struct buffer_head * ext3_bread (handle_t *, struct inode *, int, int, int *);
-@@ -769,6 +778,14 @@
+@@ -769,6 +778,16 @@
extern struct inode_operations ext3_symlink_inode_operations;
extern struct inode_operations ext3_fast_symlink_inode_operations;
+extern void ext3_ext_init(struct super_block *);
+extern void ext3_ext_release(struct super_block *);
+extern void ext3_extents_initialize_blockmap(handle_t *, struct inode *);
++extern int ext3_ext_ioctl(struct inode *inode, struct file *filp,
++ unsigned int cmd, unsigned long arg);
#endif /* __KERNEL__ */
+}
+
+int ext3_ext_ioctl(struct inode *inode, struct file *filp, unsigned int cmd,
-+ unsigned long arg)
++ unsigned long arg)
+{
+ int err = 0;
+
extern int ext3_forget(handle_t *, int, struct inode *, struct buffer_head *, int);
extern struct buffer_head * ext3_getblk (handle_t *, struct inode *, long, int, int *);
extern struct buffer_head * ext3_bread (handle_t *, struct inode *, int, int, int *);
-@@ -802,6 +809,14 @@
+@@ -802,6 +809,16 @@
extern struct inode_operations ext3_symlink_inode_operations;
extern struct inode_operations ext3_fast_symlink_inode_operations;
+extern void ext3_ext_init(struct super_block *);
+extern void ext3_ext_release(struct super_block *);
+extern void ext3_extents_initialize_blockmap(handle_t *, struct inode *);
++extern int ext3_ext_ioctl(struct inode *inode, struct file *filp,
++ unsigned int cmd, unsigned long arg);
#endif /* __KERNEL__ */
===================================================================
--- linux-2.6.5-sles9.orig/fs/ext3/mballoc.c 2003-01-30 13:24:37.000000000 +0300
+++ linux-2.6.5-sles9/fs/ext3/mballoc.c 2004-11-09 02:34:25.181340632 +0300
-@@ -0,0 +1,1428 @@
+@@ -0,0 +1,1441 @@
+/*
+ * Copyright (c) 2003, Cluster File Systems, Inc, info@clusterfs.com
+ * Written by Alex Tomas <alex@clusterfs.com>
+void ext3_free_blocks_old(handle_t *, struct inode *, unsigned long, unsigned long);
+int ext3_new_block_old(handle_t *, struct inode *, unsigned long, int *);
+int ext3_mb_reserve_blocks(struct super_block *, int);
-+void ext3_mb_release_blocks(struct super_block *, int);
+void ext3_mb_poll_new_transaction(struct super_block *, handle_t *);
+void ext3_mb_free_committed_blocks(struct super_block *);
+
+#define mb_correct_addr_and_bit(bit,addr) \
+{ \
-+ if ((unsigned) addr & 1) { \
++ if ((unsigned long)addr & 1) { \
+ bit += 8; \
+ addr--; \
+ } \
-+ if ((unsigned) addr & 2) { \
++ if ((unsigned long)addr & 2) { \
+ bit += 16; \
+ addr--; \
+ addr--; \
+{
+ struct ext3_sb_info *sbi = EXT3_SB(sb);
+
-+ J_ASSERT(sbi->s_buddy_blocks[group].bb_bitmap);
-+ J_ASSERT(sbi->s_buddy_blocks[group].bb_buddy);
++ J_ASSERT(sbi->s_buddy_blocks[group]->bb_bitmap);
++ J_ASSERT(sbi->s_buddy_blocks[group]->bb_buddy);
+
+ /* load bitmap */
-+ e3b->bd_bh = sb_getblk(sb, sbi->s_buddy_blocks[group].bb_bitmap);
++ e3b->bd_bh = sb_getblk(sb, sbi->s_buddy_blocks[group]->bb_bitmap);
+ if (e3b->bd_bh == NULL) {
+ ext3_error(sb, "ext3_mb_load_desc",
+ "can't get block for buddy bitmap\n");
+ J_ASSERT(buffer_uptodate(e3b->bd_bh));
+
+ /* load buddy */
-+ e3b->bd_bh2 = sb_getblk(sb, sbi->s_buddy_blocks[group].bb_buddy);
++ e3b->bd_bh2 = sb_getblk(sb, sbi->s_buddy_blocks[group]->bb_buddy);
+ if (e3b->bd_bh2 == NULL) {
+ ext3_error(sb, "ext3_mb_load_desc",
+ "can't get block for buddy bitmap\n");
+ e3b->bd_bitmap = e3b->bd_bh->b_data;
+ e3b->bd_buddy = e3b->bd_bh2->b_data;
+ e3b->bd_blkbits = sb->s_blocksize_bits;
-+ e3b->bd_bd = sbi->s_buddy_blocks + group;
++ e3b->bd_bd = sbi->s_buddy_blocks[group];
+ e3b->bd_sb = sb;
+
+ return 0;
+static inline void
+ext3_lock_group(struct super_block *sb, int group)
+{
-+ spin_lock(&EXT3_SB(sb)->s_buddy_blocks[group].bb_lock);
++ spin_lock(&EXT3_SB(sb)->s_buddy_blocks[group]->bb_lock);
+}
+
+static inline void
+ext3_unlock_group(struct super_block *sb, int group)
+{
-+ spin_unlock(&EXT3_SB(sb)->s_buddy_blocks[group].bb_lock);
++ spin_unlock(&EXT3_SB(sb)->s_buddy_blocks[group]->bb_lock);
+}
+
+static int mb_find_order_for_block(struct ext3_buddy *e3b, int block)
+}
+
+int ext3_mb_new_blocks(handle_t *handle, struct inode *inode,
-+ unsigned long goal, int *len, int flags, int *errp)
++ unsigned long goal, int *len, int flags, int *errp)
+{
+ struct buffer_head *bitmap_bh = NULL;
+ struct ext3_allocation_context ac;
+ goto out2;
+ }
+
-+ /* loop over the blocks, nad create buddies for free ones */
++ /* loop over the blocks, and create buddies for free ones */
+ for (i = 0; i < sb->s_blocksize * 8; i++) {
+ if (!mb_test_bit(i, (void *) bh->b_data)) {
+ mb_free_blocks(&e3b, i, 1);
+
+#define MB_CREDITS \
+ (EXT3_DATA_TRANS_BLOCKS + 3 + EXT3_INDEX_EXTRA_TRANS_BLOCKS + \
-+ + 2 * EXT3_SINGLEDATA_TRANS_BLOCKS)
++ 2 * EXT3_SINGLEDATA_TRANS_BLOCKS)
+
+int ext3_mb_init_backend(struct super_block *sb)
+{
+ tid_t target;
+ int err, i;
+
-+ sbi->s_buddy_blocks = kmalloc(sizeof(struct ext3_buddy_group_blocks) *
++ sbi->s_buddy_blocks = kmalloc(sizeof(struct ext3_buddy_group_blocks *) *
+ sbi->s_groups_count, GFP_KERNEL);
+ if (sbi->s_buddy_blocks == NULL) {
-+ printk("can't allocate mem for buddy maps\n");
++ printk("EXT3-fs: can't allocate mem for buddy maps\n");
+ return -ENOMEM;
+ }
+ memset(sbi->s_buddy_blocks, 0,
-+ sizeof(struct ext3_buddy_group_blocks) * sbi->s_groups_count);
++ sizeof(struct ext3_buddy_group_blocks *) * sbi->s_groups_count);
+ sbi->s_buddy = NULL;
+
+ down(&root->i_sem);
+ strlen(EXT3_BUDDY_FILE));
+ if (IS_ERR(db)) {
+ err = PTR_ERR(db);
-+ printk("can't lookup buddy file: %d\n", err);
++ printk("EXT3-fs: can't lookup buddy file: %d\n", err);
+ goto out;
+ }
+
+ struct buffer_head *bh = NULL;
+ handle_t *handle;
+
++ sbi->s_buddy_blocks[i] =
++ kmalloc(sizeof(struct ext3_buddy_group_blocks),
++ GFP_KERNEL);
++ if (sbi->s_buddy_blocks[i] == NULL) {
++ printk("EXT3-fs: can't allocate mem for buddy\n");
++ err = -ENOMEM;
++ goto out2;
++ }
++
+ handle = ext3_journal_start(sbi->s_buddy, MB_CREDITS);
+ if (IS_ERR(handle)) {
+ err = PTR_ERR(handle);
+ printk("can't get block for buddy bitmap: %d\n", err);
+ goto out2;
+ }
-+ sbi->s_buddy_blocks[i].bb_bitmap = bh->b_blocknr;
++ sbi->s_buddy_blocks[i]->bb_bitmap = bh->b_blocknr;
+ brelse(bh);
+
+ /* allocate block for buddy */
+ printk("can't get block for buddy: %d\n", err);
+ goto out2;
+ }
-+ sbi->s_buddy_blocks[i].bb_buddy = bh->b_blocknr;
++ sbi->s_buddy_blocks[i]->bb_buddy = bh->b_blocknr;
+ brelse(bh);
+ ext3_journal_stop(handle);
-+ spin_lock_init(&sbi->s_buddy_blocks[i].bb_lock);
-+ sbi->s_buddy_blocks[i].bb_md_cur = NULL;
-+ sbi->s_buddy_blocks[i].bb_tid = 0;
++ spin_lock_init(&sbi->s_buddy_blocks[i]->bb_lock);
++ sbi->s_buddy_blocks[i]->bb_md_cur = NULL;
++ sbi->s_buddy_blocks[i]->bb_tid = 0;
+ }
+
+ if (journal_start_commit(sbi->s_journal, &target))
+int ext3_mb_release(struct super_block *sb)
+{
+ struct ext3_sb_info *sbi = EXT3_SB(sb);
++ int i;
+
+ if (!test_opt(sb, MBALLOC))
+ return 0;
+ spin_unlock(&sbi->s_md_lock);
+ ext3_mb_free_committed_blocks(sb);
+
-+ if (sbi->s_buddy_blocks)
++ if (sbi->s_buddy_blocks) {
++ for (i = 0; i < sbi->s_groups_count; i++)
++ if (sbi->s_buddy_blocks[i])
++ kfree(sbi->s_buddy_blocks[i]);
+ kfree(sbi->s_buddy_blocks);
++ }
+ if (sbi->s_buddy)
+ iput(sbi->s_buddy);
+ if (sbi->s_blocks_reserved)
+
+ /* init file for buddy data */
+ clear_opt(EXT3_SB(sb)->s_mount_opt, MBALLOC);
-+ ext3_mb_init_backend(sb);
++ if (ext3_mb_init_backend(sb))
++ return 0;
+
+ es = EXT3_SB(sb)->s_es;
+ for (i = 0; i < EXT3_SB(sb)->s_groups_count; i++)
+ ext3_mb_free_blocks(handle, inode, block, count, metadata);
+ return;
+}
-+
Index: linux-2.6.5-sles9/fs/ext3/super.c
===================================================================
--- linux-2.6.5-sles9.orig/fs/ext3/super.c 2004-11-09 02:23:21.597220752 +0300
extern unsigned long ext3_count_free_blocks (struct super_block *);
extern void ext3_check_blocks_bitmap (struct super_block *);
extern struct ext3_group_desc * ext3_get_group_desc(struct super_block * sb,
+@@ -743,6 +746,13 @@
+ extern int ext3_ioctl (struct inode *, struct file *, unsigned int,
+ unsigned long);
+
++/* mballoc.c */
++extern int ext3_mb_init(struct super_block *sb);
++extern int ext3_mb_new_blocks(handle_t *handle, struct inode *inode,
++ unsigned long goal,int *len, int flags,int *errp);
++extern int ext3_mb_release(struct super_block *sb);
++extern void ext3_mb_release_blocks(struct super_block *, int);
++
+ /* namei.c */
+ extern int ext3_orphan_add(handle_t *, struct inode *);
+ extern int ext3_orphan_del(handle_t *, struct inode *);
Index: linux-2.6.5-sles9/include/linux/ext3_fs_sb.h
===================================================================
--- linux-2.6.5-sles9.orig/include/linux/ext3_fs_sb.h 2004-11-09 02:20:51.598024096 +0300
#endif
+
+ /* for buddy allocator */
-+ struct ext3_buddy_group_blocks *s_buddy_blocks;
++ struct ext3_buddy_group_blocks **s_buddy_blocks;
+ struct inode *s_buddy;
+ long s_blocks_reserved;
+ spinlock_t s_reserve_lock;
--- /dev/null
+Index: linux-2.6.5-SLES9_SP1_BRANCH_2004111114454891_lustre.1.4.0-phik/drivers/scsi/Kconfig
+===================================================================
+--- linux-2.6.5-SLES9_SP1_BRANCH_2004111114454891_lustre.1.4.0-phik.orig/drivers/scsi/Kconfig 2004-11-11 07:28:52.000000000 -0800
++++ linux-2.6.5-SLES9_SP1_BRANCH_2004111114454891_lustre.1.4.0-phik/drivers/scsi/Kconfig 2005-01-06 22:58:42.338770968 -0800
+@@ -55,6 +55,13 @@
+ In this case, do not compile the driver for your SCSI host adapter
+ (below) as a module either.
+
++config SD_IOSTATS
++ bool "Enable SCSI disk I/O stats"
++ depends on BLK_DEV_SD
++ ---help---
++ This enables SCSI disk I/O stats collection. You must also enable
++ /proc file system support if you want this feature.
++
+ config CHR_DEV_ST
+ tristate "SCSI tape support"
+ depends on SCSI
+Index: linux-2.6.5-SLES9_SP1_BRANCH_2004111114454891_lustre.1.4.0-phik/drivers/scsi/sd.c
+===================================================================
+--- linux-2.6.5-SLES9_SP1_BRANCH_2004111114454891_lustre.1.4.0-phik.orig/drivers/scsi/sd.c 2004-11-11 07:28:28.000000000 -0800
++++ linux-2.6.5-SLES9_SP1_BRANCH_2004111114454891_lustre.1.4.0-phik/drivers/scsi/sd.c 2005-01-07 01:29:47.033727872 -0800
+@@ -59,12 +59,44 @@
+
+ #include "scsi_logging.h"
+
++#if (defined(CONFIG_SD_IOSTATS) && defined(CONFIG_PROC_FS))
++# include <linux/proc_fs.h>
++# include <linux/seq_file.h>
++
++typedef struct { /* one histogram bucket */
++ unsigned long long iostat_size; /* running sum of nsect added by sd_iostats_bump() */
++ unsigned long long iostat_count; /* number of requests counted in this bucket */
++} iostat_counter_t;
++
++#define IOSTAT_NCOUNTERS 16
++typedef struct { /* per-disk statistics, one instance per sd_iostats[] slot */
++ iostat_counter_t iostat_read_histogram[IOSTAT_NCOUNTERS]; /* read requests, bucketed by size in sd_iostats_bump() */
++ iostat_counter_t iostat_write_histogram[IOSTAT_NCOUNTERS]; /* write requests, same bucketing */
++ struct timeval iostat_timeval; /* time the stats were created or last reset */
++} iostat_stats_t;
++
++iostat_stats_t **sd_iostats;
++spinlock_t sd_iostats_lock;
++struct proc_dir_entry *sd_iostats_procdir;
++char sd_iostats_procdir_name[] = "sd_iostats";
++
++extern void sd_iostats_init(void);
++extern void sd_iostats_init_disk(struct gendisk *);
++extern void sd_iostats_fini(void);
++extern void sd_iostats_bump(int disk, unsigned int nsect, int iswrite);
++#else
++static inline void sd_iostats_init(void) {} /* no-op stubs used when iostats are compiled out */
++static inline void sd_iostats_init_disk(struct gendisk *disk) {} /* parameter must be named in a definition */
++static inline void sd_iostats_fini(void) {}
++static inline void sd_iostats_bump(int disk, unsigned int nsect, int iswrite) {} /* same signature as the real function */
++#endif
+
+ /*
+ * Remaining dev_t-handling stuff
+ */
+ #define SD_MAJORS 16
+ #define SD_DISKS 32768 /* anything between 256 and 262144 */
++#define SD_STATS 256
+
+ /*
+ * Time out in seconds for disks and Magneto-opticals (which are slower).
+@@ -264,6 +296,9 @@
+ SCSI_LOG_HLQUEUE(2, printk("%s : block=%llu\n",
+ disk->disk_name, (unsigned long long)block));
+
++ sd_iostats_bump(scsi_disk(disk)->index, this_count,
++ rq_data_dir(SCpnt->request) == WRITE);
++
+ /*
+ * If we have a 1K hardware sectorsize, prevent access to single
+ * 512 byte sectors. In theory we could handle this - in fact
+@@ -460,6 +495,7 @@
+ scsi_set_medium_removal(sdev, SCSI_REMOVAL_PREVENT);
+ }
+
++ sd_iostats_init_disk(disk);
+ return 0;
+
+ error_out:
+@@ -1548,6 +1584,327 @@
+ sd_sync_cache(sdp);
+ }
+
++#if (defined(CONFIG_SD_IOSTATS) && defined(CONFIG_PROC_FS))
++static int /* seq_file show callback: prints one disk's read/write size histograms; returns 0 */
++sd_iostats_seq_show(struct seq_file *seq, void *v)
++{
++ struct timeval now;
++ struct gendisk *disk = seq->private; /* stored by sd_iostats_seq_open() */
++ iostat_stats_t *stats;
++ unsigned long long read_len;
++ unsigned long long read_len_tot;
++ unsigned long read_num;
++ unsigned long read_num_tot;
++ unsigned long long write_len;
++ unsigned long long write_len_tot;
++ unsigned long write_num;
++ unsigned long write_num_tot;
++ int i;
++ int maxi;
++
++ if (sd_iostats == NULL) {
++ printk(KERN_ERR "sd_iostats_seq_show: NULL stats array\n");
++ BUG();
++ }
++
++ stats = sd_iostats[scsi_disk(disk)->index];
++ if (stats == NULL) {
++ printk(KERN_ERR "sd_iostats_seq_show: NULL stats entry\n");
++ BUG();
++ }
++
++ do_gettimeofday(&now); /* 'now' becomes the time elapsed since the last reset */
++ now.tv_sec -= stats->iostat_timeval.tv_sec;
++ now.tv_usec -= stats->iostat_timeval.tv_usec;
++ if (now.tv_usec < 0) { /* borrow one second when the microseconds went negative */
++ now.tv_usec += 1000000;
++ now.tv_sec--;
++ }
++
++ /* this sampling races with updates */
++ seq_printf(seq, "index: %lu snapshot_time: %lu.%06lu\n",
++ (unsigned long)scsi_disk(disk)->index, now.tv_sec, now.tv_usec); /* cast so the argument matches %lu */
++
++ for (i = IOSTAT_NCOUNTERS - 1; i > 0; i--) /* find the highest bucket that has any count */
++ if (stats->iostat_read_histogram[i].iostat_count != 0 ||
++ stats->iostat_write_histogram[i].iostat_count != 0)
++ break;
++ maxi = i;
++
++ seq_printf(seq, "%8s %8s %12s %8s %12s\n", "size",
++ "reads", "total", "writes", "total");
++
++ read_len_tot = write_len_tot = 0;
++ read_num_tot = write_num_tot = 0;
++ for (i = 0; i <= maxi; i++) { /* one output row per bucket, accumulating grand totals */
++ read_len = stats->iostat_read_histogram[i].iostat_size;
++ read_len_tot += read_len;
++ read_num = stats->iostat_read_histogram[i].iostat_count;
++ read_num_tot += read_num;
++
++ write_len = stats->iostat_write_histogram[i].iostat_size;
++ write_len_tot += write_len;
++ write_num = stats->iostat_write_histogram[i].iostat_count;
++ write_num_tot += write_num;
++
++ seq_printf (seq, "%8d %8lu %12llu %8lu %12llu\n",
++ 512<<i, read_num, read_len, write_num, write_len);
++ }
++
++ seq_printf(seq, "%8s %8lu %12llu %8lu %12llu\n", "total",
++ read_num_tot, read_len_tot,
++ write_num_tot, write_len_tot);
++ return 0;
++}
++
++static void * /* seq_file start callback */
++sd_iostats_seq_start(struct seq_file *p, loff_t *pos)
++{
++ return (*pos == 0) ? (void *)1 : NULL; /* non-NULL dummy token only at position 0, so there is a single record */
++}
++
++static void * /* seq_file next callback */
++sd_iostats_seq_next(struct seq_file *p, void *v, loff_t *pos)
++{
++ ++*pos; /* advance past the only record */
++ return NULL; /* always NULL: there is no second record */
++}
++
++static void /* seq_file stop callback */
++sd_iostats_seq_stop(struct seq_file *p, void *v)
++{ /* intentionally empty: start/next acquire nothing that needs releasing */
++}
++
++static struct seq_operations sd_iostats_seqops = { /* iterator callbacks handed to seq_open() in sd_iostats_seq_open() */
++ .start = sd_iostats_seq_start,
++ .stop = sd_iostats_seq_stop,
++ .next = sd_iostats_seq_next,
++ .show = sd_iostats_seq_show,
++};
++
++static int /* open handler for a per-disk stats file; returns 0 or the seq_open() error */
++sd_iostats_seq_open (struct inode *inode, struct file *file)
++{
++ int rc;
++
++ rc = seq_open(file, &sd_iostats_seqops);
++ if (rc != 0)
++ return rc;
++
++ ((struct seq_file *)file->private_data)->private = PDE(inode)->data; /* pde->data is the gendisk set in sd_iostats_init_disk() */
++ return 0;
++}
++
++static ssize_t /* write handler: any write resets this disk's statistics */
++sd_iostats_seq_write(struct file *file, const char *buffer,
++ size_t len, loff_t *off)
++{
++ struct seq_file *seq = file->private_data;
++ struct gendisk *disk = seq->private;
++ iostat_stats_t *stats = sd_iostats[scsi_disk(disk)->index];
++ unsigned long flags;
++
++
++ spin_lock_irqsave (&sd_iostats_lock, flags); /* same lock sd_iostats_bump() holds while updating counters */
++ memset (stats, 0, sizeof(*stats));
++ do_gettimeofday(&stats->iostat_timeval); /* restart the snapshot clock */
++ spin_unlock_irqrestore (&sd_iostats_lock, flags);
++
++ return len; /* the written data is never read; report it all as consumed */
++}
++
++static struct file_operations sd_iostats_proc_fops = { /* installed on each per-disk proc entry by sd_iostats_init_disk() */
++ .owner = THIS_MODULE,
++ .open = sd_iostats_seq_open,
++ .read = seq_read,
++ .write = sd_iostats_seq_write, /* a write resets the counters */
++ .llseek = seq_lseek,
++ .release = seq_release,
++};
++
++extern struct proc_dir_entry *proc_scsi;
++
++void /* allocate the per-disk stats pointer array and create the proc directory under proc_scsi */
++sd_iostats_init(void)
++{
++ int i;
++
++ spin_lock_init(&sd_iostats_lock);
++
++ sd_iostats = kmalloc(SD_STATS * sizeof(iostat_stats_t *), GFP_KERNEL);
++ if (sd_iostats == NULL) {
++ printk(KERN_WARNING "Can't keep sd iostats: "
++ "ENOMEM allocating stats array size %zu\n",
++ SD_STATS * sizeof(iostat_stats_t *)); /* size_t expression, hence %zu */
++ return;
++ }
++
++ for (i = 0; i < SD_STATS; i++) /* slots are filled later by sd_iostats_init_disk() */
++ sd_iostats[i] = NULL;
++
++ if (proc_scsi == NULL) {
++ printk(KERN_WARNING "No access to sd iostats: "
++ "proc_scsi is NULL\n");
++ return;
++ }
++
++ sd_iostats_procdir = create_proc_entry(sd_iostats_procdir_name,
++ S_IFDIR | S_IRUGO | S_IXUGO,
++ proc_scsi);
++ if (sd_iostats_procdir == NULL) {
++ printk(KERN_WARNING "No access to sd iostats: "
++ "can't create /proc/scsi/%s\n", sd_iostats_procdir_name);
++ return;
++ }
++}
++
++void /* allocate stats for one disk and create its proc entry; does nothing if iostats setup failed */
++sd_iostats_init_disk(struct gendisk *disk)
++{
++ struct proc_dir_entry *pde;
++ unsigned long flags;
++ iostat_stats_t *stats;
++
++ if (sd_iostats == NULL ||
++ sd_iostats_procdir == NULL)
++ return;
++
++ if (scsi_disk(disk)->index >= SD_STATS) { /* valid slots are 0 .. SD_STATS-1 */
++ printk(KERN_ERR "sd_iostats_init_disk: "
++ "unexpected disk index %d(%d)\n",
++ scsi_disk(disk)->index, SD_STATS);
++ return;
++ }
++
++ if (sd_iostats[scsi_disk(disk)->index] != NULL) /* already set up for this index */
++ return;
++
++ stats = kmalloc(sizeof(*stats), GFP_KERNEL);
++ if (stats == NULL) {
++ printk(KERN_WARNING "Can't keep %s iostats: "
++ "ENOMEM allocating stats size %zu\n",
++ disk->disk_name, sizeof(*stats)); /* sizeof yields size_t, hence %zu */
++ return;
++ }
++
++ memset (stats, 0, sizeof(*stats));
++ do_gettimeofday(&stats->iostat_timeval);
++
++ spin_lock_irqsave(&sd_iostats_lock, flags);
++
++ if (sd_iostats[scsi_disk(disk)->index] != NULL) { /* re-check under the lock: someone else installed stats first */
++ spin_unlock_irqrestore(&sd_iostats_lock, flags);
++ kfree (stats);
++ return;
++ }
++
++ sd_iostats[scsi_disk(disk)->index] = stats;
++
++ spin_unlock_irqrestore(&sd_iostats_lock, flags);
++
++ pde = create_proc_entry(disk->disk_name, S_IRUGO | S_IWUSR,
++ sd_iostats_procdir);
++ if (pde == NULL) {
++ printk(KERN_WARNING "Can't create /proc/scsi/%s/%s\n",
++ sd_iostats_procdir_name, disk->disk_name);
++ } else {
++ pde->proc_fops = &sd_iostats_proc_fops;
++ pde->data = disk; /* read back via PDE(inode)->data in sd_iostats_seq_open() */
++ }
++}
++
++static void sd_devname(unsigned int disknum, char *buffer) /* write the "sdX" / "sdXY" name for disk number disknum into buffer */
++{
++ if (disknum < 26)
++ sprintf(buffer, "sd%c", 'a' + disknum); /* 0..25 -> "sda".."sdz" */
++ else {
++ unsigned int min1;
++ unsigned int min2;
++ /*
++ * For larger numbers of disks, we need to go to a new
++ * naming scheme.
++ */
++ min1 = disknum / 26;
++ min2 = disknum % 26;
++ sprintf(buffer, "sd%c%c", 'a' + min1 - 1, 'a' + min2); /* 26..701 -> "sdaa".."sdzz"; no bounds check on buffer */
++ }
++}
++
++void /* tear down everything sd_iostats_init() and sd_iostats_init_disk() created */
++sd_iostats_fini(void)
++{
++ char name[6];
++ int i;
++
++ if (sd_iostats_procdir != NULL) {
++ for (i = 0; i < SD_STATS; i++) { /* tries every possible name, whether or not an entry was created */
++ sd_devname(i, name);
++ remove_proc_entry(name, sd_iostats_procdir);
++ }
++
++ if (proc_scsi == NULL) {
++ printk(KERN_ERR "sd_iostats_fini: proc_scsi NULL\n");
++ BUG();
++ }
++ remove_proc_entry(sd_iostats_procdir_name,
++ proc_scsi);
++
++ sd_iostats_procdir = NULL;
++ }
++
++ if (sd_iostats != NULL) { /* NOTE(review): freed without taking sd_iostats_lock - confirm no sd_iostats_bump() can still run here */
++ for (i = 0; i < SD_STATS; i++) {
++ if (sd_iostats[i] != NULL)
++ kfree (sd_iostats[i]);
++ }
++
++ kfree(sd_iostats);
++ sd_iostats = NULL;
++ }
++}
++
++void /* record one request of nsect sectors for disk index 'disk'; iswrite selects the write histogram */
++sd_iostats_bump(int disk, unsigned int nsect, int iswrite)
++{
++ iostat_stats_t *stats;
++ iostat_counter_t *counter;
++ int bucket;
++ int tmp;
++ unsigned long irqflags;
++
++ if (sd_iostats == NULL)
++ return;
++
++ if (disk < 0 || disk >= SD_STATS) {
++ /* only SD_STATS slots exist and sd_iostats_init_disk() keeps no
++ * stats beyond them, so skip such disks instead of crashing */
++ return;
++ }
++
++ for (bucket = 0, tmp = nsect; tmp > 1; bucket++) /* bucket = number of halvings until tmp <= 1 */
++ tmp /= 2;
++
++ if (bucket >= IOSTAT_NCOUNTERS) {
++ printk (KERN_ERR "sd_iostats_bump: nsect %d too big\n", nsect);
++ BUG();
++ }
++
++ spin_lock_irqsave(&sd_iostats_lock, irqflags);
++
++ stats = sd_iostats[disk];
++ if (stats != NULL) { /* NULL when sd_iostats_init_disk() has not installed stats for this disk */
++ counter = iswrite ?
++ &stats->iostat_write_histogram[bucket] :
++ &stats->iostat_read_histogram[bucket];
++
++ counter->iostat_size += nsect;
++ counter->iostat_count++;
++ }
++
++ spin_unlock_irqrestore(&sd_iostats_lock, irqflags);
++}
++#endif
++
+ /**
+ * init_sd - entry point for this driver (both when built in or when
+ * a module).
+@@ -1557,6 +1914,7 @@
+ static int __init init_sd(void)
+ {
+ int majors = 0, i;
++ int rc = 0;
+
+ SCSI_LOG_HLQUEUE(3, printk("init_sd: sd driver entry point\n"));
+
+@@ -1567,7 +1925,10 @@
+ if (!majors)
+ return -ENODEV;
+
+- return scsi_register_driver(&sd_template.gendrv);
++ rc = scsi_register_driver(&sd_template.gendrv);
++ if (rc == 0)
++ sd_iostats_init();
++ return rc;
+ }
+
+ /**
+@@ -1581,6 +1942,7 @@
+
+ SCSI_LOG_HLQUEUE(3, printk("exit_sd: exiting sd driver\n"));
+
++ sd_iostats_fini();
+ scsi_unregister_driver(&sd_template.gendrv);
+ for (i = 0; i < SD_MAJORS; i++)
+ unregister_blkdev(sd_major(i), "sd");
+Index: linux-2.6.5-SLES9_SP1_BRANCH_2004111114454891_lustre.1.4.0-phik/drivers/scsi/scsi_proc.c
+===================================================================
+--- linux-2.6.5-SLES9_SP1_BRANCH_2004111114454891_lustre.1.4.0-phik.orig/drivers/scsi/scsi_proc.c 2004-04-03 19:36:17.000000000 -0800
++++ linux-2.6.5-SLES9_SP1_BRANCH_2004111114454891_lustre.1.4.0-phik/drivers/scsi/scsi_proc.c 2005-01-07 00:15:53.905665776 -0800
+@@ -37,7 +37,8 @@
+ /* 4K page size, but our output routines, use some slack for overruns */
+ #define PROC_BLOCK_SIZE (3*1024)
+
+-static struct proc_dir_entry *proc_scsi;
++struct proc_dir_entry *proc_scsi;
++EXPORT_SYMBOL(proc_scsi);
+
+ /* Protect sht->present and sht->proc_dir */
+ static DECLARE_MUTEX(global_host_template_sem);
- if (xchg((unsigned long *)objp, RED_MAGIC1) != RED_MAGIC2)
+ if (xchg((unsigned long *)objp, RED_MAGIC1) != RED_MAGIC2) {
/* Either write before start, or a double free. */
-+ printk("inconsistency in %s\n", cachep->name);
++ printk("inconsistency at start of %s\n", cachep->name);
BUG();
+ }
if (xchg((unsigned long *)(objp+cachep->objsize -
- BYTES_PER_WORD), RED_MAGIC1) != RED_MAGIC2)
+ BYTES_PER_WORD), RED_MAGIC1) != RED_MAGIC2) {
/* Either write past end, or a double free. */
-+ printk("inconsistency in %s\n", cachep->name);
++ printk("inconsistency at end of %s\n", cachep->name);
BUG();
+ }
}
+sd_iostats-2.6-suse.patch
+blkdev_tunables-2.6-suse.patch
bluesmoke-2.6-suse-lnxi.patch
mtd-2.6-suse-lnxi.patch
perfctr-2.6-suse-lnxi.patch
kexec-2.6-suse-lnxi.patch
+brk-locked-2.6-suse-lnxi.patch
LINUX26=1
SUSEBUILD=1
-BASE_ARCHS="i686 ppc"
+BASE_ARCHS="i686 ppc x86_64"
BIGMEM_ARCHS=""
BOOT_ARCHS=""
JENSEN_ARCHS=""
SERIES MNEMONIC COMMENT ARCH
SUPPORTED KERNELS:
-rhel-2.4.21 linux-2.4.21-20.3EL same as chaos-2.4.21 all
+rhel-2.4.21 linux-2.4.21-20.3EL all
2.6-suse linux-2.6 SLES9 SP1 kernel all
UNSUPPORTED KERNELS; BEING PHASED OUT; MAY BE MISSING CRITICAL BUG FIXES:
hp-pnnl-2.4.20 linux-2.4.20-hp4_pnnl1 same as vanilla but no uml ia64
vanilla-2.4.24 linux-2.4.24 patch with uml-2.4.24-6 um
-chaos-2.4.21 linux-chaos-2.4.21 same as rh-2.4.21-20.EL i386
suse-2.4.21-jvn linux-2.4.21-241 sles8 2.4 kernel i386
spin_lock_init(&cli->cl_write_rpc_hist.oh_lock);
spin_lock_init(&cli->cl_read_page_hist.oh_lock);
spin_lock_init(&cli->cl_write_page_hist.oh_lock);
+ spin_lock_init(&cli->cl_read_offset_hist.oh_lock);
+ spin_lock_init(&cli->cl_write_offset_hist.oh_lock);
if (num_physpages >> (20 - PAGE_SHIFT) <= 128) { /* <= 128 MB */
cli->cl_max_pages_per_rpc = PTLRPC_MAX_BRW_PAGES / 4;
cli->cl_max_rpcs_in_flight = OSC_MAX_RIF_DEFAULT / 4;
if (!obd->obd_recovering) {
spin_unlock_bh(&obd->obd_processing_task_lock);
return;
- }
- CDEBUG(D_HA, "timer will expire in %u seconds\n",
- OBD_RECOVERY_TIMEOUT / HZ);
+ }
+ CDEBUG(D_HA, "%s: timer will expire in %u seconds\n", obd->obd_name,
+ (int)(OBD_RECOVERY_TIMEOUT / HZ));
mod_timer(&obd->obd_recovery_timer, jiffies + OBD_RECOVERY_TIMEOUT);
spin_unlock_bh(&obd->obd_processing_task_lock);
}
return;
}
CWARN("%s: starting recovery timer (%us)\n", obd->obd_name,
- OBD_RECOVERY_TIMEOUT / HZ);
+ (int)(OBD_RECOVERY_TIMEOUT / HZ));
obd->obd_recovery_handler = handler;
obd->obd_recovery_timer.function = target_recovery_expired;
obd->obd_recovery_timer.data = (unsigned long)obd;
{
struct obd_device *obd = req->rq_export->exp_obd;
- if (!obd->obd_no_transno)
+ if (!obd->obd_no_transno && req->rq_repmsg != NULL)
req->rq_repmsg->last_committed = obd->obd_last_committed;
else
DEBUG_REQ(D_IOCTL, req,
}
*flags |= LDLM_FL_LOCK_CHANGED;
RETURN(0);
- } else if (rc == ELDLM_LOCK_ABORTED ||
- (rc == 0 && (*flags & LDLM_FL_INTENT_ONLY))) {
+ } else if (rc != ELDLM_OK ||
+ (rc == ELDLM_OK && (*flags & LDLM_FL_INTENT_ONLY))) {
ldlm_lock_destroy(lock);
RETURN(rc);
}
- LASSERT(rc == ELDLM_OK);
}
l_lock(&ns->ns_lock);
}
list_del_init(&lock->l_pending_chain);
if ((void *)lock->l_export < LP_POISON + PAGE_SIZE &&
- (void *)lock->l_export >= LP_POISON + PAGE_SIZE) {
+ (void *)lock->l_export >= LP_POISON) {
CERROR("lock with free export on elt list %p\n",
- export);
+ lock->l_export);
lock->l_export = NULL;
LDLM_ERROR(lock, "free export");
continue;
* which reference these resources. -phil */
if (!ldlm_resource_putref(res) &&
!(flags & LDLM_FL_LOCAL_ONLY)) {
- CERROR("Resource refcount nonzero (%d) after "
- "lock cleanup; forcing cleanup.\n",
+ CERROR("Namespace %s resource refcount %d "
+ "after lock cleanup; forcing cleanup.\n",
+ ns->ns_name,
atomic_read(&res->lr_refcount));
ldlm_resource_dump(D_ERROR, res);
atomic_set(&res->lr_refcount, 1);
LIBS = $(LIBEFENCE)
LUSTRE_LIBS = libllite.a \
- $(top_builddir)/lov/liblov.a \
- $(top_builddir)/obdecho/libobdecho.a \
- $(top_builddir)/osc/libosc.a \
- $(top_builddir)/mdc/libmdc.a \
- $(top_builddir)/ptlrpc/libptlrpc.a \
- $(top_builddir)/obdclass/liblustreclass.a \
- $(top_builddir)/lvfs/liblvfs.a
+ $(top_builddir)/lustre/lov/liblov.a \
+ $(top_builddir)/lustre/obdecho/libobdecho.a \
+ $(top_builddir)/lustre/osc/libosc.a \
+ $(top_builddir)/lustre/mdc/libmdc.a \
+ $(top_builddir)/lustre/ptlrpc/libptlrpc.a \
+ $(top_builddir)/lustre/obdclass/liblustreclass.a \
+ $(top_builddir)/lustre/lvfs/liblvfs.a
PTL_LIBS = $(top_builddir)/portals/utils/libuptlctl.a \
$(top_builddir)/portals/unals/libtcpnal.a \
build_obj_list ../lvfs liblvfs.a
# portals components libs
-build_obj_list ../portals/utils libuptlctl.a
-build_obj_list ../portals/unals libtcpnal.a
-build_obj_list ../portals/portals libportals.a
+build_obj_list ../../portals/utils libuptlctl.a
+build_obj_list ../../portals/unals libtcpnal.a
+build_obj_list ../../portals/portals libportals.a
# create static lib lsupport
rm -f $CWD/liblsupport.a
stripe = llu_lock_to_stripe_offset(inode, lock);
l_lock(&lock->l_resource->lr_namespace->ns_lock);
+
kms = ldlm_extent_shift_kms(lock,
lsm->lsm_oinfo[stripe].loi_kms);
- l_unlock(&lock->l_resource->lr_namespace->ns_lock);
if (lsm->lsm_oinfo[stripe].loi_kms != kms)
LDLM_DEBUG(lock, "updating kms from "LPU64" to "LPU64,
lsm->lsm_oinfo[stripe].loi_kms, kms);
lsm->lsm_oinfo[stripe].loi_kms = kms;
+
+ l_unlock(&lock->l_resource->lr_namespace->ns_lock);
iput:
I_RELE(inode);
break;
struct inode *inode;
struct ll_inode_info *lli;
struct lov_stripe_md *lsm;
- __u32 stripe;
+ int stripe;
__u64 kms;
/* This lock wasn't granted, don't try to evict pages */
* lock hold times should be very short as ast processing
* requires them and has a short timeout. so, i_sem before ns
* lock.*/
- down(&inode->i_sem);
l_lock(&lock->l_resource->lr_namespace->ns_lock);
+ spin_lock(&lli->lli_lock);
+
kms = ldlm_extent_shift_kms(lock,
lsm->lsm_oinfo[stripe].loi_kms);
-
if (lsm->lsm_oinfo[stripe].loi_kms != kms)
LDLM_DEBUG(lock, "updating kms from "LPU64" to "LPU64,
lsm->lsm_oinfo[stripe].loi_kms, kms);
lsm->lsm_oinfo[stripe].loi_kms = kms;
+
+ spin_unlock(&lli->lli_lock);
l_unlock(&lock->l_resource->lr_namespace->ns_lock);
- up(&inode->i_sem);
//ll_try_done_writing(inode);
iput:
iput(inode);
struct ll_inode_info *lli = ll_i2info(inode);
struct lustre_handle lockh = { 0 };
struct ost_lvb *lvb;
- __u32 stripe;
+ int stripe;
ENTRY;
if (flags & (LDLM_FL_BLOCK_WAIT | LDLM_FL_BLOCK_GRANTED |
lsm->lsm_oinfo[stripe].loi_rss = lvb->lvb_size;
l_lock(&lock->l_resource->lr_namespace->ns_lock);
- down(&inode->i_sem);
+ spin_lock(&lli->lli_lock);
+
kms = MAX(lsm->lsm_oinfo[stripe].loi_kms, lvb->lvb_size);
kms = ldlm_extent_shift_kms(NULL, kms);
if (lsm->lsm_oinfo[stripe].loi_kms != kms)
LDLM_DEBUG(lock, "updating kms from "LPU64" to "LPU64,
lsm->lsm_oinfo[stripe].loi_kms, kms);
lsm->lsm_oinfo[stripe].loi_kms = kms;
- up(&inode->i_sem);
+
+ spin_unlock(&lli->lli_lock);
l_unlock(&lock->l_resource->lr_namespace->ns_lock);
}
#define LL_MAX_BLKSIZE (4UL * 1024 * 1024)
#if (LINUX_VERSION_CODE >= KERNEL_VERSION(2,5,0))
-#define ll_s2sbi(sb) ((struct ll_sb_info *)((sb)->s_fs_info))
+#define ll_s2sbi(sb) ((struct ll_sb_info *)((sb)->s_fs_info))
+#define ll_s2sbi_nocast(sb) ((sb)->s_fs_info)
void __d_rehash(struct dentry * entry, int lock);
static inline __u64 ll_ts2u64(struct timespec *time)
{
}
#else /* 2.4 here */
#define ll_s2sbi(sb) ((struct ll_sb_info *)((sb)->u.generic_sbp))
+#define ll_s2sbi_nocast(sb) ((sb)->u.generic_sbp)
static inline __u64 ll_ts2u64(time_t *time)
{
return *time;
INIT_LIST_HEAD(&sbi->ll_conn_chain);
INIT_HLIST_HEAD(&sbi->ll_orphan_dentry_list);
- ll_s2sbi(sb) = sbi;
+ ll_s2sbi_nocast(sb) = sbi;
generate_random_uuid(uuid);
class_uuid_unparse(uuid, &sbi->ll_sb_uuid);
spin_unlock(&ll_sb_lock);
OBD_FREE(sbi, sizeof(*sbi));
}
- ll_s2sbi(sb) = NULL;
+ ll_s2sbi_nocast(sb) = NULL;
EXIT;
}
*lenp = 3;
return 1;
}
- if (dentry->d_parent) {
+ if (dentry->d_parent) {
*datap++ = dentry->d_parent->d_inode->i_ino;
*datap++ = (__u32)(S_IFMT & dentry->d_parent->d_inode->i_mode);
-
+
*lenp = 5;
return 2;
}
out:
size = (((obd_off)page->index) << PAGE_SHIFT) + to;
if (rc == 0) {
+ spin_lock(&lli->lli_lock);
obd_increase_kms(exp, lsm, size);
+ spin_unlock(&lli->lli_lock);
if (size > inode->i_size)
inode->i_size = size;
SetPageUptodate(page);
if (ras->ras_window_len) {
start = ras->ras_next_readahead;
end = ras->ras_window_start + ras->ras_window_len - 1;
- end = min(end, (unsigned long)(kms >> PAGE_CACHE_SHIFT));
+ end = min(end, (unsigned long)((kms - 1) >> PAGE_CACHE_SHIFT));
ras->ras_next_readahead = max(end, end + 1);
RAS_CDEBUG(ras);
ptlrpc_set_destroy(set);
if (rc == 0) {
rc = iobuf->length;
- if (rw == WRITE)
+ if (rw == WRITE) {
+ spin_lock(&lli->lli_lock);
obd_increase_kms(ll_i2obdexp(inode), lsm, offset);
+ spin_unlock(&lli->lli_lock);
+ }
}
OBD_FREE(pga, sizeof(*pga) * iobuf->nr_pages);
ll_unregister_cache(&ll_cache_definition);
}
-
return rc;
}
#include <linux/lustre_idl.h>
#include <linux/lustre_dlm.h>
#include <linux/lustre_mds.h>
+#include <linux/lustre_debug.h>
#include <linux/obd_class.h>
#include <linux/obd_lov.h>
#include <linux/obd_ost.h>
RETURN(0);
}
}
+ LDLM_ERROR(data->lock, "lock on inode without such object\n");
+ dump_lsm(D_ERROR, data->lsm);
RETURN(-ENXIO);
} else if (keylen >= strlen("size_to_stripe") &&
strcmp(key, "size_to_stripe") == 0) {
#include "lov_internal.h"
-void lov_dump_lmm_v0(int level, struct lov_mds_md_v0 *lmm)
-{
- int i, num_ost, stripe, idx;
-
- num_ost = le32_to_cpu(lmm->lmm_ost_count);
- idx = le32_to_cpu(lmm->lmm_stripe_offset);
- CDEBUG(level, "objid "LPX64", magic 0x%08X, ost_count %u\n",
- le64_to_cpu(lmm->lmm_object_id), le32_to_cpu(lmm->lmm_magic),
- num_ost);
- CDEBUG(level,"stripe_size %u, stripe_count %u, stripe_offset %u\n",
- le32_to_cpu(lmm->lmm_stripe_size),
- le32_to_cpu(lmm->lmm_stripe_count), idx);
- for (i = stripe = 0; i < le32_to_cpu(lmm->lmm_ost_count); i++, idx++) {
- idx %= num_ost;
- if (lmm->lmm_objects[idx].l_object_id == 0)
- continue;
- CDEBUG(level, "stripe %u idx %u subobj "LPX64"\n", stripe, idx,
- le64_to_cpu(lmm->lmm_objects[idx].l_object_id));
- stripe++;
- }
-}
-
void lov_dump_lmm_v1(int level, struct lov_mds_md_v1 *lmm)
{
struct lov_ost_data_v1 *lod;
return stripe_count;
}
-static int lov_verify_lmm_v0(struct lov_mds_md_v0 *lmm, int lmm_bytes,
- int *stripe_count)
-{
- if (lmm_bytes < sizeof(*lmm)) {
- CERROR("lov_mds_md too small: %d, need at least %d\n",
- lmm_bytes, (int)sizeof(*lmm));
- return -EINVAL;
- }
-
- *stripe_count = le16_to_cpu(lmm->lmm_stripe_count);
-
- if (*stripe_count == 0 ||
- *stripe_count > le32_to_cpu(lmm->lmm_ost_count)) {
- CERROR("bad stripe count %d\n", *stripe_count);
- lov_dump_lmm_v0(D_WARNING, lmm);
- return -EINVAL;
- }
-
- if (lmm_bytes < lov_mds_md_v0_size(*stripe_count)) {
- CERROR("LOV EA too small: %d, need %d\n",
- lmm_bytes, lov_mds_md_v0_size(*stripe_count));
- lov_dump_lmm_v0(D_WARNING, lmm);
- return -EINVAL;
- }
-
- if (lmm->lmm_object_id == 0) {
- CERROR("zero object id\n");
- lov_dump_lmm_v0(D_WARNING, lmm);
- return -EINVAL;
- }
-
- if (le32_to_cpu(lmm->lmm_stripe_offset) >
- le32_to_cpu(lmm->lmm_ost_count)) {
- CERROR("stripe offset %d more than number of OSTs %d\n",
- le32_to_cpu(lmm->lmm_stripe_offset),
- le32_to_cpu(lmm->lmm_ost_count));
- lov_dump_lmm_v0(D_WARNING, lmm);
- return -EINVAL;
- }
-
- if (lmm->lmm_stripe_size == 0) {
- CERROR("zero stripe size\n");
- lov_dump_lmm_v0(D_WARNING, lmm);
- return -EINVAL;
- }
-
- return 0;
-}
-
static int lov_verify_lmm_v1(struct lov_mds_md_v1 *lmm, int lmm_bytes,
int *stripe_count)
{
switch (le32_to_cpu(*(__u32 *)lmm)) {
case LOV_MAGIC_V1:
return lov_verify_lmm_v1(lmm, lmm_bytes, stripe_count);
- case LOV_MAGIC_V0:
- return lov_verify_lmm_v0(lmm, lmm_bytes, stripe_count);
default:
CERROR("bad disk LOV MAGIC: 0x%08X\n",
le32_to_cpu(*(__u32 *)lmm));
*lsmp = NULL;
}
-int lov_unpackmd_v0(struct lov_obd *lov, struct lov_stripe_md *lsm,
- struct lov_mds_md_v0 *lmm)
-{
- struct lov_oinfo *loi;
- int i, ost_offset, ost_count;
-
- lsm->lsm_object_id = le64_to_cpu(lmm->lmm_object_id);
- /* lsm->lsm_object_gr = 0; implicit */
- lsm->lsm_stripe_size = le32_to_cpu(lmm->lmm_stripe_size);
- lsm->lsm_xfersize = lsm->lsm_stripe_size * lsm->lsm_stripe_count;
- lsm->lsm_pattern = LOV_PATTERN_RAID0;
- ost_offset = le32_to_cpu(lmm->lmm_stripe_offset);
- ost_count = le16_to_cpu(lmm->lmm_ost_count);
-
- for (i = 0, loi = lsm->lsm_oinfo; i < ost_count; i++, ost_offset++) {
- ost_offset %= ost_count;
-
- if (!lmm->lmm_objects[ost_offset].l_object_id)
- continue;
-
- loi->loi_id =
- le64_to_cpu(lmm->lmm_objects[ost_offset].l_object_id);
- /* loi->loi_gr = 0; implicit */
- loi->loi_ost_idx = ost_offset;
- /* loi->loi_ost_gen = 0; implicit */
- loi++;
- }
-
- if (loi - lsm->lsm_oinfo != lsm->lsm_stripe_count) {
- CERROR("missing objects in lmm struct\n");
- lov_dump_lmm_v0(D_WARNING, lmm);
- return -EINVAL;
- }
-
- return 0;
-}
-
int lov_unpackmd_v1(struct lov_obd *lov, struct lov_stripe_md *lsm,
struct lov_mds_md_v1 *lmm)
{
case LOV_MAGIC_V1:
rc = lov_unpackmd_v1(lov, *lsmp, lmm);
break;
- case LOV_MAGIC_V0:
- rc = lov_unpackmd_v0(lov, *lsmp, (void *)lmm);
- break;
}
if (rc) {
void qos_shrink_lsm(struct lov_request_set *set)
{
- struct lov_stripe_md *lsm = set->set_md;
- struct lov_stripe_md *lsm_new;
+ struct lov_stripe_md *lsm = set->set_md, *lsm_new;
/* XXX LOV STACKING call into osc for sizes */
unsigned oldsize, newsize;
ENTRY;
LASSERT(src_oa->o_valid & OBD_MD_FLID);
-
+
lsm->lsm_object_id = src_oa->o_id;
if (!lsm->lsm_stripe_size)
lsm->lsm_stripe_size = lov->desc.ld_default_stripe_size;
for (i = 0; i < ost_count; i++, ost_idx = (ost_idx + 1) % ost_count) {
struct lov_request *req;
-
+
++ost_start_idx;
if (lov->tgts[ost_idx].active == 0) {
CDEBUG(D_HA, "lov idx %d inactive\n", ost_idx);
OBD_ALLOC(req, sizeof(*req));
if (req == NULL)
GOTO(out, rc = -ENOMEM);
-
+
req->rq_buflen = sizeof(*req->rq_md);
OBD_ALLOC(req->rq_md, req->rq_buflen);
if (req->rq_md == NULL)
GOTO(out, rc = -ENOMEM);
-
+
req->rq_oa = obdo_alloc();
if (req->rq_oa == NULL)
GOTO(out, rc = -ENOMEM);
-
+
req->rq_idx = ost_idx;
req->rq_stripe = i;
/* create data objects with "parent" OA */
if (set->set_count == lsm->lsm_stripe_count)
GOTO(out, rc = 0);
}
-
+
if (set->set_count == 0)
GOTO(out, rc = -EIO);
-
+
/* If we were passed specific striping params, then a failure to
* meet those requirements is an error, since we can't reallocate
* that memory (it might be part of a larger array or something).
*/
if (!newea) {
CERROR("can't lstripe objid "LPX64": have %u want %u, rc %d\n",
- lsm->lsm_object_id, set->set_count,
+ lsm->lsm_object_id, set->set_count,
lsm->lsm_stripe_count, rc);
rc = rc ? rc : -EFBIG;
} else {
out:
RETURN(rc);
}
-
-
-
static int enqueue_done(struct lov_request_set *set, __u32 mode)
{
- struct list_head *pos;
struct lov_request *req;
struct lustre_handle *lov_lockhp = NULL;
struct lov_obd *lov = &set->set_exp->exp_obd->u.lov;
RETURN(0);
/* cancel enqueued/matched locks */
- list_for_each (pos, &set->set_list) {
- req = list_entry(pos, struct lov_request, rq_link);
-
+ list_for_each_entry(req, &set->set_list, rq_link) {
if (!req->rq_complete || req->rq_rc)
continue;
int rc = 0;
ENTRY;
- LASSERT(set->set_exp);
if (set == NULL)
RETURN(0);
+ LASSERT(set->set_exp);
if (set->set_completes)
rc = enqueue_done(set, mode);
else
int rc = 0;
ENTRY;
- LASSERT(set->set_exp);
if (set == NULL)
RETURN(0);
+ LASSERT(set->set_exp);
if (set->set_completes) {
if (set->set_count == set->set_success &&
flags & LDLM_FL_TEST_LOCK)
GOTO(out_set, rc = -ENOMEM);
lockh->cookie = set->set_lockh->llh_handle.h_cookie;
- loi = lsm->lsm_oinfo;
- for (i = 0; i < lsm->lsm_stripe_count; i++, loi++) {
+ for (i = 0,loi = lsm->lsm_oinfo; i < lsm->lsm_stripe_count; i++, loi++){
struct lov_request *req;
obd_off start, end;
}
lockh->cookie = set->set_lockh->llh_handle.h_cookie;
- loi = lsm->lsm_oinfo;
- for (i = 0; i < lsm->lsm_stripe_count; i++, loi++) {
+ for (i = 0,loi = lsm->lsm_oinfo; i < lsm->lsm_stripe_count; i++, loi++){
struct lov_request *req;
struct lustre_handle *lov_lockhp;
}
static int create_done(struct obd_export *exp, struct lov_request_set *set,
- struct lov_stripe_md **ea)
+ struct lov_stripe_md **lsmp)
{
struct lov_obd *lov = &exp->exp_obd->u.lov;
struct obd_trans_info *oti = set->set_oti;
struct obdo *src_oa = set->set_oa;
- struct list_head *pos;
struct lov_request *req;
struct obdo *ret_oa = NULL;
int attrset = 0, rc = 0;
if (!set->set_success)
GOTO(cleanup, rc = -EIO);
- if (*ea == NULL && set->set_count != set->set_success) {
+ if (*lsmp == NULL && set->set_count != set->set_success) {
set->set_count = set->set_success;
qos_shrink_lsm(set);
}
if (ret_oa == NULL)
GOTO(cleanup, rc = -ENOMEM);
- list_for_each (pos, &set->set_list) {
- req = list_entry(pos, struct lov_request, rq_link);
+ list_for_each_entry(req, &set->set_list, rq_link) {
if (!req->rq_complete || req->rq_rc)
continue;
lov_merge_attrs(ret_oa, req->rq_oa, req->rq_oa->o_valid,
memcpy(src_oa, ret_oa, sizeof(*src_oa));
obdo_free(ret_oa);
- *ea = set->set_md;
+ *lsmp = set->set_md;
GOTO(done, rc = 0);
cleanup:
- list_for_each (pos, &set->set_list) {
+ list_for_each_entry(req, &set->set_list, rq_link) {
struct obd_export *sub_exp;
int err = 0;
- req = list_entry(pos, struct lov_request, rq_link);
if (!req->rq_complete || req->rq_rc)
continue;
set->set_oa->o_id, req->rq_oa->o_id,
req->rq_idx, rc);
}
- if (*ea == NULL)
+ if (*lsmp == NULL)
obd_free_memmd(exp, &set->set_md);
done:
if (oti && set->set_cookies) {
RETURN(rc);
}
-int lov_fini_create_set(struct lov_request_set *set, struct lov_stripe_md **ea)
+int lov_fini_create_set(struct lov_request_set *set,struct lov_stripe_md **lsmp)
{
int rc = 0;
ENTRY;
if (set == NULL)
RETURN(0);
if (set->set_completes) {
- rc = create_done(set->set_exp, set, ea);
+ rc = create_done(set->set_exp, set, lsmp);
/* FIXME update qos data here */
}
RETURN(0);
}
-int lov_prep_create_set(struct obd_export *exp, struct lov_stripe_md **ea,
+int lov_prep_create_set(struct obd_export *exp, struct lov_stripe_md **lsmp,
struct obdo *src_oa, struct obd_trans_info *oti,
struct lov_request_set **reqset)
{
lov_init_set(set);
set->set_exp = exp;
- set->set_md = *ea;
+ set->set_md = *lsmp;
set->set_oa = src_oa;
set->set_oti = oti;
RETURN(rc);
out_lsm:
- if (*ea == NULL)
+ if (*lsmp == NULL)
obd_free_memmd(exp, &set->set_md);
out_set:
- lov_fini_create_set(set, ea);
+ lov_fini_create_set(set, lsmp);
RETURN(rc);
}
struct list_head *pos;
struct lov_request *req;
ENTRY;
-
+
list_for_each (pos, &set->set_list) {
req = list_entry(pos, struct lov_request, rq_link);
-
+
if (!req->rq_complete || req->rq_rc)
continue;
-
+
loi = &lsm->lsm_oinfo[req->rq_stripe];
-
+
if (req->rq_oa->o_valid & OBD_MD_FLBLOCKS)
loi->loi_blocks = req->rq_oa->o_blocks;
}
-
+
RETURN(0);
}
}
/* alloc and initialize lov request */
- loi = lsm->lsm_oinfo;
shift = 0;
- for (i = 0; i < lsm->lsm_stripe_count; i++, loi++) {
+ for (i = 0,loi = lsm->lsm_oinfo; i < lsm->lsm_stripe_count; i++, loi++){
struct lov_request *req;
if (info[i].count == 0)
static void *fsfilt_ext3_start(struct inode *inode, int op, void *desc_private,
int logs)
{
- /* For updates to the last recieved file */
+ /* For updates to the last received file */
int nblocks = EXT3_SINGLEDATA_TRANS_BLOCKS;
journal_t *journal;
void *handle;
}
request = *reqp = it->d.lustre.it_data;
LASSERT(request != NULL);
+ LASSERT(request != LP_POISON);
/* If we're doing an IT_OPEN which did not result in an actual
* successful open, then we need to remove the bit which saves
if (body->fid1.id == 0) {
/* a fid of zero is taken to mean "sync whole filesystem" */
rc = fsfilt_sync(obd, mds->mds_sb);
- if (rc)
- GOTO(out, rc);
+ GOTO(out, rc);
} else {
- /* just any file to grab fsync method - "file" arg unused */
- struct file *file = mds->mds_rcvd_filp;
struct dentry *de;
de = mds_fid2dentry(mds, &body->fid1, NULL);
if (IS_ERR(de))
GOTO(out, rc = PTR_ERR(de));
- rc = file->f_op->fsync(NULL, de, 1);
- l_dput(de);
- if (rc)
- GOTO(out, rc);
+ /* The file parameter isn't used for anything */
+ if (de->d_inode->i_fop && de->d_inode->i_fop->fsync)
+ rc = de->d_inode->i_fop->fsync(NULL, de, 1);
+ if (rc == 0) {
+ body = lustre_msg_buf(req->rq_repmsg, 0, sizeof(*body));
+ mds_pack_inode2fid(&body->fid1, de->d_inode);
+ mds_pack_inode2body(body, de->d_inode);
+ }
- body = lustre_msg_buf(req->rq_repmsg, 0, sizeof(*body));
- mds_pack_inode2fid(&body->fid1, de->d_inode);
- mds_pack_inode2body(body, de->d_inode);
+ l_dput(de);
+ GOTO(out, rc);
}
out:
req->rq_status = rc;
~MDS_INCOMPAT_SUPP);
GOTO(err_msd, rc = -EINVAL);
}
- /* XXX updating existing b_devel fs only, can be removed in future */
- msd->msd_feature_rocompat = cpu_to_le32(MDS_ROCOMPAT_LOVOBJID);
+
if (msd->msd_feature_rocompat & ~cpu_to_le32(MDS_ROCOMPAT_SUPP)) {
CERROR("unsupported read-only feature %x\n",
le32_to_cpu(msd->msd_feature_rocompat) &
int mds_notify(struct obd_device *obd, struct obd_device *watched, int active);
int mds_convert_lov_ea(struct obd_device *obd, struct inode *inode,
struct lov_mds_md *lmm, int lmm_size);
+void mds_objids_from_lmm(obd_id *ids, struct lov_mds_md *lmm,
+ struct lov_desc *desc);
/* mds/mds_open.c */
int mds_query_write_access(struct inode *inode);
struct mds_export_data *med, int cl_off);
int mds_client_free(struct obd_export *exp, int clear_client);
int mds_obd_create(struct obd_export *exp, struct obdo *oa,
- struct lov_stripe_md **ea, struct obd_trans_info *oti);
+ struct lov_stripe_md **ea, struct obd_trans_info *oti);
int mds_obd_destroy(struct obd_export *exp, struct obdo *oa,
struct lov_stripe_md *ea, struct obd_trans_info *oti);
int len, void *karg, void *uarg);
int mds_postrecov(struct obd_device *obd);
#ifdef __KERNEL__
-int mds_get_md(struct obd_device *, struct inode *, void *md, int *size,
+int mds_get_md(struct obd_device *, struct inode *, void *md, int *size,
int lock);
int mds_pack_md(struct obd_device *, struct lustre_msg *, int offset,
struct mds_body *, struct inode *, int lock);
if (le32_to_cpu(lmm->lmm_magic) == LOV_MAGIC)
RETURN(0);
- CDEBUG(D_INODE, "converting LOV EA on %lu/%u from V0 to V1\n",
- inode->i_ino, inode->i_generation);
+ CDEBUG(D_INODE, "converting LOV EA on %lu/%u from %#08x to %#08x\n",
+ inode->i_ino, inode->i_generation, le32_to_cpu(lmm->lmm_magic),
+ LOV_MAGIC);
rc = obd_unpackmd(obd->u.mds.mds_osc_exp, &lsm, lmm, lmm_size);
if (rc < 0)
GOTO(conv_end, rc);
conv_end:
return rc;
}
+
+/* Copy the object id of every stripe described by lmm into ids[], using the
+ * stripe's OST index as the slot number.  Stripe count, OST index and object
+ * id are all converted from little-endian before use.  Slots of ids[] whose
+ * index is not named by any stripe are left untouched.
+ *
+ * NOTE(review): l_ost_idx is taken from lmm without a range check and desc is
+ * not used -- presumably the caller sizes ids[] to cover every OST index;
+ * confirm against the callers. */
+void mds_objids_from_lmm(obd_id *ids, struct lov_mds_md *lmm,
+ struct lov_desc *desc)
+{
+ int i;
+ for (i = 0; i < le32_to_cpu(lmm->lmm_stripe_count); i++) {
+ ids[le32_to_cpu(lmm->lmm_objects[i].l_ost_idx)] =
+ le64_to_cpu(lmm->lmm_objects[i].l_object_id);
+ }
+}
+
return ERR_PTR(error);
}
-static void mds_objids_from_lmm(obd_id *ids, struct lov_mds_md *lmm,
- struct lov_desc *desc)
-{
- int i;
- for (i = 0; i < le32_to_cpu(lmm->lmm_stripe_count); i++) {
- ids[le32_to_cpu(lmm->lmm_objects[i].l_ost_idx)] =
- le64_to_cpu(lmm->lmm_objects[i].l_object_id);
- }
-}
-
/* Must be called with i_sem held */
static int mds_create_objects(struct ptlrpc_request *req, int offset,
struct mds_update_record *rec,
if (S_ISLNK(dchild->d_inode->i_mode))
GOTO(cleanup_no_trans, rc = 0);
- if ((rec->ur_flags & MDS_OPEN_DIRECTORY) &&
- !S_ISDIR(dchild->d_inode->i_mode))
- GOTO(cleanup, rc = -ENOTDIR);
-
if (S_ISDIR(dchild->d_inode->i_mode)) {
if (rec->ur_flags & MDS_OPEN_CREAT ||
rec->ur_flags & FMODE_WRITE) {
intent_set_disposition(rep, DISP_OPEN_OPEN);
GOTO(cleanup, rc = -EACCES);
}
+ } else if (rec->ur_flags & MDS_OPEN_DIRECTORY) {
+ GOTO(cleanup, rc = -ENOTDIR);
}
if (OBD_FAIL_CHECK(OBD_FAIL_MDS_OPEN_CREATE)) {
return -EINVAL;
}
+/* Emit one CDEBUG message at the given debug level describing lsm: its object
+ * id, maxbytes, magic, stripe size and stripe count.  Per-stripe data is not
+ * printed. */
+void dump_lsm(int level, struct lov_stripe_md *lsm)
+{
+ CDEBUG(level, "objid "LPX64", maxbytes "LPX64", magic 0x%08X, "
+ "stripe_size %u, stripe_count %u\n",
+ lsm->lsm_object_id, lsm->lsm_maxbytes, lsm->lsm_magic,
+ lsm->lsm_stripe_size, lsm->lsm_stripe_count);
+}
+
/* XXX assumes only a single page in request */
/*
int dump_req(struct ptlrpc_request *req)
EXPORT_SYMBOL(dump_ioo);
//EXPORT_SYMBOL(dump_req);
EXPORT_SYMBOL(dump_obdo);
+EXPORT_SYMBOL(dump_lsm);
EXPORT_SYMBOL(block_debug_setup);
EXPORT_SYMBOL(block_debug_check);
RETURN(0);
} /* class_unregister_type */
-struct obd_device *class_newdev(struct obd_type *type)
+struct obd_device *class_newdev(struct obd_type *type, char *name)
{
struct obd_device *result = NULL;
int i;
spin_lock(&obd_dev_lock);
- for (i = 0 ; i < MAX_OBD_DEVICES && result == NULL; i++) {
+ for (i = 0 ; i < MAX_OBD_DEVICES; i++) {
struct obd_device *obd = &obd_dev[i];
- if (!obd->obd_type) {
+ if (obd->obd_name && (strcmp(name, obd->obd_name) == 0)) {
+ CERROR("Device %s already exists, won't add\n", name);
+ if (result) {
+ result->obd_type = NULL;
+ result->obd_name = NULL;
+ result = NULL;
+ }
+ break;
+ }
+ if (!result && !obd->obd_type) {
LASSERT(obd->obd_minor == i);
memset(obd, 0, sizeof(*obd));
obd->obd_minor = i;
obd->obd_type = type;
+ obd->obd_name = name;
+ CDEBUG(D_IOCTL, "Adding new device %s\n",
+ obd->obd_name);
result = obd;
}
}
for (i = 0; i < MAX_OBD_DEVICES; i++) {
struct obd_device *obd = &obd_dev[i];
if (obd->obd_name && strcmp(name, obd->obd_name) == 0) {
- spin_unlock(&obd_dev_lock);
- return i;
+ /* Make sure we finished attaching before we give
+ out any references */
+ if (obd->obd_attached) {
+ spin_unlock(&obd_dev_lock);
+ return i;
+ }
+ break;
}
}
spin_unlock(&obd_dev_lock);
{
struct obd_type *type;
struct obd_device *obd;
- char *typename, *name, *uuid;
+ char *typename, *name, *namecopy, *uuid;
int rc, len, cleanup_phase = 0;
if (!lcfg->lcfg_inllen1 || !lcfg->lcfg_inlbuf1) {
type = class_get_type(typename);
if (!type) {
CERROR("OBD: unknown type: %s\n", typename);
- RETURN(-EINVAL);
+ RETURN(-ENODEV);
}
cleanup_phase = 1; /* class_put_type */
- obd = class_name2obd(name);
- if (obd != NULL) {
- CERROR("obd %s already attached\n", name);
+ len = strlen(name) + 1;
+ OBD_ALLOC(namecopy, len);
+ if (!namecopy)
+ GOTO(out, rc = -ENOMEM);
+ memcpy(namecopy, name, len);
+ cleanup_phase = 2; /* free obd_name */
+
+ obd = class_newdev(type, namecopy);
+ if (obd == NULL) {
+ /* Already exists or out of obds */
+ CERROR("Can't create device %s\n", name);
GOTO(out, rc = -EEXIST);
}
-
- obd = class_newdev(type);
- if (obd == NULL)
- GOTO(out, rc = -EINVAL);
- cleanup_phase = 2; /* class_release_dev */
+ cleanup_phase = 3; /* class_release_dev */
INIT_LIST_HEAD(&obd->obd_exports);
obd->obd_num_exports = 0;
spin_lock_init(&obd->obd_uncommitted_replies_lock);
INIT_LIST_HEAD(&obd->obd_uncommitted_replies);
- len = strlen(name) + 1;
- OBD_ALLOC(obd->obd_name, len);
- if (!obd->obd_name)
- GOTO(out, rc = -ENOMEM);
- memcpy(obd->obd_name, name, len);
- cleanup_phase = 3; /* free obd_name */
-
len = strlen(uuid);
if (len >= sizeof(obd->obd_uuid)) {
CERROR("uuid must be < "LPSZ" bytes long\n",
out:
switch (cleanup_phase) {
case 3:
- OBD_FREE(obd->obd_name, strlen(obd->obd_name) + 1);
- case 2:
class_release_dev(obd);
+ case 2:
+ OBD_FREE(namecopy, strlen(namecopy) + 1);
case 1:
class_put_type(type);
}
CERROR("Device %d not attached\n", obd->obd_minor);
RETURN(-ENODEV);
}
-
- /* has this been done already? */
+
if (obd->obd_set_up) {
CERROR("Device %d already setup (type %s)\n",
obd->obd_minor, obd->obd_type->typ_name);
- RETURN(-EBUSY);
+ RETURN(-EEXIST);
}
+ /* is someone else setting us up right now? (attach inits spinlock) */
+ spin_lock(&obd->obd_dev_lock);
+ if (obd->obd_starting) {
+ spin_unlock(&obd->obd_dev_lock);
+ CERROR("Device %d setup in progress (type %s)\n",
+ obd->obd_minor, obd->obd_type->typ_name);
+ RETURN(-EEXIST);
+ }
+ /* just leave this on forever. I can't use obd_set_up here because
+ other fns check that status, and we're not actually set up yet. */
+ obd->obd_starting = 1;
+ spin_unlock(&obd->obd_dev_lock);
+
atomic_set(&obd->obd_refcount, 0);
exp = class_new_export(obd);
obd->obd_type->typ_refcnt++;
obd->obd_set_up = 1;
- RETURN(err);
+ RETURN(0);
err_exp:
class_unlink_export(obd->obd_self_export);
CERROR("OBD device %d still set up\n", obd->obd_minor);
RETURN(-EBUSY);
}
+
+ spin_lock(&obd->obd_dev_lock);
if (!obd->obd_attached) {
+ spin_unlock(&obd->obd_dev_lock);
CERROR("OBD device %d not attached\n", obd->obd_minor);
RETURN(-ENODEV);
}
+ obd->obd_attached = 0;
+ spin_unlock(&obd->obd_dev_lock);
+
if (OBP(obd, detach))
err = OBP(obd,detach)(obd);
CERROR("device %d: no name at detach\n", obd->obd_minor);
}
- obd->obd_attached = 0;
+ LASSERT(OBT(obd));
obd->obd_type->typ_refcnt--;
class_put_type(obd->obd_type);
class_release_dev(obd);
RETURN(-ENODEV);
}
+ spin_lock(&obd->obd_dev_lock);
+ if (obd->obd_stopping) {
+ spin_unlock(&obd->obd_dev_lock);
+ CERROR("OBD %d already stopping\n", obd->obd_minor);
+ RETURN(-ENODEV);
+ }
+ /* Leave this on forever */
+ obd->obd_stopping = 1;
+ spin_unlock(&obd->obd_dev_lock);
+
if (lcfg->lcfg_inlbuf1) {
for (flag = lcfg->lcfg_inlbuf1; *flag != 0; flag++)
switch (*flag) {
/* The one reference that should be remaining is the
* obd_self_export */
- if (atomic_read(&obd->obd_refcount) <= 1 ||
- flags & OBD_OPT_FORCE) {
- /* this will stop new connections, and need to
- do it before class_disconnect_exports() */
- obd->obd_stopping = 1;
- }
-
if (atomic_read(&obd->obd_refcount) > 1) {
struct l_wait_info lwi = LWI_TIMEOUT_INTR(1 * HZ, NULL,
NULL, NULL);
err = obd_cleanup(obd, flags);
out:
if (!err) {
- obd->obd_set_up = obd->obd_stopping = 0;
+ obd->obd_set_up = 0;
obd->obd_type->typ_refcnt--;
/* XXX this should be an LASSERT */
if (atomic_read(&obd->obd_refcount) > 0)
CERROR("%s still has refcount %d after "
"cleanup.\n", obd->obd_name,
atomic_read(&obd->obd_refcount));
+ } else {
+ /* Allow a failed cleanup to try again. Note this may be
+ unsafe, since we don't know where this one died. */
+ obd->obd_stopping = 0;
}
RETURN(err);
int len;
ENTRY;
+ if (OBD_FAIL_CHECK(OBD_FAIL_OST_ENOENT)) {
+ CERROR("test case OBD_FAIL_OST_ENOENT\n");
+ RETURN(ERR_PTR(-ENOENT));
+ }
+
if (id == 0) {
CERROR("fatal: invalid object id 0\n");
RETURN(ERR_PTR(-ESTALE));
sema_init(&filter->fo_alloc_lock, 1);
spin_lock_init(&filter->fo_r_pages.oh_lock);
spin_lock_init(&filter->fo_w_pages.oh_lock);
+ spin_lock_init(&filter->fo_read_rpc_hist.oh_lock);
+ spin_lock_init(&filter->fo_write_rpc_hist.oh_lock);
+ spin_lock_init(&filter->fo_r_io_time.oh_lock);
+ spin_lock_init(&filter->fo_w_io_time.oh_lock);
spin_lock_init(&filter->fo_r_discont_pages.oh_lock);
spin_lock_init(&filter->fo_w_discont_pages.oh_lock);
spin_lock_init(&filter->fo_r_discont_blocks.oh_lock);
spin_lock_init(&filter->fo_w_discont_blocks.oh_lock);
+ spin_lock_init(&filter->fo_r_disk_iosize.oh_lock);
+ spin_lock_init(&filter->fo_w_disk_iosize.oh_lock);
filter->fo_readcache_max_filesize = FILTER_MAX_CACHE_SIZE;
sprintf(ns_name, "filter-%s", obd->obd_uuid.uuid);
obd_size want, obd_size fs_space_left);
void filter_grant_commit(struct obd_export *exp, int niocount,
struct niobuf_local *res);
-int filter_alloc_iobuf(int rw, int num_pages, void **ret);
+int filter_alloc_iobuf(struct filter_obd *, int rw, int num_pages, void **ret);
void filter_free_iobuf(void *iobuf);
int filter_iobuf_add_page(struct obd_device *obd, void *iobuf,
struct inode *inode, struct page *page);
push_ctxt(&saved, &exp->exp_obd->obd_ctxt, NULL);
- rc = filter_alloc_iobuf(OBD_BRW_READ, obj->ioo_bufcnt, &iobuf);
+ rc = filter_alloc_iobuf(&obd->u.filter, OBD_BRW_READ, obj->ioo_bufcnt,
+ &iobuf);
if (rc)
GOTO(cleanup, rc);
memset(res, 0, niocount * sizeof(*res));
- rc = filter_alloc_iobuf(OBD_BRW_READ, obj->ioo_bufcnt, &iobuf);
+ rc = filter_alloc_iobuf(&exp->exp_obd->u.filter, OBD_BRW_READ,
+ obj->ioo_bufcnt, &iobuf);
if (rc)
GOTO(cleanup, rc);
cleanup_phase = 1;
iobuf->length = 0;
}
-int filter_alloc_iobuf(int rw, int num_pages, void **ret)
+int filter_alloc_iobuf(struct filter_obd *filter, int rw, int num_pages,
+ void **ret)
{
int rc;
struct kiobuf *iobuf;
if (rc != 0)
GOTO(cleanup, rc);
- rc = filter_alloc_iobuf(OBD_BRW_WRITE, obj->ioo_bufcnt, &iobuf);
+ rc = filter_alloc_iobuf(&obd->u.filter, OBD_BRW_WRITE,
+ obj->ioo_bufcnt, &iobuf);
if (rc)
GOTO(cleanup, rc);
cleanup_phase = 1;
struct page **dr_pages;
unsigned long *dr_blocks;
spinlock_t dr_lock;
+ unsigned long dr_start_time; /* jiffies */
+ struct filter_obd *dr_filter;
};
+/* Account for one bio of dreq that is about to be submitted.
+ *
+ * dreq: request the bio belongs to; its dr_numreqs is incremented and its
+ *       dr_start_time is set to the current jiffies.
+ * rw:   OBD_BRW_READ selects the read histograms/counter, anything else the
+ *       write ones.
+ * size: value tallied (log2-bucketed) into the disk I/O size histogram.
+ *
+ * NOTE(review): dr_start_time is a single field per dreq, so when a request
+ * is split into several bios each call overwrites the previous stamp. */
+static void record_start_io(struct dio_request *dreq, int rw, int size)
+{
+ struct filter_obd *filter = dreq->dr_filter;
+ unsigned long flags;
+
+ atomic_inc(&dreq->dr_numreqs);
+
+ /* Tally before incrementing below, so the histogram records how many
+ * I/Os were already in flight when this one started.
+ * NOTE(review): the in-flight counter is read here without holding
+ * fo_objidlock. */
+ if (rw == OBD_BRW_READ) {
+ lprocfs_oh_tally(&filter->fo_read_rpc_hist,
+ filter->fo_r_in_flight);
+ lprocfs_oh_tally_log2(&filter->fo_r_disk_iosize, size);
+ } else {
+ lprocfs_oh_tally(&filter->fo_write_rpc_hist,
+ filter->fo_w_in_flight);
+ lprocfs_oh_tally_log2(&filter->fo_w_disk_iosize, size);
+ }
+ /* The in-flight counters are updated under fo_objidlock with IRQs
+ * disabled; record_finish_io() decrements them under the same lock. */
+ spin_lock_irqsave(&filter->fo_objidlock, flags);
+ if (rw == OBD_BRW_READ)
+ filter->fo_r_in_flight++;
+ else
+ filter->fo_w_in_flight++;
+ spin_unlock_irqrestore(&filter->fo_objidlock, flags);
+ dreq->dr_start_time = jiffies;
+}
+
+
+/* Account for the end of one bio of dreq, either on completion or after a
+ * failed submission.
+ *
+ * dreq: request the bio belongs to; its dr_numreqs is decremented and anyone
+ *       sleeping on dr_wait is woken when it reaches zero.
+ * rw:   OBD_BRW_READ selects the read counter/histogram, anything else the
+ *       write ones.
+ * rc:   result of the I/O; the elapsed time (in jiffies, log2-bucketed) is
+ *       tallied only when rc is 0.
+ */
+static void record_finish_io(struct dio_request *dreq, int rw, int rc)
+{
+        struct filter_obd *filter = dreq->dr_filter;
+        /* Snapshot the start time now: once dr_numreqs drops to zero below,
+         * the thread waiting on dr_wait is free to go on and release dreq,
+         * so dreq must not be dereferenced after that point. */
+        unsigned long start_time = dreq->dr_start_time;
+        unsigned long flags, stop_time = jiffies;
+
+        /* Counterpart of the increment in record_start_io(), under the same
+         * lock. */
+        spin_lock_irqsave(&filter->fo_objidlock, flags);
+        if (rw == OBD_BRW_READ)
+                filter->fo_r_in_flight--;
+        else
+                filter->fo_w_in_flight--;
+        spin_unlock_irqrestore(&filter->fo_objidlock, flags);
+
+        if (atomic_dec_and_test(&dreq->dr_numreqs))
+                wake_up(&dreq->dr_wait);
+
+        /* Failed I/Os do not contribute to the I/O time histograms. */
+        if (rc != 0)
+                return;
+
+        /* Only filter and locals are touched from here on. */
+        if (rw == OBD_BRW_READ) {
+                lprocfs_oh_tally_log2(&filter->fo_r_io_time,
+                                      stop_time - start_time);
+        } else {
+                lprocfs_oh_tally_log2(&filter->fo_w_io_time,
+                                      stop_time - start_time);
+        }
+}
+
+
static int dio_complete_routine(struct bio *bio, unsigned int done, int error)
{
struct dio_request *dreq = bio->bi_private;
dreq->dr_error = error;
spin_unlock_irqrestore(&dreq->dr_lock, flags);
- if (atomic_dec_and_test(&dreq->dr_numreqs))
- wake_up(&dreq->dr_wait);
+ record_finish_io(dreq, test_bit(BIO_RW, &bio->bi_rw) ?
+ OBD_BRW_WRITE : OBD_BRW_READ, error);
return 0;
}
return bio->bi_sector + size == sector ? 1 : 0;
}
-int filter_alloc_iobuf(int rw, int num_pages, void **ret)
+int filter_alloc_iobuf(struct filter_obd *filter, int rw, int num_pages,
+ void **ret)
{
struct dio_request *dreq;
if (dreq->dr_blocks == NULL)
goto failed_2;
+ dreq->dr_filter = filter;
dreq->dr_bios = NULL;
init_waitqueue_head(&dreq->dr_wait);
atomic_set(&dreq->dr_numreqs, 0);
return 0;
}
-
+
int filter_do_bio(struct obd_device *obd, struct inode *inode,
struct dio_request *dreq, int rw)
{
continue; /* added this frag OK */
if (bio != NULL) {
- request_queue_t *q = bdev_get_queue(bio->bi_bdev);
+ request_queue_t *q =
+ bdev_get_queue(bio->bi_bdev);
/* Dang! I have to fragment this I/O */
CDEBUG(D_INODE, "bio++ sz %d vcnt %d(%d) "
bio_hw_segments(q, bio),
q->max_hw_segments);
- atomic_inc(&dreq->dr_numreqs);
+ record_start_io(dreq, rw, bio->bi_size);
rc = fsfilt_send_bio(rw, obd, inode, bio);
if (rc < 0) {
CERROR("Can't send bio: %d\n", rc);
- /* OK do dec; we do the waiting */
- atomic_dec(&dreq->dr_numreqs);
+ record_finish_io(dreq, rw, rc);
goto out;
}
- rc = 0;
-
- bio = NULL;
}
/* allocate new bio */
}
if (bio != NULL) {
- atomic_inc(&dreq->dr_numreqs);
+ record_start_io(dreq, rw, bio->bi_size);
rc = fsfilt_send_bio(rw, obd, inode, bio);
if (rc >= 0) {
rc = 0;
} else {
CERROR("Can't send bio: %d\n", rc);
- /* OK do dec; we do the waiting */
- atomic_dec(&dreq->dr_numreqs);
+ record_finish_io(dreq, rw, rc);
}
}
struct obd_device *obd = exp->exp_obd;
struct dio_request *dreq = iobuf;
struct inode *inode = dchild->d_inode;
- int rc;
- int rc2;
+ int blocks_per_page = PAGE_SIZE >> inode->i_blkbits;
+ int rc, rc2;
ENTRY;
LASSERTF(rw == OBD_BRW_WRITE || rw == OBD_BRW_READ, "%x\n", rw);
LASSERTF(dreq->dr_npages <= dreq->dr_max_pages, "%d,%d\n",
dreq->dr_npages, dreq->dr_max_pages);
-
- /* XXX FIXME these assertions should be handled properly here or
- * checked elsewhere */
LASSERT(dreq->dr_npages <= OBDFILTER_CREATED_SCRATCHPAD_ENTRIES);
+ LASSERT(dreq->dr_npages > 0 || rw != OBD_BRW_WRITE);
+
if (dreq->dr_npages == 0)
RETURN(0);
if (rw == OBD_BRW_WRITE) {
if (rc == 0) {
-#if 0
filter_tally_write(&obd->u.filter,
dreq->dr_pages,
- dreq->dr_page_idx,
+ dreq->dr_npages,
dreq->dr_blocks,
blocks_per_page);
-#endif
if (attr->ia_size > inode->i_size)
attr->ia_valid |= ATTR_SIZE;
rc = fsfilt_setattr(obd, dchild,
if (rc != 0)
GOTO(cleanup, rc);
- rc = filter_alloc_iobuf(OBD_BRW_WRITE, obj->ioo_bufcnt, (void **)&dreq);
+ rc = filter_alloc_iobuf(&obd->u.filter, OBD_BRW_WRITE, obj->ioo_bufcnt,
+ (void **)&dreq);
if (rc)
GOTO(cleanup, rc);
cleanup_phase = 1;
if (this_size > iattr.ia_size)
iattr.ia_size = this_size;
}
-#if 0
- /* I use this when I'm checking our lovely 1M I/Os reach the disk -eeb */
- if (total_size != (1<<20))
- CWARN("total size %d (%d pages)\n",
- total_size, total_size/PAGE_SIZE);
-#endif
+
push_ctxt(&saved, &obd->obd_ctxt, NULL);
cleanup_phase = 2;
break;
}
+#if (LINUX_VERSION_CODE > KERNEL_VERSION(2,5,0))
+ seq_printf(seq, "\n\t\t\tread\t\t\twrite\n");
+ seq_printf(seq, "disk ios in flight ios %% cum %% |");
+ seq_printf(seq, " rpcs %% cum %%\n");
+
+ read_tot = lprocfs_oh_sum(&filter->fo_read_rpc_hist);
+ write_tot = lprocfs_oh_sum(&filter->fo_write_rpc_hist);
+
+ read_cum = 0;
+ write_cum = 0;
+ for (i = 0; i < OBD_HIST_MAX; i++) {
+ unsigned long r = filter->fo_read_rpc_hist.oh_buckets[i];
+ unsigned long w = filter->fo_write_rpc_hist.oh_buckets[i];
+ read_cum += r;
+ write_cum += w;
+ seq_printf(seq, "%d:\t\t%10lu %3lu %3lu | %10lu %3lu %3lu\n",
+ i, r, pct(r, read_tot),
+ pct(read_cum, read_tot), w,
+ pct(w, write_tot),
+ pct(write_cum, write_tot));
+ if (read_cum == read_tot && write_cum == write_tot)
+ break;
+ }
+
+ seq_printf(seq, "\n\t\t\tread\t\t\twrite\n");
+ seq_printf(seq, "io time (jiffies) rpcs %% cum %% |");
+ seq_printf(seq, " rpcs %% cum %%\n");
+
+ read_tot = lprocfs_oh_sum(&filter->fo_r_io_time);
+ write_tot = lprocfs_oh_sum(&filter->fo_w_io_time);
+
+ read_cum = 0;
+ write_cum = 0;
+ for (i = 0; i < OBD_HIST_MAX; i++) {
+ unsigned long r = filter->fo_r_io_time.oh_buckets[i];
+ unsigned long w = filter->fo_w_io_time.oh_buckets[i];
+ read_cum += r;
+ write_cum += w;
+ seq_printf(seq, "%d:\t\t%10lu %3lu %3lu | %10lu %3lu %3lu\n",
+ 1 << i, r, pct(r, read_tot),
+ pct(read_cum, read_tot), w,
+ pct(w, write_tot),
+ pct(write_cum, write_tot));
+ if (read_cum == read_tot && write_cum == write_tot)
+ break;
+ }
+
+ seq_printf(seq, "\n\t\t\tread\t\t\twrite\n");
+ seq_printf(seq, "disk I/O size count %% cum %% |");
+ seq_printf(seq, " count %% cum %%\n");
+
+ read_tot = lprocfs_oh_sum(&filter->fo_r_disk_iosize);
+ write_tot = lprocfs_oh_sum(&filter->fo_w_disk_iosize);
+
+ read_cum = 0;
+ write_cum = 0;
+ for (i = 0; i < OBD_HIST_MAX; i++) {
+ unsigned long r = filter->fo_r_disk_iosize.oh_buckets[i];
+ unsigned long w = filter->fo_w_disk_iosize.oh_buckets[i];
+
+ read_cum += r;
+ write_cum += w;
+ if (i < 10)
+ seq_printf(seq, "%d", 1<<i);
+ else if (i < 20)
+ seq_printf(seq, "%dK", 1<<(i-10));
+ else
+ seq_printf(seq, "%dM", 1<<(i-20));
+
+ seq_printf(seq, ":\t\t%10lu %3lu %3lu | %10lu %3lu %3lu\n",
+ r, pct(r, read_tot), pct(read_cum, read_tot),
+ w, pct(w, write_tot), pct(write_cum, write_tot));
+ if (read_cum == read_tot && write_cum == write_tot)
+ break;
+ }
+#endif
+
return 0;
}
#undef pct
lprocfs_oh_clear(&filter->fo_r_pages);
lprocfs_oh_clear(&filter->fo_w_pages);
+ lprocfs_oh_clear(&filter->fo_read_rpc_hist);
+ lprocfs_oh_clear(&filter->fo_write_rpc_hist);
+ lprocfs_oh_clear(&filter->fo_r_io_time);
+ lprocfs_oh_clear(&filter->fo_w_io_time);
lprocfs_oh_clear(&filter->fo_r_discont_pages);
lprocfs_oh_clear(&filter->fo_w_discont_pages);
lprocfs_oh_clear(&filter->fo_r_discont_blocks);
lprocfs_oh_clear(&filter->fo_w_discont_blocks);
+ lprocfs_oh_clear(&filter->fo_r_disk_iosize);
+ lprocfs_oh_clear(&filter->fo_w_disk_iosize);
return len;
}
break;
}
+ seq_printf(seq, "\n\t\t\tread\t\t\twrite\n");
+ seq_printf(seq, "offset rpcs %% cum %% |");
+ seq_printf(seq, " rpcs %% cum %%\n");
+
+ read_tot = lprocfs_oh_sum(&cli->cl_read_offset_hist);
+ write_tot = lprocfs_oh_sum(&cli->cl_write_offset_hist);
+
+ read_cum = 0;
+ write_cum = 0;
+ for (i = 0; i < OBD_HIST_MAX; i++) {
+ unsigned long r = cli->cl_read_offset_hist.oh_buckets[i];
+ unsigned long w = cli->cl_write_offset_hist.oh_buckets[i];
+ read_cum += r;
+ write_cum += w;
+ seq_printf(seq, "%d:\t\t%10lu %3lu %3lu | %10lu %3lu %3lu\n",
+ (i == 0) ? 0 : 1 << (i - 1),
+ r, pct(r, read_tot), pct(read_cum, read_tot),
+ w, pct(w, write_tot), pct(write_cum, write_tot));
+ if (read_cum == read_tot && write_cum == write_tot)
+ break;
+ }
+
spin_unlock_irqrestore(&cli->cl_loi_list_lock, flags);
return 0;
lprocfs_oh_clear(&cli->cl_write_rpc_hist);
lprocfs_oh_clear(&cli->cl_read_page_hist);
lprocfs_oh_clear(&cli->cl_write_page_hist);
+ lprocfs_oh_clear(&cli->cl_read_offset_hist);
+ lprocfs_oh_clear(&cli->cl_write_offset_hist);
return len;
}
struct osc_brw_async_args *aa;
struct obd_async_page_ops *ops;
LIST_HEAD(rpc_list);
+ unsigned int ending_offset;
+ unsigned starting_offset = 0;
ENTRY;
/* first we find the pages we're allowed to work with */
lop_update_pending(cli, lop, cmd, -1);
list_del_init(&oap->oap_urgent_item);
+ if (page_count == 0)
+ starting_offset = (oap->oap_obj_off + oap->oap_page_off) &
+ (PTLRPC_MAX_BRW_SIZE - 1);
+
/* ask the caller for the size of the io as the rpc leaves. */
if (!(oap->oap_async_flags & ASYNC_COUNT_STABLE))
oap->oap_count =
list_add_tail(&oap->oap_rpc_item, &rpc_list);
if (++page_count >= cli->cl_max_pages_per_rpc)
break;
+
+ /* End on a PTLRPC_MAX_BRW_SIZE boundary. We want full-sized
+ * RPCs aligned on PTLRPC_MAX_BRW_SIZE boundaries to help reads
+ * have the same alignment as the initial writes that allocated
+ * extents on the server. */
+ ending_offset = (oap->oap_obj_off + oap->oap_page_off +
+ oap->oap_count) & (PTLRPC_MAX_BRW_SIZE - 1);
+ if (ending_offset == 0)
+ break;
}
osc_wake_cache_waiters(cli);
if (cmd == OBD_BRW_READ) {
lprocfs_oh_tally_log2(&cli->cl_read_page_hist, page_count);
lprocfs_oh_tally(&cli->cl_read_rpc_hist, cli->cl_r_in_flight);
+ lprocfs_oh_tally_log2(&cli->cl_read_offset_hist,
+ starting_offset/PAGE_SIZE + 1);
} else {
lprocfs_oh_tally_log2(&cli->cl_write_page_hist, page_count);
lprocfs_oh_tally(&cli->cl_write_rpc_hist,
cli->cl_w_in_flight);
+ lprocfs_oh_tally_log2(&cli->cl_write_offset_hist,
+ starting_offset/PAGE_SIZE + 1);
}
#endif
IMPORT_SET_STATE(imp, LUSTRE_IMP_EVICTED);
} else if (MSG_CONNECT_RECOVERING & msg_flags) {
CDEBUG(D_HA, "%s: reconnected to %s during replay\n",
- imp->imp_obd->obd_name,
+ imp->imp_obd->obd_name,
imp->imp_target_uuid.uuid);
imp->imp_resend_replay = 1;
IMPORT_SET_STATE(imp, LUSTRE_IMP_REPLAY);
}
static int signal_completed_replay(struct obd_import *imp)
- {
+{
struct ptlrpc_request *req;
ENTRY;
(long long)OBD_MD_FLCOOKIE);
LASSERTF(OBD_MD_FLGROUP == 16777216, " found %lld\n",
(long long)OBD_MD_FLGROUP);
- LASSERTF(OBD_FL_INLINEDATA == 1, " found %lld\n",
- (long long)OBD_FL_INLINEDATA);
- LASSERTF(OBD_FL_OBDMDEXISTS == 2, " found %lld\n",
- (long long)OBD_FL_OBDMDEXISTS);
LASSERTF(OBD_FL_DELORPHAN == 4, " found %lld\n",
(long long)OBD_FL_DELORPHAN);
- LASSERTF(OBD_FL_NORPC == 8, " found %lld\n",
- (long long)OBD_FL_NORPC);
- LASSERTF(OBD_FL_IDONLY == 16, " found %lld\n",
- (long long)OBD_FL_IDONLY);
LASSERTF(OBD_FL_RECREATE_OBJS == 32, " found %lld\n",
(long long)OBD_FL_RECREATE_OBJS);
(long long)offsetof(struct lov_ost_data_v1, l_ost_idx));
LASSERTF((int)sizeof(((struct lov_ost_data_v1 *)0)->l_ost_idx) == 4, " found %lld\n",
(long long)(int)sizeof(((struct lov_ost_data_v1 *)0)->l_ost_idx));
- LASSERTF(LOV_MAGIC_V0 == 198183888, " found %lld\n",
- (long long)LOV_MAGIC_V0);
LASSERTF(LOV_MAGIC_V1 == 198249424, " found %lld\n",
(long long)LOV_MAGIC_V1);
LASSERTF(LOV_PATTERN_RAID0 == 1, " found %lld\n",
my $line;
my $tagfile = new IO::File;
- if (!$tagfile->open("CVS/Tag")) {
+ if (!$tagfile->open("lustre/CVS/Tag")) {
my $verfile = new IO::File;
- if (!$verfile->open("include/config.h")) {
+ if (!$verfile->open("config.h")) {
return "UNKNOWN";
}
while(defined($line = <$verfile>)) {
noinst_PROGRAMS += tchmod toexcl fsx test_brw openclose createdestroy
noinst_PROGRAMS += stat createmany statmany multifstat createtest mlink utime
noinst_PROGRAMS += opendirunlink opendevunlink unlinkmany fchdir_test checkstat
-noinst_PROGRAMS += wantedi statone runas openfile getdents mkdirdeep o_directory
+noinst_PROGRAMS += wantedi statone runas openfile getdents o_directory
noinst_PROGRAMS += small_write multiop sleeptest ll_sparseness_verify cmknod
noinst_PROGRAMS += ll_sparseness_write mrename ll_dirstripe_verify mkdirmany rmdirmany
noinst_PROGRAMS += openfilleddirunlink rename_many memhog iopentest1 iopentest2
noinst_PROGRAMS += mmap_sanity
-# noinst_PROGRAMS += ldaptest copy_attr
+if MPITESTS
+noinst_PROGRAMS += write_append_truncate createmany_mpi
+endif
+# noinst_PROGRAMS += ldaptest copy_attr mkdirdeep
bin_PROGRAMS = mcreate munlink
endif # TESTS
stat_SOURCES = stat.c stat_fs.h
mkdirdeep_LDADD=-L$(top_builddir)/portals/utils -lptlctl $(LIBREADLINE)
-#write_append_truncate_CC=mpicc
-#createmany_mpi_CC=mpicc
+
+if MPITESTS
+LAM_LD_FLAGS=-L/opt/lam/lib -lmpi -llam -lpthread
+write_append_truncate_SOURCES=write_append_truncate.c
+write_append_truncate_LDADD=$(LAM_LD_FLAGS)
+createmany_mpi_SOURCES=createmany-mpi.c
+createmany_mpi_LDADD=$(LAM_LD_FLAGS)
+endif
+
#copy_attr_LDADD= -lattr
int main(int argc, char **argv)
{
int fd;
- char *rbuf, *wbuf;
+ char *wbuf;
int blocks, seek_blocks;
long len;
off64_t seek;
struct stat64 st;
+ int action;
int rc;
- if (argc < 4 || argc > 5) {
- printf("Usage: %s file seek nr_blocks [blocksize]\n", argv[0]);
+ if (argc < 5 || argc > 6) {
+ printf("Usage: %s <read/write/rdwr> file seek nr_blocks [blocksize]\n", argv[0]);
return 1;
}
- seek_blocks = strtoul(argv[2], 0, 0);
- blocks = strtoul(argv[3], 0, 0);
+ if (!strcmp(argv[1], "read"))
+ action = O_RDONLY;
+ else if (!strcmp(argv[1], "write"))
+ action = O_WRONLY;
+ else if (!strcmp(argv[1], "rdwr"))
+ action = O_RDWR;
+ else {
+ printf("Usage: %s <read/write/rdwr> file seek nr_blocks [blocksize]\n", argv[0]);
+ return 1;
+ }
+
+ seek_blocks = strtoul(argv[3], 0, 0);
+ blocks = strtoul(argv[4], 0, 0);
+ if (!blocks) {
+ printf("Usage: %s <read/write/rdwr> file seek nr_blocks [blocksize]\n", argv[0]);
+ return 1;
+ }
- fd = open(argv[1], O_LARGEFILE | O_DIRECT | O_RDWR | O_CREAT, 0644);
+ fd = open(argv[2], O_LARGEFILE | O_DIRECT | O_RDWR | O_CREAT, 0644);
if (fd == -1) {
- printf("Cannot open %s: %s\n", argv[1], strerror(errno));
+ printf("Cannot open %s: %s\n", argv[2], strerror(errno));
return 1;
}
- if (argc == 5)
- st.st_blksize = strtoul(argv[4], 0, 0);
+ /* argv[1] is now the action, so the optional blocksize is argv[5]
+ * (argv[4] holds nr_blocks) and the file name is argv[2]. */
+ if (argc == 6)
+ st.st_blksize = strtoul(argv[5], 0, 0);
else if (fstat64(fd, &st) < 0) {
- printf("Cannot stat %s: %s\n", argv[1], strerror(errno));
+ printf("Cannot stat %s: %s\n", argv[2], strerror(errno));
st.st_blksize);
seek = (off64_t)seek_blocks * (off64_t)st.st_blksize;
- if (lseek64(fd, seek, SEEK_SET) < 0) {
- printf("lseek64 failed: %s\n", strerror(errno));
- return 1;
- }
-
len = blocks * st.st_blksize;
+
wbuf = mmap(0, len, PROT_READ|PROT_WRITE, MAP_PRIVATE|MAP_ANON, 0, 0);
if (wbuf == MAP_FAILED) {
printf("No memory %s\n", strerror(errno));
return 1;
}
+ memset(wbuf, 0xba, len);
- rbuf = mmap(0, len, PROT_READ|PROT_WRITE, MAP_PRIVATE|MAP_ANON, 0, 0);
- if (rbuf == MAP_FAILED) {
- printf("No memory %s\n", strerror(errno));
- return 1;
- }
+ if (action == O_WRONLY || action == O_RDWR) {
+ if (lseek64(fd, seek, SEEK_SET) < 0) {
+ printf("lseek64 failed: %s\n", strerror(errno));
+ return 1;
+ }
- memset(wbuf, 0xba, len);
- rc = write(fd, wbuf, len);
- if (rc != len) {
- printf("Write error %s (rc = %d)\n", strerror(errno), rc);
- return 1;
+ rc = write(fd, wbuf, len);
+ if (rc != len) {
+ printf("Write error %s (rc = %d)\n", strerror(errno), rc);
+ return 1;
+ }
}
- if (lseek64(fd, seek, SEEK_SET) < 0) {
- printf("Cannot seek %s\n", strerror(errno));
- return 1;
- }
+ if (action == O_RDONLY || action == O_RDWR) {
+ char *rbuf;
- rc = read(fd, rbuf, len);
- if (rc != len) {
- printf("Read error: %s (rc = %d)\n", strerror(errno), rc);
- return 1;
- }
+ if (lseek64(fd, seek, SEEK_SET) < 0) {
+ printf("Cannot seek %s\n", strerror(errno));
+ return 1;
+ }
- if (memcmp(wbuf, rbuf, len)) {
- printf("Data mismatch\n");
- return 1;
+ rbuf = mmap(0, len, PROT_READ|PROT_WRITE, MAP_PRIVATE|MAP_ANON, 0, 0);
+ if (rbuf == MAP_FAILED) {
+ printf("No memory %s\n", strerror(errno));
+ return 1;
+ }
+
+ rc = read(fd, rbuf, len);
+ if (rc != len) {
+ printf("Read error: %s (rc = %d)\n", strerror(errno), rc);
+ return 1;
+ }
+
+ if (memcmp(wbuf, rbuf, len)) {
+ printf("Data mismatch\n");
+ return 1;
+ }
}
printf("PASS\n");
#define T1 "write data before unlink\n"
#define T2 "write data after unlink\n"
-char buf[128];
+char buf[] = "yabba dabba doo, I'm coming for you, I live in a shoe, I don't know what to do.\n'Bigger, bigger,and bigger yet!' cried the Creator. 'You are not yet substantial enough for my boundless intents!' And ever greater and greater the object became, until all was lost 'neath its momentus bulk.\n";
char usage[] =
"Usage: %s filename command-sequence\n"
" c close\n"
" _ wait for signal\n"
" R reference entire mmap-ed region\n"
-" r read\n"
+" r[num] read [optional length]\n"
" s stat\n"
" S fstat\n"
" t fchmod\n"
-" T ftruncate to zero\n"
-" w write\n"
+" T[num] ftruncate [optional position, default 0]\n"
+" w[num] write optional length\n"
" W write entire mmap-ed region\n"
" y fsync\n"
" Y fdatasync\n"
-" z seek to zero\n";
+" z[num] seek [optional position, default 0]\n";
static int usr1_received;
void usr1_handler(int unused)
return argv[cur_arg++];
}
#define POP_ARG() (pop_arg(argc, argv))
+#define min(a,b) ((a)>(b)?(b):(a))
int main(int argc, char **argv)
{
struct stat st;
size_t mmap_len = 0, i;
unsigned char *mmap_ptr = NULL, junk = 0;
- int fd = -1;
+ int rc, len, fd = -1;
if (argc < 3) {
fprintf(stderr, usage, argv[0]);
exit(1);
}
break;
- case 'r': {
- char buf;
- if (read(fd, &buf, 1) == -1) {
- perror("read");
- exit(1);
- }
+ case 'r':
+ len = atoi(commands+1);
+ if (len <= 0)
+ len = 1;
+ while(len > 0) {
+ if (read(fd, &buf,
+ min(len,sizeof(buf))) == -1) {
+ perror("read");
+ exit(1);
+ }
+ len -= sizeof(buf);
}
+ break;
case 'S':
if (fstat(fd, &st) == -1) {
perror("fstat");
}
break;
case 'T':
- if (ftruncate(fd, 0) == -1) {
+ len = atoi(commands+1);
+ if (ftruncate(fd, len) == -1) {
+ printf("ftruncate (%d,%d)\n", fd, len);
perror("ftruncate");
exit(1);
}
exit(1);
}
break;
- case 'w': {
- int rc;
- if ((rc = write(fd, "w", 1)) == -1) {
- perror("write");
- exit(1);
+ case 'w':
+ len = atoi(commands+1);
+ if (len <= 0)
+ len = 1;
+ while(len > 0) {
+ if ((rc = write(fd, buf,
+ min(len, sizeof(buf))))
+ == -1) {
+ perror("write");
+ exit(1);
+ }
+ len -= sizeof(buf);
}
break;
- }
case 'W':
for (i = 0; i < mmap_len && mmap_ptr; i += 4096)
mmap_ptr[i] += junk++;
exit(1);
}
case 'z':
- if (lseek(fd, 0, SEEK_SET) == -1) {
+ len = atoi(commands+1);
+ if (lseek(fd, len, SEEK_SET) == -1) {
perror("lseek");
exit(1);
}
break;
+ case '0':
+ case '1':
+ case '2':
+ case '3':
+ case '4':
+ case '5':
+ case '6':
+ case '7':
+ case '8':
+ case '9':
+ break;
default:
fprintf(stderr, "unknown command \"%c\"\n", *commands);
fprintf(stderr, usage, argv[0]);
done
if [ -z "$OSCFULL" ]; then
echo "no OSTs are close to full"
- grep "[0-9]" /proc/fs/lustre/osc/OSC*MNT*/{kbytesavail,cur*} |tee -a $LOG
+ grep "[0-9]" /proc/fs/lustre/osc/OSC*MNT*/{kbytesavail,cur*}|tee -a $LOG
SUCCESS=0
fi
wait $OPENPID
dmesg | grep "entering recovery in server" && \
error "client not evicted" || true
- set +vx
}
run_test 18 "ldlm_handle_enqueue succeeds on evicted export (3822)"
gen_config() {
rm -f $XMLCONFIG
add_mds mds --dev $MDSDEV --size $MDSSIZE
- add_lov lov1 mds --stripe_sz $STRIPE_BYTES\
+ add_lov lov1 mds --stripe_sz $STRIPE_BYTES \
--stripe_cnt $STRIPES_PER_OBJ --stripe_pattern 0
add_ost ost --lov lov1 --dev $OSTDEV --size $OSTSIZE --failover
if [ ! -z "$ostfailover_HOST" ]; then
[ "$DAEMONFILE" ] && $LCTL debug_daemon start $DAEMONFILE $DAEMONSIZE
start mds $MDSLCONFARGS --reformat
grep " $MOUNT " /proc/mounts || zconf_mount `hostname` $MOUNT
- #echo 0x3f0410 > /proc/sys/portals/debug
}
$SETUP
// The command to be run
execvp(my_argv[0], my_argv);
- fprintf(stderr, "execvp fails running %s\n", my_argv[0]);
+ fprintf(stderr, "execvp fails running %s (%d): %s\n", my_argv[0],
+ errno, strerror(errno));
exit(-1);
}
ALWAYS_EXCEPT=${ALWAYS_EXCEPT:-"42a 42c 45"}
# UPDATE THE COMMENT ABOVE WITH BUG NUMBERS WHEN CHANGING ALWAYS_EXCEPT!
-[ "$ALWAYS_EXCEPT$EXCEPT" ] && echo "Skipping tests: $ALWAYS_EXCEPT $EXCEPT"
+[ "$ALWAYS_EXCEPT$EXCEPT" ] && \
+ echo "Skipping tests: `echo $ALWAYS_EXCEPT $EXCEPT`"
SRCDIR=`dirname $0`
export PATH=$PWD/$SRCDIR:$SRCDIR:$SRCDIR/../utils:$PATH:/sbin
IOPENTEST1=${IOPENTEST1:-iopentest1}
IOPENTEST2=${IOPENTEST2:-iopentest2}
MEMHOG=${MEMHOG:-memhog}
+DIRECTIO=${DIRECTIO:-directio}
if [ $UID -ne 0 ]; then
RUNAS_ID="$UID"
log() {
echo "$*"
- lctl mark "$*" 2> /dev/null || true
+ $LCTL mark "$*" 2> /dev/null || true
}
trace() {
# bug 2248 - async write errors didn't return to application on sync
# bug 3677 - async write errors left page locked
test_63b() {
+ # Save the current debug mask and set portals.debug to -1 (presumably
+ # "all flags" - confirm) so the log dumped on failure is complete.
+ DBG_SAVE=`cat /proc/sys/portals/debug`
+ sysctl -w portals.debug=-1
+
# ensure we have a grant to do async writes
dd if=/dev/zero of=/mnt/lustre/f63b bs=4k count=1
rm /mnt/lustre/f63b
#define OBD_FAIL_OSC_BRW_PREP_REQ 0x406
sysctl -w lustre.fail_loc=0x80000406
- multiop /mnt/lustre/f63b Owy && error "sync didn't return ENOMEM"
+ multiop /mnt/lustre/f63b Owy && \
+ $LCTL dk /tmp/test63b.debug && \
+ sysctl -w portals.debug=$DBG_SAVE && \
+ error "sync didn't return ENOMEM"
grep -q locked /proc/fs/lustre/llite/fs*/dump_page_cache && \
+ $LCTL dk /tmp/test63b.debug && \
+ sysctl -w portals.debug=$DBG_SAVE && \
+ error "locked page left in cache after async error" || true
+ sysctl -w portals.debug=$DBG_SAVE
}
run_test 63b "async write errors should be returned to fsync ==="
}
run_test 68 "support swapping to Lustre ========================"
+# bug5265, obdfilter oa2dentry return -ENOENT
+# #define OBD_FAIL_OST_ENOENT 0x217
+test_69() {
+ [ -z "`lsmod|grep obdfilter`" ] &&
+ echo "skipping test 69 (remote OST)" && return
+
+ f="$DIR/f69"
+ touch $f
+
+ echo 0x217 > /proc/sys/lustre/fail_loc
+ truncate $f 1 # vmtruncate() will ignore truncate() error.
+ $DIRECTIO write $f 0 2 && error "write succeeded, expect -ENOENT"
+
+ echo 0 > /proc/sys/lustre/fail_loc
+ $DIRECTIO write $f 0 2 || error "write error"
+
+ cancel_lru_locks OSC
+ $DIRECTIO read $f 0 1 || error "read error"
+
+ echo 0x217 > /proc/sys/lustre/fail_loc
+ $DIRECTIO read $f 1 1 && error "read succeeded, expect -ENOENT"
+
+ echo 0 > /proc/sys/lustre/fail_loc
+ rm -f $f
+}
+run_test 69 "verify oa2dentry return -ENOENT doesn't LBUG ======"
+
# on the LLNL clusters, runas will still pick up root's $TMP settings,
# which will not be writable for the runas user, and then you get a CVS
# error message with a corrupt path string (CVS bug) and panic.
}
#run_test 19 "test concurrent uncached read races ==============="
+test_20() {
+ mkdir $DIR1/d20
+ cancel_lru_locks OSC
+ CNT=$((`cat /proc/fs/lustre/llite/fs0/dump_page_cache | wc -l`))
+ multiop $DIR1/f20 Ow8190c
+ multiop $DIR2/f20 Oz8194w8190c
+ multiop $DIR1/f20 Oz0r8190c
+ cancel_lru_locks OSC
+ CNTD=$((`cat /proc/fs/lustre/llite/fs0/dump_page_cache | wc -l` - $CNT))
+ [ $CNTD -gt 0 ] && \
+ error $CNTD" page left in cache after lock cancel" || true
+}
+
+run_test 20 "test extra readahead page left in cache ===="
+
+
log "cleanup: ======================================================"
rm -rf $DIR1/[df][0-9]* $DIR1/lnk || true
for d in /proc/fs/lustre/ldlm/namespaces/$1*; do
if [ -f $d/lru_size ]; then
echo clear > $d/lru_size
- grep [0-9] $d/lock_unused_count
+ grep "[0-9]" $d/lock_unused_count
fi
done
$LCTL mark "cancel_lru_locks stop"
}
build_test_filter() {
- [ "$ONLY" ] && log "only running test $ONLY"
+ [ "$ONLY" ] && log "only running test `echo $ONLY`"
for O in $ONLY; do
eval ONLY_${O}=true
done
- [ "$EXCEPT$ALWAYS_EXCEPT" ] && log "skipping $EXCEPT $ALWAYS_EXCEPT"
+ [ "$EXCEPT$ALWAYS_EXCEPT" ] && \
+ log "skipping `echo $EXCEPT $ALWAYS_EXCEPT`"
for E in $EXCEPT $ALWAYS_EXCEPT; do
eval EXCEPT_${E}=true
done
#include <linux/types.h>
#include <linux/unistd.h>
+#include <portals/ptlctl.h>
+
#include <liblustre.h>
#include <linux/obd.h>
#include <linux/lustre_lib.h>
#include <lustre/lustre_user.h>
#include <linux/obd_lov.h>
-#include <portals/ptlctl.h>
-
static void err_msg(char *fmt, ...)
{
va_list args;
static int ignored_option(const char *check)
{
char *ignore[] = { "noatime", "async", "rw", "suid", "dev",
- "exec", "nouser", NULL };
+ "exec", "nouser", "auto", "noauto", NULL };
char **which = ignore;
while (*which != NULL) {
progname = strrchr(argv[0], '/');
progname = progname ? progname + 1 : argv[0];
- while ((opt = getopt_long(argc, argv, "fhno:v" ,long_opt,NULL)) != EOF){
+ while ((opt = getopt_long(argc, argv, "fhno:v", long_opt,NULL)) != EOF){
switch (opt) {
case 1:
++force;
struct timeval start, next_time;
__u64 count, next_count, len, stride, thr_offset = 0, objid = 3;
int write = 0, verbose = 1, cmd, i, rc = 0, pages = 1;
+ int offset_pages = 0;
long n;
int repeat_offset = 0;
unsigned long long ull;
if (argc >= 5) {
pages = strtoul(argv[4], &end, 0);
- if (*end) {
- fprintf(stderr, "error: %s: bad page count '%s'\n",
+
+ if (*end == '+')
+ offset_pages = strtoul(end + 1, &end, 0);
+
+ if (*end != 0 ||
+ offset_pages < 0 || offset_pages >= pages) {
+ fprintf(stderr, "error: %s: bad npages[+offset] parameter '%s'\n",
jt_cmdname(argv[0]), argv[4]);
return CMD_HELP;
}
}
len = pages * PAGE_SIZE;
+ thr_offset = offset_pages * PAGE_SIZE;
stride = len;
-
+
if (thread) {
pthread_mutex_lock (&shared_data->mutex);
if (nthr_per_obj != 0) {
obj_idx = (thread - 1)/nthr_per_obj;
objid += obj_idx;
stride *= nthr_per_obj;
- thr_offset = ((thread - 1) % nthr_per_obj) * len;
- if (thr_offset == 0)
- shared_data->offsets[obj_idx] = stride;
+ if (thread == 1)
+ shared_data->offsets[obj_idx] = stride + thr_offset;
+ thr_offset += ((thread - 1) % nthr_per_obj) * len;
} else {
/* threads disjoint */
- thr_offset = (thread - 1) * len;
+ thr_offset += (thread - 1) * len;
}
shared_data->barrier--;
jt_cmdname(argv[0]), i, strerror(rc = errno),
write ? "write" : "read");
break;
- } else if (be_verbose(verbose, &next_time,i, &next_count,count))
+ } else if (be_verbose(verbose, &next_time,i, &next_count,count)) {
+ pthread_mutex_lock (&shared_data->mutex);
printf("%s: %s number %d @ "LPD64":"LPU64" for %d\n",
jt_cmdname(argv[0]), write ? "write" : "read", i,
data.ioc_obdo1.o_id, data.ioc_offset,
(int)(pages * PAGE_SIZE));
-
+ pthread_mutex_unlock (&shared_data->mutex);
+ }
+
if (!repeat_offset) {
if (stride == len) {
data.ioc_offset += stride;
CHECK_VALUE(OBD_MD_FLCOOKIE);
CHECK_VALUE(OBD_MD_FLGROUP);
- CHECK_VALUE(OBD_FL_INLINEDATA);
- CHECK_VALUE(OBD_FL_OBDMDEXISTS);
CHECK_VALUE(OBD_FL_DELORPHAN);
- CHECK_VALUE(OBD_FL_NORPC);
- CHECK_VALUE(OBD_FL_IDONLY);
CHECK_VALUE(OBD_FL_RECREATE_OBJS);
}
CHECK_MEMBER(lov_ost_data_v1, l_ost_gen);
CHECK_MEMBER(lov_ost_data_v1, l_ost_idx);
- CHECK_VALUE(LOV_MAGIC_V0);
CHECK_VALUE(LOV_MAGIC_V1);
CHECK_VALUE(LOV_PATTERN_RAID0);
(long long)OBD_MD_FLCOOKIE);
LASSERTF(OBD_MD_FLGROUP == 16777216, " found %lld\n",
(long long)OBD_MD_FLGROUP);
- LASSERTF(OBD_FL_INLINEDATA == 1, " found %lld\n",
- (long long)OBD_FL_INLINEDATA);
- LASSERTF(OBD_FL_OBDMDEXISTS == 2, " found %lld\n",
- (long long)OBD_FL_OBDMDEXISTS);
LASSERTF(OBD_FL_DELORPHAN == 4, " found %lld\n",
(long long)OBD_FL_DELORPHAN);
- LASSERTF(OBD_FL_NORPC == 8, " found %lld\n",
- (long long)OBD_FL_NORPC);
- LASSERTF(OBD_FL_IDONLY == 16, " found %lld\n",
- (long long)OBD_FL_IDONLY);
LASSERTF(OBD_FL_RECREATE_OBJS == 32, " found %lld\n",
(long long)OBD_FL_RECREATE_OBJS);
(long long)offsetof(struct lov_ost_data_v1, l_ost_idx));
LASSERTF((int)sizeof(((struct lov_ost_data_v1 *)0)->l_ost_idx) == 4, " found %lld\n",
(long long)(int)sizeof(((struct lov_ost_data_v1 *)0)->l_ost_idx));
- LASSERTF(LOV_MAGIC_V0 == 198183888, " found %lld\n",
- (long long)LOV_MAGIC_V0);
LASSERTF(LOV_MAGIC_V1 == 198249424, " found %lld\n",
(long long)LOV_MAGIC_V1);
LASSERTF(LOV_PATTERN_RAID0 == 1, " found %lld\n",