Diagnostic patch to check whether the Lustre read-only device mechanism works correctly.
i=alex.zhuravlev
i=rahul.deshmukh
--- /dev/null
+Index: linux-stage/fs/ext3/mballoc.c
+===================================================================
+--- linux-stage.orig/fs/ext3/mballoc.c 2010-01-26 23:36:25.000000000 +0800
++++ linux-stage/fs/ext3/mballoc.c 2010-01-26 23:42:57.000000000 +0800
+@@ -35,6 +35,7 @@
+ #include <linux/pagemap.h>
+ #include <linux/seq_file.h>
+ #include <linux/version.h>
++#include <linux/genhd.h>
+
+ #include "group.h"
+
+@@ -360,6 +361,7 @@
+ unsigned short pa_free; /* how many blocks are free */
+ unsigned short pa_linear; /* consumed in one direction
+ * strictly, for group prealloc */
++ unsigned short pa_error;
+ spinlock_t *pa_obj_lock;
+ struct inode *pa_inode; /* hack, for history only */
+ };
+@@ -3558,6 +3560,7 @@
+ spin_lock_init(&pa->pa_lock);
+ pa->pa_deleted = 0;
+ pa->pa_linear = 0;
++ pa->pa_error = 0;
+
+ mb_debug("new inode pa %p: %lu/%lu for %lu\n", pa,
+ pa->pa_pstart, pa->pa_len, pa->pa_lstart);
+@@ -3615,6 +3618,7 @@
+ spin_lock_init(&pa->pa_lock);
+ pa->pa_deleted = 0;
+ pa->pa_linear = 1;
++ pa->pa_error = 0;
+
+ mb_debug("new group pa %p: %lu/%lu for %lu\n", pa,
+ pa->pa_pstart, pa->pa_len, pa->pa_lstart);
+@@ -3671,7 +3675,10 @@
+ sector_t start;
+ int err = 0, free = 0;
+
++ BUG_ON(!ext3_is_group_locked(sb, e3b->bd_group));
+ BUG_ON(pa->pa_deleted == 0);
++ BUG_ON(pa->pa_linear != 0);
++ BUG_ON(pa->pa_inode == NULL);
+ ext3_get_group_no_and_offset(sb, pa->pa_pstart, &group, &bit);
+ BUG_ON(group != e3b->bd_group && pa->pa_len != 0);
+ end = bit + pa->pa_len;
+@@ -3704,14 +3711,19 @@
+ mb_free_blocks(pa->pa_inode, e3b, bit, next - bit);
+ bit = next + 1;
+ }
+- if (free != pa->pa_free) {
+- printk("pa %p: logic %lu, phys. %lu, len %lu\n",
+- pa, (unsigned long) pa->pa_lstart,
+- (unsigned long) pa->pa_pstart,
+- (unsigned long) pa->pa_len);
+- printk("free %u, pa_free %u\n", free, pa->pa_free);
+- }
+- BUG_ON(free != pa->pa_free);
++
++ /* "free < pa->pa_free" means we maybe double alloc the same blocks,
++ * otherwise maybe leave some free blocks unavailable, no need to BUG.*/
++ if ((free > pa->pa_free && !pa->pa_error) || (free < pa->pa_free))
++ ext3_error(sb, __FUNCTION__, "pa free mismatch: [pa %p] "
++ "[phy %lu] [logic %lu] [len %u] [free %u] "
++ "[error %u] [inode %lu] [freed %u]", pa,
++ (unsigned long)pa->pa_pstart,
++ (unsigned long)pa->pa_lstart,
++ (unsigned)pa->pa_len, (unsigned)pa->pa_free,
++ (unsigned)pa->pa_error, pa->pa_inode->i_ino,
++ free);
++ BUG_ON(pa->pa_free != free);
+ atomic_add(free, &sbi->s_mb_discarded);
+
+ return err;
+@@ -4189,6 +4201,19 @@
+ *errp = -EDQUOT;
+ return 0;
+ }
++
++ if (dev_check_rdonly(sb->s_bdev)) {
++ struct block_device *bdev = sb->s_bdev;
++
++ printk(KERN_WARNING "Alloc from readonly device %s (%#x): "
++ "[inode %lu] [logic %lu] [goal %lu] [ll %lu] [pl %lu] "
++ "[lr %lu] [pr %lu] [len %lu] [flags %lu]\n",
++ bdev->bd_disk ? bdev->bd_disk->disk_name : "",
++ bdev->bd_dev, ar->inode->i_ino, ar->logical, ar->goal,
++ ar->lleft, ar->pleft, ar->lright, ar->pright, ar->len,
++ ar->flags);
++ }
++
+ inquota = ar->len;
+
+ ext3_mb_poll_new_transaction(sb, handle);
+@@ -4217,10 +4242,34 @@
+ }
+
+ if (likely(ac.ac_status == AC_STATUS_FOUND)) {
+- ext3_mb_mark_diskspace_used(&ac, handle);
+- *errp = 0;
+- block = ext3_grp_offs_to_block(sb, &ac.ac_b_ex);
+- ar->len = ac.ac_b_ex.fe_len;
++ *errp = ext3_mb_mark_diskspace_used(&ac, handle);
++ if (!*errp) {
++ block = ext3_grp_offs_to_block(sb, &ac.ac_b_ex);
++ ar->len = ac.ac_b_ex.fe_len;
++ } else {
++ ac.ac_b_ex.fe_len = 0;
++ ar->len = 0;
++ ext3_mb_show_ac(&ac);
++ if (ac.ac_pa) {
++ struct ext3_prealloc_space *pa = ac.ac_pa;
++
++ /* We can not make sure whether the bitmap has
++ * been updated or not when fail case. So can
++ * not revert pa_free back, just mark pa_error*/
++ pa->pa_error++;
++ ext3_error(sb, __FUNCTION__,
++ "Updating bitmap error: [err %d] "
++ "[pa %p] [phy %lu] [logic %lu] "
++ "[len %u] [free %u] [error %u] "
++ "[inode %lu]", *errp, pa,
++ (unsigned long)pa->pa_pstart,
++ (unsigned long)pa->pa_lstart,
++ (unsigned)pa->pa_len,
++ (unsigned)pa->pa_free,
++ (unsigned)pa->pa_error,
++ pa->pa_inode ? pa->pa_inode->i_ino : 0);
++ }
++ }
+ } else {
+ freed = ext3_mb_discard_preallocations(sb, ac.ac_o_ex.fe_len);
+ if (freed)
+@@ -4388,6 +4437,15 @@
+ goto error_return;
+ }
+
++ if (dev_check_rdonly(sb->s_bdev)) {
++ struct block_device *bdev = sb->s_bdev;
++
++ printk(KERN_WARNING "Release to readonly device %s (%#x): "
++ "[inode %lu] [block %lu] [count %lu] [is_meta %d]\n",
++ bdev->bd_disk ? bdev->bd_disk->disk_name : "",
++ bdev->bd_dev, inode->i_ino, block, count, metadata);
++ }
++
+ ext3_debug("freeing block %lu\n", block);
+
+ ac.ac_op = EXT3_MB_HISTORY_FREE;
--- /dev/null
+Index: linux-stage/fs/ext4/mballoc.c
+===================================================================
+--- linux-stage.orig/fs/ext4/mballoc.c 2010-01-26 22:50:37.000000000 +0800
++++ linux-stage/fs/ext4/mballoc.c 2010-01-26 22:57:24.000000000 +0800
+@@ -3892,6 +3892,7 @@
+ INIT_LIST_HEAD(&pa->pa_group_list);
+ pa->pa_deleted = 0;
+ pa->pa_linear = 0;
++ pa->pa_error = 0;
+
+ mb_debug("new inode pa %p: %llu/%u for %u\n", pa,
+ pa->pa_pstart, pa->pa_len, pa->pa_lstart);
+@@ -3956,6 +3957,7 @@
+ INIT_LIST_HEAD(&pa->pa_group_list);
+ pa->pa_deleted = 0;
+ pa->pa_linear = 1;
++ pa->pa_error = 0;
+
+ mb_debug("new group pa %p: %llu/%u for %u\n", pa,
+ pa->pa_pstart, pa->pa_len, pa->pa_lstart);
+@@ -4019,7 +4021,10 @@
+ int err = 0;
+ int free = 0;
+
++ BUG_ON(!ext4_is_group_locked(sb, e4b->bd_group));
+ BUG_ON(pa->pa_deleted == 0);
++ BUG_ON(pa->pa_linear != 0);
++ BUG_ON(pa->pa_inode == NULL);
+ ext4_get_group_no_and_offset(sb, pa->pa_pstart, &group, &bit);
+ grp_blk_start = pa->pa_pstart - bit;
+ BUG_ON(group != e4b->bd_group && pa->pa_len != 0);
+@@ -4059,11 +4064,18 @@
+ mb_free_blocks(pa->pa_inode, e4b, bit, next - bit);
+ bit = next + 1;
+ }
+- if (free != pa->pa_free) {
+- printk(KERN_CRIT "pa %p: logic %lu, phys. %lu, len %lu\n",
+- pa, (unsigned long) pa->pa_lstart,
+- (unsigned long) pa->pa_pstart,
+- (unsigned long) pa->pa_len);
++
++ /* "free < pa->pa_free" means we maybe double alloc the same blocks,
++ * otherwise maybe leave some free blocks unavailable, no need to BUG.*/
++ if ((free > pa->pa_free && !pa->pa_error) || (free < pa->pa_free)) {
++ ext4_error(sb, __FUNCTION__, "pa free mismatch: [pa %p] "
++ "[phy %lu] [logic %lu] [len %u] [free %u] "
++ "[error %u] [inode %lu] [freed %u]", pa,
++ (unsigned long)pa->pa_pstart,
++ (unsigned long)pa->pa_lstart,
++ (unsigned)pa->pa_len, (unsigned)pa->pa_free,
++ (unsigned)pa->pa_error, pa->pa_inode->i_ino,
++ free);
+ ext4_grp_locked_error(sb, group,
+ __func__, "free %u, pa_free %u",
+ free, pa->pa_free);
+@@ -4072,6 +4084,7 @@
+ * from the bitmap and continue.
+ */
+ }
++ BUG_ON(pa->pa_free != free);
+ atomic_add(free, &sbi->s_mb_discarded);
+
+ return err;
+@@ -4800,6 +4813,24 @@
+ *errp = -EDQUOT;
+ goto out3;
+ }
++
++ if (dev_check_rdonly(sb->s_bdev)) {
++ struct block_device *bdev = sb->s_bdev;
++
++ printk(KERN_WARNING "Alloc from readonly device %s (%#x): "
++ "[inode %lu] [logic %llu] [goal %llu] [ll %llu] "
++ "[pl %llu] [lr %llu] [pr %llu] [len %u] [flags %u]\n",
++ bdev->bd_disk ? bdev->bd_disk->disk_name : "",
++ bdev->bd_dev, ar->inode->i_ino,
++ (unsigned long long)ar->logical,
++ (unsigned long long)ar->goal,
++ (unsigned long long)ar->lleft,
++ (unsigned long long)ar->pleft,
++ (unsigned long long)ar->lright,
++ (unsigned long long)ar->pright,
++ ar->len, ar->flags);
++ }
++
+ inquota = ar->len;
+
+ if (EXT4_I(ar->inode)->i_delalloc_reserved_flag)
+@@ -4850,6 +4881,25 @@
+ ac->ac_b_ex.fe_len = 0;
+ ar->len = 0;
+ ext4_mb_show_ac(ac);
++ if (ac->ac_pa) {
++ struct ext4_prealloc_space *pa = ac->ac_pa;
++
++ /* We can not make sure whether the bitmap has
++ * been updated or not when fail case. So can
++ * not revert pa_free back, just mark pa_error*/
++ pa->pa_error++;
++ ext4_error(sb, __FUNCTION__,
++ "Updating bitmap error: [err %d] "
++ "[pa %p] [phy %lu] [logic %lu] "
++ "[len %u] [free %u] [error %u] "
++ "[inode %lu]", *errp, pa,
++ (unsigned long)pa->pa_pstart,
++ (unsigned long)pa->pa_lstart,
++ (unsigned)pa->pa_len,
++ (unsigned)pa->pa_free,
++ (unsigned)pa->pa_error,
++ pa->pa_inode ? pa->pa_inode->i_ino : 0);
++ }
+ } else {
+ block = ext4_grp_offs_to_block(sb, &ac->ac_b_ex);
+ ar->len = ac->ac_b_ex.fe_len;
+@@ -5025,6 +5075,15 @@
+ goto error_return;
+ }
+
++ if (dev_check_rdonly(sb->s_bdev)) {
++ struct block_device *bdev = sb->s_bdev;
++
++ printk(KERN_WARNING "Release to readonly device %s (%#x): "
++ "[inode %lu] [block %lu] [count %lu] [is_meta %d]\n",
++ bdev->bd_disk ? bdev->bd_disk->disk_name : "",
++ bdev->bd_dev, inode->i_ino, block, count, metadata);
++ }
++
+ ext4_debug("freeing block %lu\n", block);
+ trace_mark(ext4_free_blocks,
+ "dev %s block %llu count %lu metadata %d ino %lu",
+Index: linux-stage/fs/ext4/mballoc.h
+===================================================================
+--- linux-stage.orig/fs/ext4/mballoc.h 2010-01-26 22:50:36.000000000 +0800
++++ linux-stage/fs/ext4/mballoc.h 2010-01-26 22:52:58.000000000 +0800
+@@ -21,6 +21,7 @@
+ #include <linux/blkdev.h>
+ #include <linux/marker.h>
+ #include <linux/mutex.h>
++#include <linux/genhd.h>
+ #include "ext4_jbd2.h"
+ #include "ext4.h"
+ #include "group.h"
+@@ -134,6 +135,7 @@
+ unsigned short pa_free; /* how many blocks are free */
+ unsigned short pa_linear; /* consumed in one direction
+ * strictly, for grp prealloc */
++ unsigned short pa_error;
+ spinlock_t *pa_obj_lock;
+ struct inode *pa_inode; /* hack, for history only */
+ };
ext4-kill-dx_root.patch
ext4-extents-mount-option-rhel5.patch
ext4-fiemap-2.6-rhel5.patch
+ext4-mballoc-pa_free-mismatch.patch
ext3-corrupted-orphans-2.6.patch
ext3-kill-dx_root.patch
ext3-fiemap-2.6-rhel5.patch
+ext3-mballoc-pa_free-mismatch.patch
ext3-dynlocks-2.6-rhel5.patch
ext3-hash-indexed-dir-dotdot-update.patch
ext3-corrupted-orphans-2.6.patch
+ext3-mballoc-pa_free-mismatch.patch
ext4-dynlocks-2.6-rhel5.patch
ext4-hash-indexed-dir-dotdot-update.patch
ext4-disable-write-bar-by-default.patch
+ext4-mballoc-pa_free-mismatch.patch
/**
* generic_make_request: hand a buffer to its device driver for I/O
* @bio: The bio describing the location in memory and on the device.
-@@ -3075,6 +3077,12 @@ end_io:
+@@ -3075,6 +3077,23 @@ end_io:
if (unlikely(test_bit(QUEUE_FLAG_DEAD, &q->queue_flags)))
goto end_io;
-+ /* this is cfs's dev_rdonly check */
-+ if (bio->bi_rw == WRITE &&
-+ dev_check_rdonly(bio->bi_bdev)) {
-+ bio_endio(bio, bio->bi_size, 0);
-+ break;
-+ }
-
+
++ /* this is cfs's dev_rdonly check */
++ if (bio->bi_rw == WRITE && dev_check_rdonly(bio->bi_bdev)) {
++ struct block_device *bdev = bio->bi_bdev;
++
++ printk(KERN_WARNING "Write to readonly device %s (%#x) "
++ "bi_flags: %lx, bi_vcnt: %d, bi_idx: %d, "
++ "bi->size: %d, bi_cnt: %d, bi_private: %p\n",
++ bdev->bd_disk ? bdev->bd_disk->disk_name : "",
++ bdev->bd_dev, bio->bi_flags, bio->bi_vcnt,
++ bio->bi_idx, bio->bi_size,
++ atomic_read(&bio->bi_cnt), bio->bi_private);
++ set_bit(BIO_RDONLY, &bio->bi_flags);
++ bio_endio(bio, bio->bi_size, 0);
++ clear_bit(BIO_RDONLY, &bio->bi_flags);
++ break;
++ }
++
/*
* If this device has partitions, remap block n
-@@ -3697,6 +3705,91 @@ void swap_io_context(struct io_context *
+@@ -3697,6 +3716,91 @@ void swap_io_context(struct io_context *
*ioc2 = temp;
}
EXPORT_SYMBOL(swap_io_context);
#define MODULE_ALIAS_BLOCKDEV(major,minor) \
MODULE_ALIAS("block-major-" __stringify(major) "-" __stringify(minor))
+Index: linux-2.6.16.i686/include/linux/bio.h
+===================================================================
+--- linux-2.6.16.i686.orig/include/linux/bio.h
++++ linux-2.6.16.i686/include/linux/bio.h
+@@ -124,6 +124,7 @@ struct bio {
+ #define BIO_BOUNCED 5 /* bio is a bounce bio */
+ #define BIO_USER_MAPPED 6 /* contains user pages */
+ #define BIO_EOPNOTSUPP 7 /* not supported */
++#define BIO_RDONLY 31 /* device is readonly */
+ #define bio_flagged(bio, flag) ((bio)->bi_flags & (1 << (flag)))
+
+ /*
/**
* generic_make_request: hand a buffer to its device driver for I/O
* @bio: The bio describing the location in memory and on the device.
-@@ -3151,6 +3153,12 @@ end_io:
+@@ -3151,6 +3153,23 @@ end_io:
if (unlikely(test_bit(QUEUE_FLAG_DEAD, &q->queue_flags)))
goto end_io;
-+ /* this is cfs's dev_rdonly check */
-+ if (bio->bi_rw == WRITE &&
-+ dev_check_rdonly(bio->bi_bdev)) {
-+ bio_endio(bio, bio->bi_size, 0);
-+ break;
-+ }
-
+
++ /* this is cfs's dev_rdonly check */
++ if (bio->bi_rw == WRITE && dev_check_rdonly(bio->bi_bdev)) {
++ struct block_device *bdev = bio->bi_bdev;
++
++ printk(KERN_WARNING "Write to readonly device %s (%#x) "
++ "bi_flags: %lx, bi_vcnt: %d, bi_idx: %d, "
++ "bi->size: %d, bi_cnt: %d, bi_private: %p\n",
++ bdev->bd_disk ? bdev->bd_disk->disk_name : "",
++ bdev->bd_dev, bio->bi_flags, bio->bi_vcnt,
++ bio->bi_idx, bio->bi_size,
++ atomic_read(&bio->bi_cnt), bio->bi_private);
++ set_bit(BIO_RDONLY, &bio->bi_flags);
++ bio_endio(bio, bio->bi_size, 0);
++ clear_bit(BIO_RDONLY, &bio->bi_flags);
++ break;
++ }
++
/*
* If this device has partitions, remap block n
-@@ -3765,6 +3773,91 @@ void swap_io_context(struct io_context *
+@@ -3765,6 +3784,91 @@ void swap_io_context(struct io_context *
*ioc2 = temp;
}
EXPORT_SYMBOL(swap_io_context);
extern int set_blocksize(struct block_device *, int);
extern int sb_set_blocksize(struct super_block *, int);
extern int sb_min_blocksize(struct super_block *, int);
+Index: linux-2.6.18.1/include/linux/bio.h
+===================================================================
+--- linux-2.6.18.1.orig/include/linux/bio.h
++++ linux-2.6.18.1/include/linux/bio.h
+@@ -124,6 +124,7 @@ struct bio {
+ #define BIO_BOUNCED 5 /* bio is a bounce bio */
+ #define BIO_USER_MAPPED 6 /* contains user pages */
+ #define BIO_EOPNOTSUPP 7 /* not supported */
++#define BIO_RDONLY 31 /* device is readonly */
+ #define bio_flagged(bio, flag) ((bio)->bi_flags & (1 << (flag)))
+
+ /*
/*
* Check whether this bio extends beyond the end of the device.
*/
-@@ -1436,6 +1438,12 @@
+@@ -1436,6 +1438,23 @@
if (unlikely(test_bit(QUEUE_FLAG_DEAD, &q->queue_flags)))
goto end_io;
-+ /* this is cfs's dev_rdonly check */
-+ if (bio->bi_rw == WRITE &&
-+ dev_check_rdonly(bio->bi_bdev)) {
-+ bio_endio(bio, 0);
-+ break;
-+ }
-
+
++ /* this is cfs's dev_rdonly check */
++ if (bio->bi_rw == WRITE && dev_check_rdonly(bio->bi_bdev)) {
++ struct block_device *bdev = bio->bi_bdev;
++
++ printk(KERN_WARNING "Write to readonly device %s (%#x) "
++ "bi_flags: %lx, bi_vcnt: %d, bi_idx: %d, "
++ "bi->size: %d, bi_cnt: %d, bi_private: %p\n",
++ bdev->bd_disk ? bdev->bd_disk->disk_name : "",
++ bdev->bd_dev, bio->bi_flags, bio->bi_vcnt,
++ bio->bi_idx, bio->bi_size,
++ atomic_read(&bio->bi_cnt), bio->bi_private);
++ set_bit(BIO_RDONLY, &bio->bi_flags);
++ bio_endio(bio, bio->bi_size, 0);
++ clear_bit(BIO_RDONLY, &bio->bi_flags);
++ break;
++ }
++
if (should_fail_request(bio))
goto end_io;
-@@ -2189,6 +2197,91 @@
+@@ -2189,6 +2208,91 @@
}
EXPORT_SYMBOL(kblockd_flush_work);
extern int set_blocksize(struct block_device *, int);
extern int sb_set_blocksize(struct super_block *, int);
extern int sb_min_blocksize(struct super_block *, int);
+Index: linux-2.6.27.21-0.1/include/linux/bio.h
+===================================================================
+--- linux-2.6.27.21-0.1.orig/include/linux/bio.h 2009-05-22 08:38:00.000000000 -0600
++++ linux-2.6.27.21-0.1/include/linux/bio.h 2009-05-22 08:38:02.000000000 -0600
+@@ -117,6 +117,7 @@
+ #define BIO_CPU_AFFINE 8 /* complete bio on same CPU as submitted */
+ #define BIO_FS_INTEGRITY 10 /* fs owns integrity data, not block layer */
+ #define BIO_QUIET 11 /* Make BIO Quiet */
++#define BIO_RDONLY 31 /* device is readonly */
+ #define bio_flagged(bio, flag) ((bio)->bi_flags & (1 << (flag)))
+
+ /*
struct bio_vec *bvl;
int i;
+ if (bio->bi_rw == WRITE &&
+ unlikely(test_and_clear_bit(BIO_RDONLY, &bio->bi_flags))) {
+ struct block_device *bdev = bio->bi_bdev;
+
+ CWARN("Write to readonly device %s (%#x) bi_flags: %lx, "
+ "bi_vcnt: %d, bi_idx: %d, bi->size: %d, bi_cnt: %d, "
+ "bi_private: %p, done: %u, error: %d\n",
+ bdev->bd_disk ? bdev->bd_disk->disk_name : "",
+ bdev->bd_dev, bio->bi_flags, bio->bi_vcnt, bio->bi_idx,
+ bio->bi_size, atomic_read(&bio->bi_cnt), bio->bi_private,
+ done, error);
+ }
+
/* CAVEAT EMPTOR: possibly in IRQ context
* DO NOT record procfs stats here!!! */