Whamcloud - gitweb
b=16680 diagnostic patch to verify lustre read-only device mechanism
[fs/lustre-release.git] / ldiskfs / kernel_patches / patches / ext3-mballoc-pa_free-mismatch.patch
diff --git a/ldiskfs/kernel_patches/patches/ext3-mballoc-pa_free-mismatch.patch b/ldiskfs/kernel_patches/patches/ext3-mballoc-pa_free-mismatch.patch
new file mode 100644 (file)
index 0000000..311f120
--- /dev/null
@@ -0,0 +1,150 @@
+Index: linux-stage/fs/ext3/mballoc.c
+===================================================================
+--- linux-stage.orig/fs/ext3/mballoc.c 2010-01-26 23:36:25.000000000 +0800
++++ linux-stage/fs/ext3/mballoc.c      2010-01-26 23:42:57.000000000 +0800
+@@ -35,6 +35,7 @@
+ #include <linux/pagemap.h>
+ #include <linux/seq_file.h>
+ #include <linux/version.h>
++#include <linux/genhd.h>
+ #include "group.h"
+@@ -360,6 +361,7 @@
+       unsigned short          pa_free;        /* how many blocks are free */
+       unsigned short          pa_linear;      /* consumed in one direction
+                                                * strictly, for group prealloc */
++      unsigned short          pa_error;
+       spinlock_t              *pa_obj_lock;
+       struct inode            *pa_inode;      /* hack, for history only */
+ };
+@@ -3558,6 +3560,7 @@
+       spin_lock_init(&pa->pa_lock);
+       pa->pa_deleted = 0;
+       pa->pa_linear = 0;
++      pa->pa_error = 0;
+       mb_debug("new inode pa %p: %lu/%lu for %lu\n", pa,
+                       pa->pa_pstart, pa->pa_len, pa->pa_lstart);
+@@ -3615,6 +3618,7 @@
+       spin_lock_init(&pa->pa_lock);
+       pa->pa_deleted = 0;
+       pa->pa_linear = 1;
++      pa->pa_error = 0;
+       mb_debug("new group pa %p: %lu/%lu for %lu\n", pa,
+                       pa->pa_pstart, pa->pa_len, pa->pa_lstart);
+@@ -3671,7 +3675,10 @@
+       sector_t start;
+       int err = 0, free = 0;
++      BUG_ON(!ext3_is_group_locked(sb, e3b->bd_group));
+       BUG_ON(pa->pa_deleted == 0);
++      BUG_ON(pa->pa_linear != 0);
++      BUG_ON(pa->pa_inode == NULL);
+       ext3_get_group_no_and_offset(sb, pa->pa_pstart, &group, &bit);
+       BUG_ON(group != e3b->bd_group && pa->pa_len != 0);
+       end = bit + pa->pa_len;
+@@ -3704,14 +3711,19 @@
+               mb_free_blocks(pa->pa_inode, e3b, bit, next - bit);
+               bit = next + 1;
+       }
+-      if (free != pa->pa_free) {
+-              printk("pa %p: logic %lu, phys. %lu, len %lu\n",
+-                      pa, (unsigned long) pa->pa_lstart,
+-                      (unsigned long) pa->pa_pstart,
+-                      (unsigned long) pa->pa_len);
+-              printk("free %u, pa_free %u\n", free, pa->pa_free);
+-      }
+-      BUG_ON(free != pa->pa_free);
++
++      /* "free < pa->pa_free" means we maybe double alloc the same blocks,
++       * otherwise maybe leave some free blocks unavailable, no need to BUG.*/
++      if ((free > pa->pa_free && !pa->pa_error) || (free < pa->pa_free))
++              ext3_error(sb, __FUNCTION__, "pa free mismatch: [pa %p] "
++                            "[phy %lu] [logic %lu] [len %u] [free %u] "
++                            "[error %u] [inode %lu] [freed %u]", pa,
++                            (unsigned long)pa->pa_pstart,
++                            (unsigned long)pa->pa_lstart,
++                            (unsigned)pa->pa_len, (unsigned)pa->pa_free,
++                            (unsigned)pa->pa_error, pa->pa_inode->i_ino,
++                            free);
++      BUG_ON(pa->pa_free != free);
+       atomic_add(free, &sbi->s_mb_discarded);
+       return err;
+@@ -4189,6 +4201,19 @@
+               *errp = -EDQUOT;
+               return 0;
+       }
++
++      if (dev_check_rdonly(sb->s_bdev)) {
++              struct block_device *bdev = sb->s_bdev;
++
++              printk(KERN_WARNING "Alloc from readonly device %s (%#x): "
++                     "[inode %lu] [logic %lu] [goal %lu] [ll %lu] [pl %lu] "
++                     "[lr %lu] [pr %lu] [len %lu] [flags %lu]\n",
++                     bdev->bd_disk ? bdev->bd_disk->disk_name : "",
++                     bdev->bd_dev, ar->inode->i_ino, ar->logical, ar->goal,
++                     ar->lleft, ar->pleft, ar->lright, ar->pright, ar->len,
++                     ar->flags);
++      }
++
+       inquota = ar->len;
+       ext3_mb_poll_new_transaction(sb, handle);
+@@ -4217,10 +4242,34 @@
+       }
+       if (likely(ac.ac_status == AC_STATUS_FOUND)) {
+-              ext3_mb_mark_diskspace_used(&ac, handle);
+-              *errp = 0;
+-              block = ext3_grp_offs_to_block(sb, &ac.ac_b_ex);
+-              ar->len = ac.ac_b_ex.fe_len;
++              *errp = ext3_mb_mark_diskspace_used(&ac, handle);
++              if (!*errp) {
++                      block = ext3_grp_offs_to_block(sb, &ac.ac_b_ex);
++                      ar->len = ac.ac_b_ex.fe_len;
++              } else {
++                        ac.ac_b_ex.fe_len = 0;
++                        ar->len = 0;
++                        ext3_mb_show_ac(&ac);
++                        if (ac.ac_pa) {
++                                struct ext3_prealloc_space *pa = ac.ac_pa;
++
++                              /* We can not make sure whether the bitmap has
++                               * been updated or not when fail case. So can
++                               * not revert pa_free back, just mark pa_error*/
++                              pa->pa_error++;
++                                ext3_error(sb, __FUNCTION__,
++                                            "Updating bitmap error: [err %d] "
++                                            "[pa %p] [phy %lu] [logic %lu] "
++                                            "[len %u] [free %u] [error %u] "
++                                            "[inode %lu]", *errp, pa,
++                                            (unsigned long)pa->pa_pstart,
++                                            (unsigned long)pa->pa_lstart,
++                                            (unsigned)pa->pa_len,
++                                            (unsigned)pa->pa_free,
++                                            (unsigned)pa->pa_error,
++                                      pa->pa_inode ? pa->pa_inode->i_ino : 0);
++                        }
++              }
+       } else {
+               freed  = ext3_mb_discard_preallocations(sb, ac.ac_o_ex.fe_len);
+               if (freed)
+@@ -4388,6 +4437,15 @@
+               goto error_return;
+       }
++      if (dev_check_rdonly(sb->s_bdev)) {
++              struct block_device *bdev = sb->s_bdev;
++
++              printk(KERN_WARNING "Release to readonly device %s (%#x): "
++                     "[inode %lu] [block %lu] [count %lu] [is_meta %d]\n",
++                     bdev->bd_disk ? bdev->bd_disk->disk_name : "",
++                     bdev->bd_dev, inode->i_ino, block, count, metadata);
++      }
++
+       ext3_debug("freeing block %lu\n", block);
+       ac.ac_op = EXT3_MB_HISTORY_FREE;