Whamcloud - gitweb
LU-7132 osd-ldiskfs: speedup rewrites 60/16360/4
author Andrew Perepechko <andrew.perepechko@seagate.com>
Thu, 10 Sep 2015 13:08:58 +0000 (16:08 +0300)
committer Oleg Drokin <oleg.drokin@intel.com>
Tue, 22 Sep 2015 23:23:32 +0000 (23:23 +0000)
This patch slightly speeds up rewrites on OST
by replacing bmap calls with fiemap calls.

This patch also includes a fiemap deadlock fix
created by Alexey Lyashkov.

Change-Id: I8af6350a0049a14a3e29304087064ecdffc1be89
Signed-off-by: Andrew Perepechko <andrew.perepechko@seagate.com>
Signed-off-by: Alexey Lyashkov <alexey.lyashkov@seagate.com>
Reviewed-by: Artem Blagodarenko <artem.blagodarenko@seagate.com>
Reviewed-by: Alexander Zarochentsev <alexander.zarochentsev@seagate.com>
Xyratex-bug-id: MRP-2559
Xyratex-bug-id: MRP-2688
Reviewed-on: http://review.whamcloud.com/16360
Tested-by: Jenkins
Tested-by: Maloo <hpdd-maloo@intel.com>
Reviewed-by: Andreas Dilger <andreas.dilger@intel.com>
Reviewed-by: James Simmons <uja.ornl@yahoo.com>
ldiskfs/kernel_patches/patches/rhel6.3/ext4-notalloc_under_idatasem.patch [new file with mode: 0644]
ldiskfs/kernel_patches/series/ldiskfs-2.6-rhel6.4.series
ldiskfs/kernel_patches/series/ldiskfs-2.6-rhel6.5.series
ldiskfs/kernel_patches/series/ldiskfs-2.6-rhel6.6.series
ldiskfs/kernel_patches/series/ldiskfs-2.6-rhel6.7.series
ldiskfs/kernel_patches/series/ldiskfs-2.6-rhel6.series
ldiskfs/kernel_patches/series/ldiskfs-2.6-sles11.series
ldiskfs/kernel_patches/series/ldiskfs-3.0-sles11.series
ldiskfs/kernel_patches/series/ldiskfs-3.0-sles11sp3.series
lustre/osd-ldiskfs/osd_io.c

diff --git a/ldiskfs/kernel_patches/patches/rhel6.3/ext4-notalloc_under_idatasem.patch b/ldiskfs/kernel_patches/patches/rhel6.3/ext4-notalloc_under_idatasem.patch
new file mode 100644 (file)
index 0000000..bd6efcd
--- /dev/null
@@ -0,0 +1,17 @@
+--- /dev/null
++++ b/ldiskfs/kernel_patches/patches/rhel6.3/ext4-notalloc_under_idatasem.patch
+@@ -0,0 +1,14 @@
++Index: linux-stage/fs/ext4/extents.c
++===================================================================
++--- linux-stage.orig/fs/ext4/extents.c        2015-07-13 22:22:56.000000000 +0300
+++++ linux-stage/fs/ext4/extents.c     2015-07-13 22:24:05.000000000 +0300
++@@ -4318,7 +4318,8 @@ static int ext4_find_delayed_extent(stru
++      struct buffer_head *head = NULL;
++      unsigned int nr_pages = PAGE_SIZE / sizeof(struct page *);
++ 
++-     pages = kmalloc(PAGE_SIZE, GFP_KERNEL);
+++     /* we are running under i_data_sem so don't re-enter fs code */
+++     pages = kmalloc(PAGE_SIZE, GFP_NOFS);
++      if (pages == NULL)
++              return -ENOMEM;
++ 
index ec2670c..daeda62 100644 (file)
@@ -42,3 +42,4 @@ rhel6.3/ext4-not-discard-preallocation-umount.patch
 rhel6.3/ext4-journal-path-opt.patch
 rhel6.3/ext4-recalc-percpu-counters-after-journal.patch
 rhel6.3/ext4-drop-inode-from-orphan-list-if-ext4_delete_inode-fails.patch
+rhel6.3/ext4-notalloc_under_idatasem.patch
index 8204382..abea8ea 100644 (file)
@@ -44,3 +44,4 @@ rhel6.4/ext4-max-dir-size-options.patch
 rhel6.3/ext4-not-discard-preallocation-umount.patch
 rhel6.3/ext4-journal-path-opt.patch
 rhel6.3/ext4-drop-inode-from-orphan-list-if-ext4_delete_inode-fails.patch
+rhel6.3/ext4-notalloc_under_idatasem.patch
index 841cd67..568162f 100644 (file)
@@ -44,3 +44,4 @@ rhel6.3/ext4-not-discard-preallocation-umount.patch
 rhel6.3/ext4-journal-path-opt.patch
 rhel6.3/ext4-drop-inode-from-orphan-list-if-ext4_delete_inode-fails.patch
 rhel6.6/ext4-remove-truncate-warning.patch
+rhel6.3/ext4-notalloc_under_idatasem.patch
index 832e5b1..6b526f3 100644 (file)
@@ -44,3 +44,4 @@ rhel6.3/ext4-not-discard-preallocation-umount.patch
 rhel6.3/ext4-journal-path-opt.patch
 rhel6.6/ext4-remove-truncate-warning.patch
 rhel6.6/ext4-corrupted-inode-block-bitmaps-handling-patches.patch
+rhel6.3/ext4-notalloc_under_idatasem.patch
index 7d822b7..fa841e6 100644 (file)
@@ -40,3 +40,4 @@ rhel6.3/ext4-max-dir-size-options.patch
 rhel6.3/ext4-not-discard-preallocation-umount.patch
 rhel6.3/ext4-journal-path-opt.patch
 rhel6.3/ext4-recalc-percpu-counters-after-journal.patch
+rhel6.3/ext4-notalloc_under_idatasem.patch
index 9bbf666..f3a9d2f 100644 (file)
@@ -41,3 +41,4 @@ sles11sp1/ext4-max-dir-size-options.patch
 rhel6.3/ext4-not-discard-preallocation-umount.patch
 rhel6.3/ext4-journal-path-opt.patch
 rhel6.3/ext4-recalc-percpu-counters-after-journal.patch
+rhel6.3/ext4-notalloc_under_idatasem.patch
index 6b6d9d2..4728457 100644 (file)
@@ -37,3 +37,4 @@ rhel6.3/ext4-max-dir-size.patch
 sles11sp2/ext4-max-dir-size-options.patch
 rhel6.3/ext4-not-discard-preallocation-umount.patch
 rhel6.3/ext4-journal-path-opt.patch
+rhel6.3/ext4-notalloc_under_idatasem.patch
index 5932eb7..d4cfd2a 100644 (file)
@@ -37,3 +37,4 @@ sles11sp2/ext4-max-dir-size-options.patch
 rhel6.3/ext4-not-discard-preallocation-umount.patch
 rhel6.3/ext4-journal-path-opt.patch
 sles11sp2/ext4-corrupted-inode-block-bitmaps-handling-patches.patch
+rhel6.3/ext4-notalloc_under_idatasem.patch
index 01b574b..98da270 100644 (file)
@@ -1039,29 +1039,57 @@ static int osd_write_prep(const struct lu_env *env, struct dt_object *dt,
         RETURN(rc);
 }
 
-/* Check if a block is allocated or not */
-static int osd_is_mapped(struct inode *inode, u64 offset)
-{
-       sector_t (*fs_bmap)(struct address_space *, sector_t);
+struct osd_fextent {
+       sector_t        start;
+       sector_t        end;
+       unsigned int    mapped:1;
+};
 
-       fs_bmap = inode->i_mapping->a_ops->bmap;
+static int osd_is_mapped(struct dt_object *dt, __u64 offset,
+                        struct osd_fextent *cached_extent)
+{
+       struct inode *inode = osd_dt_obj(dt)->oo_inode;
+       sector_t block = offset >> inode->i_blkbits;
+       sector_t start;
+       struct fiemap_extent_info fei = { 0 };
+       struct fiemap_extent fe = { 0 };
+       mm_segment_t saved_fs;
+       int rc;
 
-       /* We can't know if we are overwriting or not */
-       if (unlikely(fs_bmap == NULL))
-               return 0;
+       if (block >= cached_extent->start && block < cached_extent->end)
+               return cached_extent->mapped;
 
        if (i_size_read(inode) == 0)
                return 0;
 
        /* Beyond EOF, must not be mapped */
-       if (((i_size_read(inode) - 1) >> inode->i_blkbits) <
-           (offset >> inode->i_blkbits))
+       if (((i_size_read(inode) - 1) >> inode->i_blkbits) < block)
                return 0;
 
-       if (fs_bmap(inode->i_mapping, offset >> inode->i_blkbits) == 0)
+       fei.fi_extents_max = 1;
+       fei.fi_extents_start = &fe;
+
+       saved_fs = get_fs();
+       set_fs(get_ds());
+       rc = inode->i_op->fiemap(inode, &fei, offset, FIEMAP_MAX_OFFSET-offset);
+       set_fs(saved_fs);
+       if (rc != 0)
                return 0;
 
-       return 1;
+       start = fe.fe_logical >> inode->i_blkbits;
+
+       if (start > block) {
+               cached_extent->start = block;
+               cached_extent->end = start;
+               cached_extent->mapped = 0;
+       } else {
+               cached_extent->start = start;
+               cached_extent->end = (fe.fe_logical + fe.fe_length) >>
+                                     inode->i_blkbits;
+               cached_extent->mapped = 1;
+       }
+
+       return cached_extent->mapped;
 }
 
 static int osd_declare_write_commit(const struct lu_env *env,
@@ -1081,6 +1109,7 @@ static int osd_declare_write_commit(const struct lu_env *env,
        int                      credits = 0;
        bool                     ignore_quota = false;
        long long                quota_space = 0;
+       struct osd_fextent       extent = { 0 };
        ENTRY;
 
         LASSERT(handle != NULL);
@@ -1095,7 +1124,7 @@ static int osd_declare_write_commit(const struct lu_env *env,
                    lnb[i - 1].lnb_file_offset + lnb[i - 1].lnb_len)
                        extents++;
 
-               if (!osd_is_mapped(inode, lnb[i].lnb_file_offset))
+               if (!osd_is_mapped(dt, lnb[i].lnb_file_offset, &extent))
                        quota_space += PAGE_CACHE_SIZE;
 
                /* ignore quota for the whole request if any page is from
@@ -1186,6 +1215,7 @@ static int osd_write_commit(const struct lu_env *env, struct dt_object *dt,
         struct osd_device  *osd = osd_obj2dev(osd_dt_obj(dt));
         loff_t isize;
         int rc = 0, i;
+       struct osd_fextent extent = { 0 };
 
         LASSERT(inode);
 
@@ -1198,7 +1228,7 @@ static int osd_write_commit(const struct lu_env *env, struct dt_object *dt,
 
         for (i = 0; i < npages; i++) {
                if (lnb[i].lnb_rc == -ENOSPC &&
-                   osd_is_mapped(inode, lnb[i].lnb_file_offset)) {
+                   osd_is_mapped(dt, lnb[i].lnb_file_offset, &extent)) {
                        /* Allow the write to proceed if overwriting an
                         * existing block */
                        lnb[i].lnb_rc = 0;