From: Andrew Perepechko Date: Thu, 10 Sep 2015 13:08:58 +0000 (+0300) Subject: LU-7132 osd-ldiskfs: speedup rewrites X-Git-Tag: 2.7.61~25 X-Git-Url: https://git.whamcloud.com/?p=fs%2Flustre-release.git;a=commitdiff_plain;h=144b5a65c16f1bca81ae76010528a592c821308b LU-7132 osd-ldiskfs: speedup rewrites This patch slightly speeds up rewrites on OST by replacing bmap calls with fiemap calls. This patch also includes a fiemap deadlock fix created by Alexey Lyashkov. Change-Id: I8af6350a0049a14a3e29304087064ecdffc1be89 Signed-off-by: Andrew Perepechko Signed-off-by: Alexey Lyashkov Reviewed-by: Artem Blagodarenko Reviewed-by: Alexander Zarochentsev Xyratex-bug-id: MRP-2559 Xyratex-bug-id: MRP-2688 Reviewed-on: http://review.whamcloud.com/16360 Tested-by: Jenkins Tested-by: Maloo Reviewed-by: Andreas Dilger Reviewed-by: James Simmons --- diff --git a/ldiskfs/kernel_patches/patches/rhel6.3/ext4-notalloc_under_idatasem.patch b/ldiskfs/kernel_patches/patches/rhel6.3/ext4-notalloc_under_idatasem.patch new file mode 100644 index 0000000..bd6efcd --- /dev/null +++ b/ldiskfs/kernel_patches/patches/rhel6.3/ext4-notalloc_under_idatasem.patch @@ -0,0 +1,17 @@ +--- /dev/null ++++ b/ldiskfs/kernel_patches/patches/rhel6.3/ext4-notalloc_under_idatasem.patch +@@ -0,0 +1,14 @@ ++Index: linux-stage/fs/ext4/extents.c ++=================================================================== ++--- linux-stage.orig/fs/ext4/extents.c 2015-07-13 22:22:56.000000000 +0300 +++++ linux-stage/fs/ext4/extents.c 2015-07-13 22:24:05.000000000 +0300 ++@@ -4318,7 +4318,8 @@ static int ext4_find_delayed_extent(stru ++ struct buffer_head *head = NULL; ++ unsigned int nr_pages = PAGE_SIZE / sizeof(struct page *); ++ ++- pages = kmalloc(PAGE_SIZE, GFP_KERNEL); +++ /* we are running under i_data_sem so don't re-enter fs code */ +++ pages = kmalloc(PAGE_SIZE, GFP_NOFS); ++ if (pages == NULL) ++ return -ENOMEM; ++ diff --git a/ldiskfs/kernel_patches/series/ldiskfs-2.6-rhel6.4.series b/ldiskfs/kernel_patches/series/ldiskfs-2.6-rhel6.4.series index ec2670c..daeda62 100644 --- a/ldiskfs/kernel_patches/series/ldiskfs-2.6-rhel6.4.series +++ b/ldiskfs/kernel_patches/series/ldiskfs-2.6-rhel6.4.series @@ -42,3 +42,4 @@ rhel6.3/ext4-not-discard-preallocation-umount.patch rhel6.3/ext4-journal-path-opt.patch rhel6.3/ext4-recalc-percpu-counters-after-journal.patch rhel6.3/ext4-drop-inode-from-orphan-list-if-ext4_delete_inode-fails.patch +rhel6.3/ext4-notalloc_under_idatasem.patch diff --git a/ldiskfs/kernel_patches/series/ldiskfs-2.6-rhel6.5.series b/ldiskfs/kernel_patches/series/ldiskfs-2.6-rhel6.5.series index 8204382..abea8ea 100644 --- a/ldiskfs/kernel_patches/series/ldiskfs-2.6-rhel6.5.series +++ b/ldiskfs/kernel_patches/series/ldiskfs-2.6-rhel6.5.series @@ -44,3 +44,4 @@ rhel6.4/ext4-max-dir-size-options.patch rhel6.3/ext4-not-discard-preallocation-umount.patch rhel6.3/ext4-journal-path-opt.patch rhel6.3/ext4-drop-inode-from-orphan-list-if-ext4_delete_inode-fails.patch +rhel6.3/ext4-notalloc_under_idatasem.patch diff --git a/ldiskfs/kernel_patches/series/ldiskfs-2.6-rhel6.6.series b/ldiskfs/kernel_patches/series/ldiskfs-2.6-rhel6.6.series index 841cd67..568162f 100644 --- a/ldiskfs/kernel_patches/series/ldiskfs-2.6-rhel6.6.series +++ b/ldiskfs/kernel_patches/series/ldiskfs-2.6-rhel6.6.series @@ -44,3 +44,4 @@ rhel6.3/ext4-not-discard-preallocation-umount.patch rhel6.3/ext4-journal-path-opt.patch rhel6.3/ext4-drop-inode-from-orphan-list-if-ext4_delete_inode-fails.patch rhel6.6/ext4-remove-truncate-warning.patch +rhel6.3/ext4-notalloc_under_idatasem.patch diff --git a/ldiskfs/kernel_patches/series/ldiskfs-2.6-rhel6.7.series b/ldiskfs/kernel_patches/series/ldiskfs-2.6-rhel6.7.series index 832e5b1..6b526f3 100644 --- a/ldiskfs/kernel_patches/series/ldiskfs-2.6-rhel6.7.series +++ b/ldiskfs/kernel_patches/series/ldiskfs-2.6-rhel6.7.series @@ -44,3 +44,4 @@ rhel6.3/ext4-not-discard-preallocation-umount.patch rhel6.3/ext4-journal-path-opt.patch rhel6.6/ext4-remove-truncate-warning.patch rhel6.6/ext4-corrupted-inode-block-bitmaps-handling-patches.patch +rhel6.3/ext4-notalloc_under_idatasem.patch diff --git a/ldiskfs/kernel_patches/series/ldiskfs-2.6-rhel6.series b/ldiskfs/kernel_patches/series/ldiskfs-2.6-rhel6.series index 7d822b7..fa841e6 100644 --- a/ldiskfs/kernel_patches/series/ldiskfs-2.6-rhel6.series +++ b/ldiskfs/kernel_patches/series/ldiskfs-2.6-rhel6.series @@ -40,3 +40,4 @@ rhel6.3/ext4-max-dir-size-options.patch rhel6.3/ext4-not-discard-preallocation-umount.patch rhel6.3/ext4-journal-path-opt.patch rhel6.3/ext4-recalc-percpu-counters-after-journal.patch +rhel6.3/ext4-notalloc_under_idatasem.patch diff --git a/ldiskfs/kernel_patches/series/ldiskfs-2.6-sles11.series b/ldiskfs/kernel_patches/series/ldiskfs-2.6-sles11.series index 9bbf666..f3a9d2f 100644 --- a/ldiskfs/kernel_patches/series/ldiskfs-2.6-sles11.series +++ b/ldiskfs/kernel_patches/series/ldiskfs-2.6-sles11.series @@ -41,3 +41,4 @@ sles11sp1/ext4-max-dir-size-options.patch rhel6.3/ext4-not-discard-preallocation-umount.patch rhel6.3/ext4-journal-path-opt.patch rhel6.3/ext4-recalc-percpu-counters-after-journal.patch +rhel6.3/ext4-notalloc_under_idatasem.patch diff --git a/ldiskfs/kernel_patches/series/ldiskfs-3.0-sles11.series b/ldiskfs/kernel_patches/series/ldiskfs-3.0-sles11.series index 6b6d9d2..4728457 100644 --- a/ldiskfs/kernel_patches/series/ldiskfs-3.0-sles11.series +++ b/ldiskfs/kernel_patches/series/ldiskfs-3.0-sles11.series @@ -37,3 +37,4 @@ rhel6.3/ext4-max-dir-size.patch sles11sp2/ext4-max-dir-size-options.patch rhel6.3/ext4-not-discard-preallocation-umount.patch rhel6.3/ext4-journal-path-opt.patch +rhel6.3/ext4-notalloc_under_idatasem.patch diff --git a/ldiskfs/kernel_patches/series/ldiskfs-3.0-sles11sp3.series b/ldiskfs/kernel_patches/series/ldiskfs-3.0-sles11sp3.series index 5932eb7..d4cfd2a 100644 --- a/ldiskfs/kernel_patches/series/ldiskfs-3.0-sles11sp3.series +++ b/ldiskfs/kernel_patches/series/ldiskfs-3.0-sles11sp3.series @@ -37,3 +37,4 @@ sles11sp2/ext4-max-dir-size-options.patch rhel6.3/ext4-not-discard-preallocation-umount.patch rhel6.3/ext4-journal-path-opt.patch sles11sp2/ext4-corrupted-inode-block-bitmaps-handling-patches.patch +rhel6.3/ext4-notalloc_under_idatasem.patch diff --git a/lustre/osd-ldiskfs/osd_io.c b/lustre/osd-ldiskfs/osd_io.c index 01b574b..98da270 100644 --- a/lustre/osd-ldiskfs/osd_io.c +++ b/lustre/osd-ldiskfs/osd_io.c @@ -1039,29 +1039,57 @@ static int osd_write_prep(const struct lu_env *env, struct dt_object *dt, RETURN(rc); } -/* Check if a block is allocated or not */ -static int osd_is_mapped(struct inode *inode, u64 offset) -{ - sector_t (*fs_bmap)(struct address_space *, sector_t); +struct osd_fextent { + sector_t start; + sector_t end; + unsigned int mapped:1; +}; - fs_bmap = inode->i_mapping->a_ops->bmap; +static int osd_is_mapped(struct dt_object *dt, __u64 offset, + struct osd_fextent *cached_extent) +{ + struct inode *inode = osd_dt_obj(dt)->oo_inode; + sector_t block = offset >> inode->i_blkbits; + sector_t start; + struct fiemap_extent_info fei = { 0 }; + struct fiemap_extent fe = { 0 }; + mm_segment_t saved_fs; + int rc; - /* We can't know if we are overwriting or not */ - if (unlikely(fs_bmap == NULL)) - return 0; + if (block >= cached_extent->start && block < cached_extent->end) + return cached_extent->mapped; if (i_size_read(inode) == 0) return 0; /* Beyond EOF, must not be mapped */ - if (((i_size_read(inode) - 1) >> inode->i_blkbits) < - (offset >> inode->i_blkbits)) + if (((i_size_read(inode) - 1) >> inode->i_blkbits) < block) return 0; - if (fs_bmap(inode->i_mapping, offset >> inode->i_blkbits) == 0) + fei.fi_extents_max = 1; + fei.fi_extents_start = &fe; + + saved_fs = get_fs(); + set_fs(get_ds()); + rc = inode->i_op->fiemap(inode, &fei, offset, FIEMAP_MAX_OFFSET-offset); + set_fs(saved_fs); + if (rc != 0) return 0; - return 1; + start = fe.fe_logical >> inode->i_blkbits; + + if (start > block) { + cached_extent->start = block; + cached_extent->end = start; + cached_extent->mapped = 0; + } else { + cached_extent->start = start; + cached_extent->end = (fe.fe_logical + fe.fe_length) >> + inode->i_blkbits; + cached_extent->mapped = 1; + } + + return cached_extent->mapped; } static int osd_declare_write_commit(const struct lu_env *env, @@ -1081,6 +1109,7 @@ static int osd_declare_write_commit(const struct lu_env *env, int credits = 0; bool ignore_quota = false; long long quota_space = 0; + struct osd_fextent extent = { 0 }; ENTRY; LASSERT(handle != NULL); @@ -1095,7 +1124,7 @@ static int osd_declare_write_commit(const struct lu_env *env, lnb[i - 1].lnb_file_offset + lnb[i - 1].lnb_len) extents++; - if (!osd_is_mapped(inode, lnb[i].lnb_file_offset)) + if (!osd_is_mapped(dt, lnb[i].lnb_file_offset, &extent)) quota_space += PAGE_CACHE_SIZE; /* ignore quota for the whole request if any page is from @@ -1186,6 +1215,7 @@ static int osd_write_commit(const struct lu_env *env, struct dt_object *dt, struct osd_device *osd = osd_obj2dev(osd_dt_obj(dt)); loff_t isize; int rc = 0, i; + struct osd_fextent extent = { 0 }; LASSERT(inode); @@ -1198,7 +1228,7 @@ static int osd_write_commit(const struct lu_env *env, struct dt_object *dt, for (i = 0; i < npages; i++) { if (lnb[i].lnb_rc == -ENOSPC && - osd_is_mapped(inode, lnb[i].lnb_file_offset)) { + osd_is_mapped(dt, lnb[i].lnb_file_offset, &extent)) { /* Allow the write to proceed if overwriting an * existing block */ lnb[i].lnb_rc = 0;