From 0f81c5ae973bf7fba45b6ba7f9c5f4fb1f6eadcb Mon Sep 17 00:00:00 2001 From: Wang Shilong Date: Mon, 26 Apr 2021 11:23:26 +0800 Subject: [PATCH] LU-14641 osd-ldiskfs: write commit declaring improvement This patch tries to: 1) Extent bytes could fail to increase when the allocated extent is smaller than 1M; fix this by comparing it with the current value, and decay it for every allocation. 2) As filesystem space usage grows, the mballoc code no longer tries its best to scan block groups for the best-aligned free extent. So the bytes per extent could decay to a very small value, which could make us reserve too many credits. We can be more optimistic in the credit reservations: even when the filesystem is nearly full, it is extremely unlikely that the worst case would ever be hit. 3) Add extent bytes stats and debug output to help analyze over-reservation problems. Signed-off-by: Wang Shilong Change-Id: I357c4a855147ba26a9e9bbe9ab1269bcfd44e5f3 Reviewed-on: https://review.whamcloud.com/43446 Tested-by: jenkins Reviewed-by: Andreas Dilger Tested-by: Maloo Reviewed-by: James Simmons Reviewed-by: Oleg Drokin --- lustre/osd-ldiskfs/osd_io.c | 28 +++++++++++++++++++++++----- lustre/osd-ldiskfs/osd_lproc.c | 19 +++++++++++++++++++ 2 files changed, 42 insertions(+), 5 deletions(-) diff --git a/lustre/osd-ldiskfs/osd_io.c b/lustre/osd-ldiskfs/osd_io.c index 349e845..f8568c7 100644 --- a/lustre/osd-ldiskfs/osd_io.c +++ b/lustre/osd-ldiskfs/osd_io.c @@ -1209,12 +1209,12 @@ cont_map: GOTO(cleanup, rc); /* * decay extent blocks if we could allocate - * good large(1M) extent. + * good large extent. 
*/ - if (previous_total == 0 && - total >= OSD_DEFAULT_EXTENT_BYTES >> inode->i_blkbits) + if (total - previous_total >= + osd_extent_bytes(osd) >> inode->i_blkbits) osd_decay_extent_bytes(osd, - total << inode->i_blkbits); + (total - previous_total) << inode->i_blkbits); /* look for next extent */ fp = NULL; blocks += blocks_per_page * clen; @@ -1355,6 +1355,7 @@ static int osd_is_mapped(struct dt_object *dt, __u64 offset, return cached_extent->mapped; } +#define MAX_EXTENTS_PER_WRITE 100 static int osd_declare_write_commit(const struct lu_env *env, struct dt_object *dt, struct niobuf_local *lnb, int npages, @@ -1420,7 +1421,7 @@ static int osd_declare_write_commit(const struct lu_env *env, if (lnb[i].lnb_file_offset != extent.end || extent.end == 0) { if (extent.end != 0) extents += (extent.end - extent.start + - extent_bytes - 1) / extent_bytes; + extent_bytes - 1) / extent_bytes; extent.start = lnb[i].lnb_file_offset; extent.end = lnb[i].lnb_file_offset + lnb[i].lnb_len; } else { @@ -1440,6 +1441,18 @@ static int osd_declare_write_commit(const struct lu_env *env, extents += (extent.end - extent.start + extent_bytes - 1) / extent_bytes; + /** + * with system space usage growing up, mballoc codes won't + * try best to scan block group to align best free extent as + * we can. So extent bytes per extent could be decayed to a + * very small value, this could make us reserve too many credits. + * We could be more optimistic in the credit reservations, even + * in a case where the filesystem is nearly full, it is extremely + * unlikely that the worst case would ever be hit. 
+ */ + if (extents > MAX_EXTENTS_PER_WRITE) + extents = MAX_EXTENTS_PER_WRITE; + /* * each extent can go into new leaf causing a split * 5 is max tree depth: inode + 4 index blocks @@ -1482,6 +1495,11 @@ static int osd_declare_write_commit(const struct lu_env *env, else credits += extents; + CDEBUG(D_INODE, + "%s: inode #%lu extent_bytes %u extents %d credits %d\n", + osd_ino2name(inode), inode->i_ino, extent_bytes, extents, + credits); + out_declare: osd_trans_declare_op(env, oh, OSD_OT_WRITE, credits); diff --git a/lustre/osd-ldiskfs/osd_lproc.c b/lustre/osd-ldiskfs/osd_lproc.c index 4e787d5..906e53a 100644 --- a/lustre/osd-ldiskfs/osd_lproc.c +++ b/lustre/osd-ldiskfs/osd_lproc.c @@ -574,6 +574,24 @@ static ssize_t full_scrub_threshold_rate_store(struct kobject *kobj, } LUSTRE_RW_ATTR(full_scrub_threshold_rate); +static ssize_t extent_bytes_allocation_show(struct kobject *kobj, + struct attribute *attr, char *buf) +{ + struct dt_device *dt = container_of(kobj, struct dt_device, + dd_kobj); + struct osd_device *dev = osd_dt_dev(dt); + int i; + unsigned int min = (unsigned int)(~0), cur; + + for_each_online_cpu(i) { + cur = *per_cpu_ptr(dev->od_extent_bytes_percpu, i); + if (cur < min) + min = cur; + } + return snprintf(buf, PAGE_SIZE, "%u\n", min); +} +LUSTRE_RO_ATTR(extent_bytes_allocation); + static int ldiskfs_osd_oi_scrub_seq_show(struct seq_file *m, void *data) { struct osd_device *dev = osd_dt_dev((struct dt_device *)m->private); @@ -863,6 +881,7 @@ static struct attribute *ldiskfs_attrs[] = { &lustre_attr_pdo.attr, &lustre_attr_full_scrub_ratio.attr, &lustre_attr_full_scrub_threshold_rate.attr, + &lustre_attr_extent_bytes_allocation.attr, NULL, }; -- 1.8.3.1