Whamcloud - gitweb
LU-14641 osd-ldiskfs: write commit declaring improvement 05/45505/2
author Wang Shilong <wshilong@ddn.com>
Mon, 26 Apr 2021 03:23:26 +0000 (11:23 +0800)
committer Andreas Dilger <adilger@whamcloud.com>
Sun, 14 Nov 2021 03:09:25 +0000 (03:09 +0000)
This patch tries to:

1) Extent bytes could fail to increase when the allocated extent
is smaller than 1M. Fix this by comparing the allocated extent with
the current value, and decay it for every allocation.

2) As filesystem space usage grows, the mballoc code no longer
tries its best to scan block groups for the best-aligned free
extent. The bytes per extent could therefore decay to a very small
value, which makes us reserve too many credits. We can be more
optimistic in the credit reservations: even when the filesystem is
nearly full, it is extremely unlikely that the worst case would
ever be hit.

3) Add extent bytes stats and debugging ability to help analyze
credit over-reservation problems.

Lustre-change: https://review.whamcloud.com/43446
Lustre-commit: 0f81c5ae973bf7fba45b6ba7f9c5f4fb1f6eadcb

Signed-off-by: Wang Shilong <wshilong@ddn.com>
Change-Id: I357c4a855147ba26a9e9bbe9ab1269bcfd44e5f3
Reviewed-by: Andreas Dilger <adilger@whamcloud.com>
Reviewed-by: James Simmons <jsimmons@infradead.org>
Reviewed-on: https://review.whamcloud.com/45505
Tested-by: jenkins <devops@whamcloud.com>
Reviewed-by: Li Dongyang <dongyangli@ddn.com>
Tested-by: Maloo <maloo@whamcloud.com>
lustre/osd-ldiskfs/osd_io.c
lustre/osd-ldiskfs/osd_lproc.c

index f99f625..6b1f43d 100644 (file)
@@ -1210,12 +1210,12 @@ cont_map:
                        GOTO(cleanup, rc);
                /*
                 * decay extent blocks if we could allocate
-                * good large(1M) extent.
+                * good large extent.
                 */
-               if (previous_total == 0 &&
-                   total >= OSD_DEFAULT_EXTENT_BYTES >> inode->i_blkbits)
+               if (total - previous_total >=
+                   osd_extent_bytes(osd) >> inode->i_blkbits)
                        osd_decay_extent_bytes(osd,
-                                              total << inode->i_blkbits);
+                               (total - previous_total) << inode->i_blkbits);
                /* look for next extent */
                fp = NULL;
                blocks += blocks_per_page * clen;
@@ -1360,6 +1360,7 @@ static int osd_is_mapped(struct dt_object *dt, __u64 offset,
        return cached_extent->mapped;
 }
 
+#define MAX_EXTENTS_PER_WRITE 100
 static int osd_declare_write_commit(const struct lu_env *env,
                                    struct dt_object *dt,
                                    struct niobuf_local *lnb, int npages,
@@ -1425,7 +1426,7 @@ static int osd_declare_write_commit(const struct lu_env *env,
                if (lnb[i].lnb_file_offset != extent.end || extent.end == 0) {
                        if (extent.end != 0)
                                extents += (extent.end - extent.start +
-                                       extent_bytes - 1) / extent_bytes;
+                                           extent_bytes - 1) / extent_bytes;
                        extent.start = lnb[i].lnb_file_offset;
                        extent.end = lnb[i].lnb_file_offset + lnb[i].lnb_len;
                } else {
@@ -1445,6 +1446,18 @@ static int osd_declare_write_commit(const struct lu_env *env,
 
        extents += (extent.end - extent.start +
                    extent_bytes - 1) / extent_bytes;
+       /**
+        * with system space usage growing up, mballoc codes won't
+        * try best to scan block group to align best free extent as
+        * we can. So extent bytes per extent could be decayed to a
+        * very small value, this could make us reserve too many credits.
+        * We could be more optimistic in the credit reservations, even
+        * in a case where the filesystem is nearly full, it is extremely
+        * unlikely that the worst case would ever be hit.
+        */
+       if (extents > MAX_EXTENTS_PER_WRITE)
+               extents = MAX_EXTENTS_PER_WRITE;
+
        /*
         * each extent can go into new leaf causing a split
         * 5 is max tree depth: inode + 4 index blocks
@@ -1487,6 +1500,11 @@ static int osd_declare_write_commit(const struct lu_env *env,
        else
                credits += extents;
 
+       CDEBUG(D_INODE,
+              "%s: inode #%lu extent_bytes %u extents %d credits %d\n",
+              osd_ino2name(inode), inode->i_ino, extent_bytes, extents,
+              credits);
+
 out_declare:
        osd_trans_declare_op(env, oh, OSD_OT_WRITE, credits);
 
index 97efb36..42fa6c3 100644 (file)
@@ -575,6 +575,24 @@ static ssize_t full_scrub_threshold_rate_store(struct kobject *kobj,
 }
 LUSTRE_RW_ATTR(full_scrub_threshold_rate);
 
+static ssize_t extent_bytes_allocation_show(struct kobject *kobj,
+                                           struct attribute *attr, char *buf)
+{
+       struct dt_device *dt = container_of(kobj, struct dt_device,
+                                           dd_kobj);
+       struct osd_device *dev = osd_dt_dev(dt);
+       int i;
+       unsigned int min = (unsigned int)(~0), cur;
+
+       for_each_online_cpu(i) {
+               cur = *per_cpu_ptr(dev->od_extent_bytes_percpu, i);
+               if (cur < min)
+                       min = cur;
+       }
+       return snprintf(buf, PAGE_SIZE, "%u\n", min);
+}
+LUSTRE_RO_ATTR(extent_bytes_allocation);
+
 static int ldiskfs_osd_oi_scrub_seq_show(struct seq_file *m, void *data)
 {
        struct osd_device *dev = osd_dt_dev((struct dt_device *)m->private);
@@ -864,6 +882,7 @@ static struct attribute *ldiskfs_attrs[] = {
        &lustre_attr_pdo.attr,
        &lustre_attr_full_scrub_ratio.attr,
        &lustre_attr_full_scrub_threshold_rate.attr,
+       &lustre_attr_extent_bytes_allocation.attr,
        NULL,
 };