Whamcloud - gitweb
LU-14641 osd-ldiskfs: write commit declaring improvement 46/43446/7
author Wang Shilong <wshilong@ddn.com>
Mon, 26 Apr 2021 03:23:26 +0000 (11:23 +0800)
committer Oleg Drokin <green@whamcloud.com>
Tue, 11 May 2021 22:53:54 +0000 (22:53 +0000)
This patch tries to:

1) The extent bytes value could fail to increase when the
allocated extent was smaller than 1M; fix this by comparing
against the current value instead, and decay it for every allocation.

2) As filesystem space usage grows, the mballoc code no longer
tries as hard to scan block groups for the best-aligned free
extent. As a result, the extent bytes per extent could decay to a
very small value, which makes us reserve too many credits.
We can be more optimistic in the credit reservations: even
when the filesystem is nearly full, it is extremely
unlikely that the worst case would ever be hit.

3) Add extent bytes stats and debugging support to help analyze
over-reservation problems.

Signed-off-by: Wang Shilong <wshilong@ddn.com>
Change-Id: I357c4a855147ba26a9e9bbe9ab1269bcfd44e5f3
Reviewed-on: https://review.whamcloud.com/43446
Tested-by: jenkins <devops@whamcloud.com>
Reviewed-by: Andreas Dilger <adilger@whamcloud.com>
Tested-by: Maloo <maloo@whamcloud.com>
Reviewed-by: James Simmons <jsimmons@infradead.org>
Reviewed-by: Oleg Drokin <green@whamcloud.com>
lustre/osd-ldiskfs/osd_io.c
lustre/osd-ldiskfs/osd_lproc.c

index 349e845..f8568c7 100644 (file)
@@ -1209,12 +1209,12 @@ cont_map:
                        GOTO(cleanup, rc);
                /*
                 * decay extent blocks if we could allocate
-                * good large(1M) extent.
+                * good large extent.
                 */
-               if (previous_total == 0 &&
-                   total >= OSD_DEFAULT_EXTENT_BYTES >> inode->i_blkbits)
+               if (total - previous_total >=
+                   osd_extent_bytes(osd) >> inode->i_blkbits)
                        osd_decay_extent_bytes(osd,
-                                              total << inode->i_blkbits);
+                               (total - previous_total) << inode->i_blkbits);
                /* look for next extent */
                fp = NULL;
                blocks += blocks_per_page * clen;
@@ -1355,6 +1355,7 @@ static int osd_is_mapped(struct dt_object *dt, __u64 offset,
        return cached_extent->mapped;
 }
 
+#define MAX_EXTENTS_PER_WRITE 100
 static int osd_declare_write_commit(const struct lu_env *env,
                                    struct dt_object *dt,
                                    struct niobuf_local *lnb, int npages,
@@ -1420,7 +1421,7 @@ static int osd_declare_write_commit(const struct lu_env *env,
                if (lnb[i].lnb_file_offset != extent.end || extent.end == 0) {
                        if (extent.end != 0)
                                extents += (extent.end - extent.start +
-                                       extent_bytes - 1) / extent_bytes;
+                                           extent_bytes - 1) / extent_bytes;
                        extent.start = lnb[i].lnb_file_offset;
                        extent.end = lnb[i].lnb_file_offset + lnb[i].lnb_len;
                } else {
@@ -1440,6 +1441,18 @@ static int osd_declare_write_commit(const struct lu_env *env,
 
        extents += (extent.end - extent.start +
                    extent_bytes - 1) / extent_bytes;
+       /**
+        * with system space usage growing up, mballoc codes won't
+        * try best to scan block group to align best free extent as
+        * we can. So extent bytes per extent could be decayed to a
+        * very small value, this could make us reserve too many credits.
+        * We could be more optimistic in the credit reservations, even
+        * in a case where the filesystem is nearly full, it is extremely
+        * unlikely that the worst case would ever be hit.
+        */
+       if (extents > MAX_EXTENTS_PER_WRITE)
+               extents = MAX_EXTENTS_PER_WRITE;
+
        /*
         * each extent can go into new leaf causing a split
         * 5 is max tree depth: inode + 4 index blocks
@@ -1482,6 +1495,11 @@ static int osd_declare_write_commit(const struct lu_env *env,
        else
                credits += extents;
 
+       CDEBUG(D_INODE,
+              "%s: inode #%lu extent_bytes %u extents %d credits %d\n",
+              osd_ino2name(inode), inode->i_ino, extent_bytes, extents,
+              credits);
+
 out_declare:
        osd_trans_declare_op(env, oh, OSD_OT_WRITE, credits);
 
index 4e787d5..906e53a 100644 (file)
@@ -574,6 +574,24 @@ static ssize_t full_scrub_threshold_rate_store(struct kobject *kobj,
 }
 LUSTRE_RW_ATTR(full_scrub_threshold_rate);
 
+static ssize_t extent_bytes_allocation_show(struct kobject *kobj,
+                                           struct attribute *attr, char *buf)
+{
+       struct dt_device *dt = container_of(kobj, struct dt_device,
+                                           dd_kobj);
+       struct osd_device *dev = osd_dt_dev(dt);
+       int i;
+       unsigned int min = (unsigned int)(~0), cur;
+
+       for_each_online_cpu(i) {
+               cur = *per_cpu_ptr(dev->od_extent_bytes_percpu, i);
+               if (cur < min)
+                       min = cur;
+       }
+       return snprintf(buf, PAGE_SIZE, "%u\n", min);
+}
+LUSTRE_RO_ATTR(extent_bytes_allocation);
+
 static int ldiskfs_osd_oi_scrub_seq_show(struct seq_file *m, void *data)
 {
        struct osd_device *dev = osd_dt_dev((struct dt_device *)m->private);
@@ -863,6 +881,7 @@ static struct attribute *ldiskfs_attrs[] = {
        &lustre_attr_pdo.attr,
        &lustre_attr_full_scrub_ratio.attr,
        &lustre_attr_full_scrub_threshold_rate.attr,
+       &lustre_attr_extent_bytes_allocation.attr,
        NULL,
 };