Whamcloud - gitweb
LU-16972 e2fsck: use rb-tree to track EA reference counts 29/51729/4
authorLi Dongyang <dongyangli@ddn.com>
Wed, 26 Jul 2023 05:20:54 +0000 (15:20 +1000)
committerAndreas Dilger <adilger@whamcloud.com>
Mon, 7 Aug 2023 14:07:35 +0000 (14:07 +0000)
Using the sorted array to track the EA blocks and
its refs is not scalable.
When the file system has a huge number of EA blocks,
pass1 scanning could not be finished within a reasonable
time, as 95%+ of CPU time is spent in memmove() when
trying to enlarge the the sorted array.

On a file system with 20 million EA blocks on an NVMe device
pass1 time taken:
without patch:
time: 2014.78/1838.70/19.91
with patch:
time: 45.17/20.17/20.19

Change-Id: I6dc1ee3037dbf7a48deb610514af1f0e35a5a397
Signed-off-by: Li Dongyang <dongyangli@ddn.com>
Reviewed-on: https://review.whamcloud.com/c/tools/e2fsprogs/+/51729
Tested-by: jenkins <devops@whamcloud.com>
Tested-by: Maloo <maloo@whamcloud.com>
Reviewed-by: Andreas Dilger <adilger@whamcloud.com>
e2fsck/e2fsck.h
e2fsck/ea_refcount.c
e2fsck/pass1.c

index db33085..eb22c74 100644 (file)
@@ -653,7 +653,7 @@ extern struct dx_dir_info *e2fsck_dx_dir_info_iter(e2fsck_t ctx,
 typedef __u64 ea_key_t;
 typedef __u64 ea_value_t;
 
-extern errcode_t ea_refcount_create(size_t size, ext2_refcount_t *ret);
+extern errcode_t ea_refcount_create(ext2_refcount_t *ret);
 extern void ea_refcount_free(ext2_refcount_t refcount);
 extern errcode_t ea_refcount_fetch(ext2_refcount_t refcount, ea_key_t ea_key,
                                   ea_value_t *ret);
@@ -663,7 +663,6 @@ extern errcode_t ea_refcount_decrement(ext2_refcount_t refcount,
                                       ea_key_t ea_key, ea_value_t *ret);
 extern errcode_t ea_refcount_store(ext2_refcount_t refcount, ea_key_t ea_key,
                                   ea_value_t count);
-extern size_t ext2fs_get_refcount_size(ext2_refcount_t refcount);
 extern void ea_refcount_intr_begin(ext2_refcount_t refcount);
 extern ea_key_t ea_refcount_intr_next(ext2_refcount_t refcount,
                                      ea_value_t *ret);
index 7154b47..fb8b009 100644 (file)
 #undef ENABLE_NLS
 #endif
 #include "e2fsck.h"
+#include "ext2fs/rbtree.h"
 
 /*
  * The strategy we use for keeping track of EA refcounts is as
- * follows.  We keep a sorted array of first EA blocks and its
- * reference counts.  Once the refcount has dropped to zero, it is
- * removed from the array to save memory space.  Once the EA block is
+ * follows.  We keep the first EA blocks and its reference counts
+ * in the rb-tree.  Once the refcount has dropped to zero, it is
+ * removed from the rb-tree to save memory space.  Once the EA block is
  * checked, its bit is set in the block_ea_map bitmap.
  */
 struct ea_refcount_el {
+       struct rb_node  node;
        /* ea_key could either be an inode number or block number. */
        ea_key_t        ea_key;
        ea_value_t      ea_value;
 };
 
 struct ea_refcount {
-       size_t          count;
-       size_t          size;
-       size_t          cursor;
-       struct ea_refcount_el   *list;
+       struct rb_root  root;
+       struct rb_node  *cursor;
 };
 
 void ea_refcount_free(ext2_refcount_t refcount)
 {
+       struct ea_refcount_el *el;
+       struct rb_node *node, *next;
+
        if (!refcount)
                return;
 
-       if (refcount->list)
-               ext2fs_free_mem(&refcount->list);
+       for (node = ext2fs_rb_first(&refcount->root); node; node = next) {
+               next = ext2fs_rb_next(node);
+               el = ext2fs_rb_entry(node, struct ea_refcount_el, node);
+               ext2fs_rb_erase(node, &refcount->root);
+               ext2fs_free_mem(&el);
+       }
        ext2fs_free_mem(&refcount);
 }
 
-errcode_t ea_refcount_create(size_t size, ext2_refcount_t *ret)
+errcode_t ea_refcount_create(ext2_refcount_t *ret)
 {
        ext2_refcount_t refcount;
        errcode_t       retval;
-       size_t          bytes;
 
        retval = ext2fs_get_memzero(sizeof(struct ea_refcount), &refcount);
        if (retval)
                return retval;
-
-       if (!size)
-               size = 500;
-       refcount->size = size;
-       bytes = size * sizeof(struct ea_refcount_el);
-#ifdef DEBUG
-       printf("Refcount allocated %zu entries, %zu bytes.\n",
-              refcount->size, bytes);
-#endif
-       retval = ext2fs_get_memzero(bytes, &refcount->list);
-       if (retval)
-               goto errout;
-
-       refcount->count = 0;
-       refcount->cursor = 0;
+       refcount->root = RB_ROOT;
 
        *ret = refcount;
        return 0;
-
-errout:
-       ea_refcount_free(refcount);
-       return(retval);
-}
-
-/*
- * collapse_refcount() --- go through the refcount array, and get rid
- * of any count == zero entries
- */
-static void refcount_collapse(ext2_refcount_t refcount)
-{
-       unsigned int    i, j;
-       struct ea_refcount_el   *list;
-
-       list = refcount->list;
-       for (i = 0, j = 0; i < refcount->count; i++) {
-               if (list[i].ea_value) {
-                       if (i != j)
-                               list[j] = list[i];
-                       j++;
-               }
-       }
-#if defined(DEBUG) || defined(TEST_PROGRAM)
-       printf("Refcount_collapse: size was %zu, now %d\n",
-              refcount->count, j);
-#endif
-       refcount->count = j;
-}
-
-
-/*
- * insert_refcount_el() --- Insert a new entry into the sorted list at a
- *     specified position.
- */
-static struct ea_refcount_el *insert_refcount_el(ext2_refcount_t refcount,
-                                                ea_key_t ea_key, int pos)
-{
-       struct ea_refcount_el   *el;
-       errcode_t               retval;
-       size_t                  new_size = 0;
-       int                     num;
-
-       if (refcount->count >= refcount->size) {
-               new_size = refcount->size + 100;
-#ifdef DEBUG
-               printf("Reallocating refcount %d entries...\n", new_size);
-#endif
-               retval = ext2fs_resize_mem((size_t) refcount->size *
-                                          sizeof(struct ea_refcount_el),
-                                          (size_t) new_size *
-                                          sizeof(struct ea_refcount_el),
-                                          &refcount->list);
-               if (retval)
-                       return 0;
-               refcount->size = new_size;
-       }
-       num = (int) refcount->count - pos;
-       if (num < 0)
-               return 0;       /* should never happen */
-       if (num) {
-               memmove(&refcount->list[pos+1], &refcount->list[pos],
-                       sizeof(struct ea_refcount_el) * num);
-       }
-       refcount->count++;
-       el = &refcount->list[pos];
-       el->ea_key = ea_key;
-       el->ea_value = 0;
-       return el;
 }
 
-
 /*
  * get_refcount_el() --- given an block number, try to find refcount
- *     information in the sorted list.  If the create flag is set,
- *     and we can't find an entry, create one in the sorted list.
+ *     information in the rb-tree.  If the create flag is set,
+ *     and we can't find an entry, create and add it to rb-tree.
  */
 static struct ea_refcount_el *get_refcount_el(ext2_refcount_t refcount,
                                              ea_key_t ea_key, int create)
 {
-       int     low, high, mid;
+       struct rb_node          **node;
+       struct rb_node          *parent = NULL;
+       struct ea_refcount_el   *el;
+       errcode_t               retval;
 
-       if (!refcount || !refcount->list)
-               return 0;
-retry:
-       low = 0;
-       high = (int) refcount->count-1;
-       if (create && ((refcount->count == 0) ||
-                      (ea_key > refcount->list[high].ea_key))) {
-               if (refcount->count >= refcount->size)
-                       refcount_collapse(refcount);
-
-               return insert_refcount_el(refcount, ea_key,
-                                         (unsigned) refcount->count);
-       }
-       if (refcount->count == 0)
+       if (!refcount)
                return 0;
 
-       if (refcount->cursor >= refcount->count)
-               refcount->cursor = 0;
-       if (ea_key == refcount->list[refcount->cursor].ea_key)
-               return &refcount->list[refcount->cursor++];
-#ifdef DEBUG
-       printf("Non-cursor get_refcount_el: %u\n", ea_key);
-#endif
-       while (low <= high) {
-               mid = (low+high)/2;
-               if (ea_key == refcount->list[mid].ea_key) {
-                       refcount->cursor = mid+1;
-                       return &refcount->list[mid];
-               }
-               if (ea_key < refcount->list[mid].ea_key)
-                       high = mid-1;
+       node = &refcount->root.rb_node;
+       while (*node) {
+               el = ext2fs_rb_entry(*node, struct ea_refcount_el, node);
+
+               parent = *node;
+               if (ea_key < el->ea_key)
+                       node = &(*node)->rb_left;
+               else if (ea_key > el->ea_key)
+                       node = &(*node)->rb_right;
                else
-                       low = mid+1;
-       }
-       /*
-        * If we need to create a new entry, it should be right at
-        * low (where high will be left at low-1).
-        */
-       if (create) {
-               if (refcount->count >= refcount->size) {
-                       refcount_collapse(refcount);
-                       if (refcount->count < refcount->size)
-                               goto retry;
-               }
-               return insert_refcount_el(refcount, ea_key, low);
+                       return el;
        }
-       return 0;
+
+       if (!create)
+               return 0;
+
+       retval = ext2fs_get_memzero(sizeof(struct ea_refcount_el), &el);
+       if (retval)
+               return 0;
+
+       el->ea_key = ea_key;
+       el->ea_value = 0;
+       ext2fs_rb_link_node(&el->node, parent, node);
+       ext2fs_rb_insert_color(&el->node, &refcount->root);
+
+       return el;
 }
 
 errcode_t ea_refcount_fetch(ext2_refcount_t refcount, ea_key_t ea_key,
@@ -240,13 +147,18 @@ errcode_t ea_refcount_decrement(ext2_refcount_t refcount, ea_key_t ea_key,
        struct ea_refcount_el   *el;
 
        el = get_refcount_el(refcount, ea_key, 0);
-       if (!el || el->ea_value == 0)
+       if (!el)
                return EXT2_ET_INVALID_ARGUMENT;
 
        el->ea_value--;
 
        if (ret)
                *ret = el->ea_value;
+
+       if (el->ea_value == 0) {
+               ext2fs_rb_erase(&el->node, &refcount->root);
+               ext2fs_free_mem(&el);
+       }
        return 0;
 }
 
@@ -262,17 +174,13 @@ errcode_t ea_refcount_store(ext2_refcount_t refcount, ea_key_t ea_key,
        if (!el)
                return ea_value ? EXT2_ET_NO_MEMORY : 0;
        el->ea_value = ea_value;
+       if (el->ea_value == 0) {
+               ext2fs_rb_erase(&el->node, &refcount->root);
+               ext2fs_free_mem(&el);
+       }
        return 0;
 }
 
-size_t ext2fs_get_refcount_size(ext2_refcount_t refcount)
-{
-       if (!refcount)
-               return 0;
-
-       return refcount->size;
-}
-
 void ea_refcount_intr_begin(ext2_refcount_t refcount)
 {
        refcount->cursor = 0;
@@ -281,19 +189,23 @@ void ea_refcount_intr_begin(ext2_refcount_t refcount)
 ea_key_t ea_refcount_intr_next(ext2_refcount_t refcount,
                                ea_value_t *ret)
 {
-       struct ea_refcount_el   *list;
-
-       while (1) {
-               if (refcount->cursor >= refcount->count)
-                       return 0;
-               list = refcount->list;
-               if (list[refcount->cursor].ea_value) {
-                       if (ret)
-                               *ret = list[refcount->cursor].ea_value;
-                       return list[refcount->cursor++].ea_key;
-               }
-               refcount->cursor++;
+       struct ea_refcount_el   *el;
+       struct rb_node          *node = refcount->cursor;
+
+       if (node == NULL)
+               node = ext2fs_rb_first(&refcount->root);
+       else
+               node = ext2fs_rb_next(node);
+
+       if (node) {
+               refcount->cursor = node;
+               el = ext2fs_rb_entry(node, struct ea_refcount_el, node);
+               if (ret)
+                       *ret = el->ea_value;
+               return el->ea_key;
        }
+
+       return 0;
 }
 
 
@@ -301,26 +213,28 @@ ea_key_t ea_refcount_intr_next(ext2_refcount_t refcount,
 
 errcode_t ea_refcount_validate(ext2_refcount_t refcount, FILE *out)
 {
-       errcode_t       ret = 0;
-       int             i;
+       struct ea_refcount_el   *el;
+       struct rb_node          *node;
+       ea_key_t                prev;
+       int                     prev_valid = 0;
        const char *bad = "bad refcount";
 
-       if (refcount->count > refcount->size) {
-               fprintf(out, "%s: count > size\n", bad);
-               return EXT2_ET_INVALID_ARGUMENT;
-       }
-       for (i=1; i < refcount->count; i++) {
-               if (refcount->list[i-1].ea_key >= refcount->list[i].ea_key) {
+       for (node = ext2fs_rb_first(&refcount->root); node != NULL;
+            node = ext2fs_rb_next(node)) {
+               el = ext2fs_rb_entry(node, struct ea_refcount_el, node);
+               if (prev_valid && prev >= el->ea_key) {
                        fprintf(out,
-                               "%s: list[%d].ea_key=%llu, list[%d].ea_key=%llu\n",
-                               bad, i-1,
-                               (unsigned long long) refcount->list[i-1].ea_key,
-                               i,
-                               (unsigned long long) refcount->list[i].ea_key);
-                       ret = EXT2_ET_INVALID_ARGUMENT;
+                               "%s: prev.ea_key=%llu, curr.ea_key=%llu\n",
+                               bad,
+                               (unsigned long long) prev,
+                               (unsigned long long) el->ea_key);
+                       return EXT2_ET_INVALID_ARGUMENT;
                }
+               prev = el->ea_key;
+               prev_valid = 1;
        }
-       return ret;
+
+       return 0;
 }
 
 #define BCODE_END      0
@@ -332,10 +246,9 @@ errcode_t ea_refcount_validate(ext2_refcount_t refcount, FILE *out)
 #define BCODE_FETCH    6
 #define BCODE_VALIDATE 7
 #define BCODE_LIST     8
-#define BCODE_COLLAPSE 9
 
 int bcode_program[] = {
-       BCODE_CREATE, 5,
+       BCODE_CREATE,
        BCODE_STORE, 3, 3,
        BCODE_STORE, 4, 4,
        BCODE_STORE, 1, 1,
@@ -362,7 +275,6 @@ int bcode_program[] = {
        BCODE_FETCH, 30,
        BCODE_DECR, 2,
        BCODE_DECR, 2,
-       BCODE_COLLAPSE,
        BCODE_LIST,
        BCODE_VALIDATE,
        BCODE_FREE,
@@ -373,7 +285,6 @@ int main(int argc, char **argv)
 {
        int     i = 0;
        ext2_refcount_t refcount;
-       size_t          size;
        ea_key_t        ea_key;
        ea_value_t      arg;
        errcode_t       retval;
@@ -383,15 +294,13 @@ int main(int argc, char **argv)
                case BCODE_END:
                        exit(0);
                case BCODE_CREATE:
-                       size = bcode_program[i++];
-                       retval = ea_refcount_create(size, &refcount);
+                       retval = ea_refcount_create(&refcount);
                        if (retval) {
                                com_err("ea_refcount_create", retval,
-                                       "while creating size %zu", size);
+                                       "while creating refcount");
                                exit(1);
                        } else
-                               printf("Creating refcount with size %zu\n",
-                                      size);
+                               printf("Creating refcount\n");
                        break;
                case BCODE_FREE:
                        ea_refcount_free(refcount);
@@ -465,9 +374,6 @@ int main(int argc, char **argv)
                                       (unsigned long long) arg);
                        }
                        break;
-               case BCODE_COLLAPSE:
-                       refcount_collapse(refcount);
-                       break;
                }
 
        }
index 9e49407..037b99a 100644 (file)
@@ -430,8 +430,7 @@ static void inc_ea_inode_refs(e2fsck_t ctx, struct problem_context *pctx,
                if (!entry->e_value_inum)
                        goto next;
                if (!ctx->ea_inode_refs) {
-                       pctx->errcode = ea_refcount_create(0,
-                                                          &ctx->ea_inode_refs);
+                       pctx->errcode = ea_refcount_create(&ctx->ea_inode_refs);
                        if (pctx->errcode) {
                                pctx->num = 4;
                                fix_problem(ctx, PR_1_ALLOCATE_REFCOUNT, pctx);
@@ -3290,8 +3289,7 @@ static errcode_t e2fsck_pass1_merge_ea_refcount(e2fsck_t global_ctx,
                                continue;
 
                        if (!global_ctx->refcount_extra) {
-                               retval = ea_refcount_create(0,
-                                               &global_ctx->refcount_extra);
+                               retval = ea_refcount_create(&global_ctx->refcount_extra);
                                if (retval)
                                        return retval;
                        }
@@ -3324,8 +3322,7 @@ static errcode_t e2fsck_pass1_merge_ea_refcount(e2fsck_t global_ctx,
                                return retval;
                        /* Ooops, this EA was referenced more than it stated */
                        if (!global_ctx->refcount_extra) {
-                               retval = ea_refcount_create(0,
-                                               &global_ctx->refcount_extra);
+                               retval = ea_refcount_create(&global_ctx->refcount_extra);
                                if (retval)
                                        return retval;
                        }
@@ -4177,8 +4174,7 @@ static int check_ext_attr(e2fsck_t ctx, struct problem_context *pctx,
 
        /* Create the EA refcount structure if necessary */
        if (!ctx->refcount) {
-               pctx->errcode = ea_refcount_create(0,
-                                       &ctx->refcount_orig);
+               pctx->errcode = ea_refcount_create(&ctx->refcount_orig);
                if (pctx->errcode) {
                        pctx->num = 1;
                        fix_problem(ctx, PR_1_ALLOCATE_REFCOUNT, pctx);
@@ -4186,7 +4182,7 @@ static int check_ext_attr(e2fsck_t ctx, struct problem_context *pctx,
                        return 0;
                }
 
-               pctx->errcode = ea_refcount_create(0, &ctx->refcount);
+               pctx->errcode = ea_refcount_create(&ctx->refcount);
                if (pctx->errcode) {
                        pctx->num = 1;
                        fix_problem(ctx, PR_1_ALLOCATE_REFCOUNT, pctx);
@@ -4220,8 +4216,7 @@ static int check_ext_attr(e2fsck_t ctx, struct problem_context *pctx,
                        return 1;
                /* Ooops, this EA was referenced more than it stated */
                if (!ctx->refcount_extra) {
-                       pctx->errcode = ea_refcount_create(0,
-                                          &ctx->refcount_extra);
+                       pctx->errcode = ea_refcount_create(&ctx->refcount_extra);
                        if (pctx->errcode) {
                                pctx->num = 2;
                                fix_problem(ctx, PR_1_ALLOCATE_REFCOUNT, pctx);
@@ -4362,8 +4357,7 @@ static int check_ext_attr(e2fsck_t ctx, struct problem_context *pctx,
 
        if (quota_blocks != EXT2FS_C2B(fs, 1U)) {
                if (!ctx->ea_block_quota_blocks) {
-                       pctx->errcode = ea_refcount_create(0,
-                                               &ctx->ea_block_quota_blocks);
+                       pctx->errcode = ea_refcount_create(&ctx->ea_block_quota_blocks);
                        if (pctx->errcode) {
                                pctx->num = 3;
                                goto refcount_fail;
@@ -4375,8 +4369,7 @@ static int check_ext_attr(e2fsck_t ctx, struct problem_context *pctx,
 
        if (quota_inodes) {
                if (!ctx->ea_block_quota_inodes) {
-                       pctx->errcode = ea_refcount_create(0,
-                                               &ctx->ea_block_quota_inodes);
+                       pctx->errcode = ea_refcount_create(&ctx->ea_block_quota_inodes);
                        if (pctx->errcode) {
                                pctx->num = 4;
 refcount_fail: