Whamcloud - gitweb
LU-15252 mdt: reduce contention at mdt_lsom_update
authorAlexander Boyko <alexander.boyko@hpe.com>
Thu, 2 Dec 2021 09:43:54 +0000 (04:43 -0500)
committerAndreas Dilger <adilger@whamcloud.com>
Fri, 26 May 2023 10:21:04 +0000 (10:21 +0000)
mot_som_mutex serializes all close requests with lsom updates for
the same mdt_object. For a massive open/read/close single shared
file load, this leads to a high load average because many threads
sleep on the mutex.
This patch introduces a cached lsom size, and takes the mutex in the
update part only. Close requests with an lsom size less than or equal
to the cached size do not take the mutex at all.

Test results MPI open/flock/funlock/close SSF
10 iterations, 10 nodes with 100 threads each, 1000 file ops per thread
close time secs master patch MDT load average master patch
avg             0.142  0.086                  47.05  38.89
max             0.164  0.129                  49.39  44.77
min             0.097  0.041                  44.44  34.7

Lustre-change: https://review.whamcloud.com/45709
Lustre-commit: c8b7afe4970415f8dae84f5e20661f8a3b3681a0

Signed-off-by: Alexander Boyko <alexander.boyko@hpe.com>
Change-Id: I807b468b128295df9391b0467e74d4f10240662e
Reviewed-by: Andreas Dilger <adilger@whamcloud.com>
Reviewed-by: Andrew Perepechko <andrew.perepechko@hpe.com>
Reviewed-on: https://review.whamcloud.com/c/ex/lustre-release/+/51030
Tested-by: jenkins <devops@whamcloud.com>
Tested-by: Maloo <maloo@whamcloud.com>
lustre/mdt/mdt_handler.c
lustre/mdt/mdt_internal.h
lustre/mdt/mdt_som.c

index 2a4a463..717ae5a 100644 (file)
@@ -6376,6 +6376,9 @@ static struct lu_object *mdt_object_alloc(const struct lu_env *env,
                atomic_set(&mo->mot_open_count, 0);
                mo->mot_restripe_offset = 0;
                INIT_LIST_HEAD(&mo->mot_restripe_linkage);
+               mo->mot_lsom_size = 0;
+               mo->mot_lsom_blocks = 0;
+               mo->mot_lsom_inited = false;
                RETURN(o);
        }
        RETURN(NULL);
index f5badce..c51036b 100644 (file)
@@ -365,13 +365,16 @@ struct mdt_object {
                                                     * attribute cache */
                                mot_restriping:1,   /* dir restriping */
                                /* dir auto-split disabled */
-                               mot_auto_split_disabled:1;
+                               mot_auto_split_disabled:1,
+                               mot_lsom_inited:1; /* lsom was inited */
        int                     mot_write_count;
        spinlock_t              mot_write_lock;
         /* Lock to protect create_data */
        struct mutex            mot_lov_mutex;
        /* Lock to protect object's SOM update. */
        struct mutex            mot_som_mutex;
+       __u64                   mot_lsom_size;
+       __u64                   mot_lsom_blocks;
        /* lock to protect read/write stages for Data-on-MDT files */
        struct rw_semaphore     mot_dom_sem;
        /* Lock to protect lease open.
index 108f3f7..8bcc862 100644 (file)
@@ -100,6 +100,14 @@ int mdt_get_som(struct mdt_thread_info *info, struct mdt_object *obj,
                        attr->la_size = som->ms_size;
                        attr->la_blocks = som->ms_blocks;
                        info->mti_som_strict = 1;
+               } else if (!obj->mot_lsom_inited &&
+                          (som->ms_valid & SOM_FL_LAZY) &&
+                          !mutex_is_locked(&obj->mot_som_mutex)) {
+                       mutex_lock(&obj->mot_som_mutex);
+                       obj->mot_lsom_size = som->ms_size;
+                       obj->mot_lsom_blocks = som->ms_blocks;
+                       obj->mot_lsom_inited = true;
+                       mutex_unlock(&obj->mot_som_mutex);
                }
        } else if (rc == -ENODATA) {
                rc = 0;
@@ -138,7 +146,11 @@ int mdt_set_som(struct mdt_thread_info *info, struct mdt_object *obj,
        buf->lb_buf = som;
        buf->lb_len = sizeof(*som);
        rc = mo_xattr_set(info->mti_env, next, buf, XATTR_NAME_SOM, 0);
-
+       if (!rc && flag == SOM_FL_LAZY) {
+               obj->mot_lsom_size = size;
+               obj->mot_lsom_blocks = blocks;
+               obj->mot_lsom_inited = true;
+       }
        RETURN(rc);
 }
 
@@ -187,14 +199,20 @@ int mdt_lsom_update(struct mdt_thread_info *info,
        ma = &info->mti_attr;
        la = &ma->ma_attr;
 
-       mutex_lock(&o->mot_som_mutex);
+       if (!(la->la_valid & (LA_SIZE | LA_LSIZE) &&
+             o->mot_lsom_size < la->la_size) &&
+           !(la->la_valid & (LA_BLOCKS | LA_LBLOCKS) &&
+             o->mot_lsom_blocks < la->la_blocks) && !truncate &&
+           o->mot_lsom_inited)
+               RETURN(0);
+
        tmp_ma = &info->mti_u.som.attr;
        tmp_ma->ma_need = MA_INODE | MA_SOM;
        tmp_ma->ma_valid = 0;
 
        rc = mdt_attr_get_complex(info, o, tmp_ma);
        if (rc)
-               GOTO(out_lock, rc);
+               RETURN(rc);
 
        /**
         * If mti_big_lmm_used is set, it indicates that mti_big_lmm
@@ -203,11 +221,11 @@ int mdt_lsom_update(struct mdt_thread_info *info,
        if (!info->mti_big_lmm_used) {
                rc = mdt_big_xattr_get(info, o, XATTR_NAME_LOV);
                if (rc < 0 && rc != -ENODATA)
-                       GOTO(out_lock, rc);
+                       RETURN(rc);
 
                /* No LOV EA */
                if (rc == -ENODATA)
-                       GOTO(out_lock, rc = 0);
+                       RETURN(0);
 
                rc = 0;
        }
@@ -264,7 +282,7 @@ int mdt_lsom_update(struct mdt_thread_info *info,
                                if (som->ms_valid & SOM_FL_STRICT ||
                                    (som->ms_valid & SOM_FL_STALE &&
                                     !(ma->ma_attr_flags & MDS_DATA_MODIFIED)))
-                                       GOTO(out_lock, rc);
+                                       RETURN(rc);
 
                                size = som->ms_size;
                                blocks = som->ms_blocks;
@@ -280,11 +298,22 @@ int mdt_lsom_update(struct mdt_thread_info *info,
                                }
                        }
                }
-               if (truncate || changed)
+               if (truncate || changed) {
+                       mutex_lock(&o->mot_som_mutex);
+                       if (size <= o->mot_lsom_size &&
+                           blocks <= o->mot_lsom_blocks && !truncate &&
+                           o->mot_lsom_inited) {
+                               mutex_unlock(&o->mot_som_mutex);
+                               RETURN(0);
+                       }
+                       if (!truncate && size < o->mot_lsom_size)
+                               size = o->mot_lsom_size;
+                       if (!truncate && blocks < o->mot_lsom_blocks)
+                               blocks = o->mot_lsom_blocks;
                        rc = mdt_set_som(info, o, SOM_FL_LAZY, size, blocks);
+                       mutex_unlock(&o->mot_som_mutex);
+               }
        }
 
-out_lock:
-       mutex_unlock(&o->mot_som_mutex);
        RETURN(rc);
 }