Whamcloud - gitweb
LU-15252 mdt: reduce contention at mdt_lsom_update 09/45709/5
authorAlexander Boyko <alexander.boyko@hpe.com>
Thu, 2 Dec 2021 09:43:54 +0000 (04:43 -0500)
committerOleg Drokin <green@whamcloud.com>
Thu, 23 Dec 2021 07:16:51 +0000 (07:16 +0000)
mot_som_mutex serializes all close requests with lsom updates for
the same mdt_object. For a massive open/read/close single shared
file load, this leads to a high load average because many threads
sleep on the mutex.
This patch introduces a cached lsom size and takes the mutex only in
the update part. Close requests whose lsom size is less than or equal
to the cached size do not take the mutex at all.

Test results: MPI open/flock/funlock/close on a single shared file (SSF),
10 iterations, 10 nodes with 100 threads each, 1000 file ops per thread
close time secs master patch MDT load average master patch
avg             0.142  0.086                  47.05  38.89
max             0.164  0.129                  49.39  44.77
min             0.097  0.041                  44.44  34.7

Signed-off-by: Alexander Boyko <alexander.boyko@hpe.com>
Change-Id: I807b468b128295df9391b0467e74d4f10240662e
Reviewed-on: https://review.whamcloud.com/45709
Tested-by: jenkins <devops@whamcloud.com>
Tested-by: Maloo <maloo@whamcloud.com>
Reviewed-by: Andreas Dilger <adilger@whamcloud.com>
Reviewed-by: Andrew Perepechko <andrew.perepechko@hpe.com>
Reviewed-by: Oleg Drokin <green@whamcloud.com>
lustre/mdt/mdt_handler.c
lustre/mdt/mdt_internal.h
lustre/mdt/mdt_som.c

index da4e5c1..cc5a870 100644 (file)
@@ -6316,6 +6316,9 @@ static struct lu_object *mdt_object_alloc(const struct lu_env *env,
                atomic_set(&mo->mot_open_count, 0);
                mo->mot_restripe_offset = 0;
                INIT_LIST_HEAD(&mo->mot_restripe_linkage);
+               mo->mot_lsom_size = 0;
+               mo->mot_lsom_blocks = 0;
+               mo->mot_lsom_inited = false;
                RETURN(o);
        }
        RETURN(NULL);
index 7ada2f8..3812e9e 100644 (file)
@@ -345,13 +345,16 @@ struct mdt_object {
                                                     * attribute cache */
                                mot_restriping:1,   /* dir restriping */
                                /* dir auto-split disabled */
-                               mot_auto_split_disabled:1;
+                               mot_auto_split_disabled:1,
+                               mot_lsom_inited:1; /* lsom was inited */
        int                     mot_write_count;
        spinlock_t              mot_write_lock;
         /* Lock to protect create_data */
        struct mutex            mot_lov_mutex;
        /* Lock to protect object's SOM update. */
        struct mutex            mot_som_mutex;
+       __u64                   mot_lsom_size;
+       __u64                   mot_lsom_blocks;
        /* lock to protect read/write stages for Data-on-MDT files */
        struct rw_semaphore     mot_dom_sem;
        /* Lock to protect lease open.
index 62c52a5..1f22a30 100644 (file)
@@ -100,6 +100,14 @@ int mdt_get_som(struct mdt_thread_info *info, struct mdt_object *obj,
                        attr->la_size = som->ms_size;
                        attr->la_blocks = som->ms_blocks;
                        info->mti_som_valid = 1;
+               } else if (!obj->mot_lsom_inited &&
+                          (som->ms_valid & SOM_FL_LAZY) &&
+                          !mutex_is_locked(&obj->mot_som_mutex)) {
+                       mutex_lock(&obj->mot_som_mutex);
+                       obj->mot_lsom_size = som->ms_size;
+                       obj->mot_lsom_blocks = som->ms_blocks;
+                       obj->mot_lsom_inited = true;
+                       mutex_unlock(&obj->mot_som_mutex);
                }
        } else if (rc == -ENODATA) {
                rc = 0;
@@ -138,7 +146,11 @@ int mdt_set_som(struct mdt_thread_info *info, struct mdt_object *obj,
        buf->lb_buf = som;
        buf->lb_len = sizeof(*som);
        rc = mo_xattr_set(info->mti_env, next, buf, XATTR_NAME_SOM, 0);
-
+       if (!rc && flag == SOM_FL_LAZY) {
+               obj->mot_lsom_size = size;
+               obj->mot_lsom_blocks = blocks;
+               obj->mot_lsom_inited = true;
+       }
        RETURN(rc);
 }
 
@@ -187,14 +199,20 @@ int mdt_lsom_update(struct mdt_thread_info *info,
        ma = &info->mti_attr;
        la = &ma->ma_attr;
 
-       mutex_lock(&o->mot_som_mutex);
+       if (!(la->la_valid & (LA_SIZE | LA_LSIZE) &&
+             o->mot_lsom_size < la->la_size) &&
+           !(la->la_valid & (LA_BLOCKS | LA_LBLOCKS) &&
+             o->mot_lsom_blocks < la->la_blocks) && !truncate &&
+           o->mot_lsom_inited)
+               RETURN(0);
+
        tmp_ma = &info->mti_u.som.attr;
        tmp_ma->ma_need = MA_INODE | MA_SOM;
        tmp_ma->ma_valid = 0;
 
        rc = mdt_attr_get_complex(info, o, tmp_ma);
        if (rc)
-               GOTO(out_lock, rc);
+               RETURN(rc);
 
        /**
         * If mti_big_lmm_used is set, it indicates that mti_big_lmm
@@ -203,11 +221,11 @@ int mdt_lsom_update(struct mdt_thread_info *info,
        if (!info->mti_big_lmm_used) {
                rc = mdt_big_xattr_get(info, o, XATTR_NAME_LOV);
                if (rc < 0 && rc != -ENODATA)
-                       GOTO(out_lock, rc);
+                       RETURN(rc);
 
                /* No LOV EA */
                if (rc == -ENODATA)
-                       GOTO(out_lock, rc = 0);
+                       RETURN(0);
 
                rc = 0;
        }
@@ -264,7 +282,7 @@ int mdt_lsom_update(struct mdt_thread_info *info,
                                if (som->ms_valid & SOM_FL_STRICT ||
                                    (som->ms_valid & SOM_FL_STALE &&
                                     !(ma->ma_attr_flags & MDS_DATA_MODIFIED)))
-                                       GOTO(out_lock, rc);
+                                       RETURN(rc);
 
                                size = som->ms_size;
                                blocks = som->ms_blocks;
@@ -280,11 +298,22 @@ int mdt_lsom_update(struct mdt_thread_info *info,
                                }
                        }
                }
-               if (truncate || changed)
+               if (truncate || changed) {
+                       mutex_lock(&o->mot_som_mutex);
+                       if (size <= o->mot_lsom_size &&
+                           blocks <= o->mot_lsom_blocks && !truncate &&
+                           o->mot_lsom_inited) {
+                               mutex_unlock(&o->mot_som_mutex);
+                               RETURN(0);
+                       }
+                       if (!truncate && size < o->mot_lsom_size)
+                               size = o->mot_lsom_size;
+                       if (!truncate && blocks < o->mot_lsom_blocks)
+                               blocks = o->mot_lsom_blocks;
                        rc = mdt_set_som(info, o, SOM_FL_LAZY, size, blocks);
+                       mutex_unlock(&o->mot_som_mutex);
+               }
        }
 
-out_lock:
-       mutex_unlock(&o->mot_som_mutex);
        RETURN(rc);
 }