From: Alexander Boyko Date: Thu, 2 Dec 2021 09:43:54 +0000 (-0500) Subject: LU-15252 mdt: reduce contention at mdt_lsom_update X-Git-Tag: 2.14.57~109 X-Git-Url: https://git.whamcloud.com/?a=commitdiff_plain;h=refs%2Fchanges%2F09%2F45709%2F5;p=fs%2Flustre-release.git LU-15252 mdt: reduce contention at mdt_lsom_update mot_som_mutex serializes all close requests with lsom updates for the same mdt_object. For a massive open/read/close single shared file load, it leads to a high load average because many threads sleep on the mutex. This patch introduces a cached lsom size, and uses a mutex only for the update part. Close requests with lsom size less than or equal to the cached size would not take a mutex at all. Test results MPI open/flock/funlock/close SSF 10 iterations 10 nodes, 100 threads each, 1000 file ops per thread close time secs master patch MDT load average master patch avg 0.142 0.086 47.05 38.89 max 0.164 0.129 49.39 44.77 min 0.097 0.041 44.44 34.7 Signed-off-by: Alexander Boyko Change-Id: I807b468b128295df9391b0467e74d4f10240662e Reviewed-on: https://review.whamcloud.com/45709 Tested-by: jenkins Tested-by: Maloo Reviewed-by: Andreas Dilger Reviewed-by: Andrew Perepechko Reviewed-by: Oleg Drokin --- diff --git a/lustre/mdt/mdt_handler.c b/lustre/mdt/mdt_handler.c index da4e5c1..cc5a870 100644 --- a/lustre/mdt/mdt_handler.c +++ b/lustre/mdt/mdt_handler.c @@ -6316,6 +6316,9 @@ static struct lu_object *mdt_object_alloc(const struct lu_env *env, atomic_set(&mo->mot_open_count, 0); mo->mot_restripe_offset = 0; INIT_LIST_HEAD(&mo->mot_restripe_linkage); + mo->mot_lsom_size = 0; + mo->mot_lsom_blocks = 0; + mo->mot_lsom_inited = false; RETURN(o); } RETURN(NULL); diff --git a/lustre/mdt/mdt_internal.h b/lustre/mdt/mdt_internal.h index 7ada2f8..3812e9e 100644 --- a/lustre/mdt/mdt_internal.h +++ b/lustre/mdt/mdt_internal.h @@ -345,13 +345,16 @@ struct mdt_object { * attribute cache */ mot_restriping:1, /* dir restriping */ /* dir auto-split disabled */ - mot_auto_split_disabled:1; + 
mot_auto_split_disabled:1, + mot_lsom_inited:1; /* lsom was inited */ int mot_write_count; spinlock_t mot_write_lock; /* Lock to protect create_data */ struct mutex mot_lov_mutex; /* Lock to protect object's SOM update. */ struct mutex mot_som_mutex; + __u64 mot_lsom_size; + __u64 mot_lsom_blocks; /* lock to protect read/write stages for Data-on-MDT files */ struct rw_semaphore mot_dom_sem; /* Lock to protect lease open. diff --git a/lustre/mdt/mdt_som.c b/lustre/mdt/mdt_som.c index 62c52a5..1f22a30 100644 --- a/lustre/mdt/mdt_som.c +++ b/lustre/mdt/mdt_som.c @@ -100,6 +100,14 @@ int mdt_get_som(struct mdt_thread_info *info, struct mdt_object *obj, attr->la_size = som->ms_size; attr->la_blocks = som->ms_blocks; info->mti_som_valid = 1; + } else if (!obj->mot_lsom_inited && + (som->ms_valid & SOM_FL_LAZY) && + !mutex_is_locked(&obj->mot_som_mutex)) { + mutex_lock(&obj->mot_som_mutex); + obj->mot_lsom_size = som->ms_size; + obj->mot_lsom_blocks = som->ms_blocks; + obj->mot_lsom_inited = true; + mutex_unlock(&obj->mot_som_mutex); } } else if (rc == -ENODATA) { rc = 0; @@ -138,7 +146,11 @@ int mdt_set_som(struct mdt_thread_info *info, struct mdt_object *obj, buf->lb_buf = som; buf->lb_len = sizeof(*som); rc = mo_xattr_set(info->mti_env, next, buf, XATTR_NAME_SOM, 0); - + if (!rc && flag == SOM_FL_LAZY) { + obj->mot_lsom_size = size; + obj->mot_lsom_blocks = blocks; + obj->mot_lsom_inited = true; + } RETURN(rc); } @@ -187,14 +199,20 @@ int mdt_lsom_update(struct mdt_thread_info *info, ma = &info->mti_attr; la = &ma->ma_attr; - mutex_lock(&o->mot_som_mutex); + if (!(la->la_valid & (LA_SIZE | LA_LSIZE) && + o->mot_lsom_size < la->la_size) && + !(la->la_valid & (LA_BLOCKS | LA_LBLOCKS) && + o->mot_lsom_blocks < la->la_blocks) && !truncate && + o->mot_lsom_inited) + RETURN(0); + tmp_ma = &info->mti_u.som.attr; tmp_ma->ma_need = MA_INODE | MA_SOM; tmp_ma->ma_valid = 0; rc = mdt_attr_get_complex(info, o, tmp_ma); if (rc) - GOTO(out_lock, rc); + RETURN(rc); /** * If 
mti_big_lmm_used is set, it indicates that mti_big_lmm @@ -203,11 +221,11 @@ int mdt_lsom_update(struct mdt_thread_info *info, if (!info->mti_big_lmm_used) { rc = mdt_big_xattr_get(info, o, XATTR_NAME_LOV); if (rc < 0 && rc != -ENODATA) - GOTO(out_lock, rc); + RETURN(rc); /* No LOV EA */ if (rc == -ENODATA) - GOTO(out_lock, rc = 0); + RETURN(0); rc = 0; } @@ -264,7 +282,7 @@ int mdt_lsom_update(struct mdt_thread_info *info, if (som->ms_valid & SOM_FL_STRICT || (som->ms_valid & SOM_FL_STALE && !(ma->ma_attr_flags & MDS_DATA_MODIFIED))) - GOTO(out_lock, rc); + RETURN(rc); size = som->ms_size; blocks = som->ms_blocks; @@ -280,11 +298,22 @@ int mdt_lsom_update(struct mdt_thread_info *info, } } } - if (truncate || changed) + if (truncate || changed) { + mutex_lock(&o->mot_som_mutex); + if (size <= o->mot_lsom_size && + blocks <= o->mot_lsom_blocks && !truncate && + o->mot_lsom_inited) { + mutex_unlock(&o->mot_som_mutex); + RETURN(0); + } + if (!truncate && size < o->mot_lsom_size) + size = o->mot_lsom_size; + if (!truncate && blocks < o->mot_lsom_blocks) + blocks = o->mot_lsom_blocks; rc = mdt_set_som(info, o, SOM_FL_LAZY, size, blocks); + mutex_unlock(&o->mot_som_mutex); + } } -out_lock: - mutex_unlock(&o->mot_som_mutex); RETURN(rc); }