From 029bc1c294c3c78af863ab05524275978f4e1b2d Mon Sep 17 00:00:00 2001 From: Andreas Dilger Date: Mon, 14 Oct 2024 17:23:26 -0600 Subject: [PATCH] LU-14520 ldlm: reduce ldlm_lock memory usage Reduce the size of struct ldlm_lock so that it can fit into a 512-byte slab allocation. The primary reduction in memory usage is from moving struct ost_lvb l_ost_lvb into the union for IBITS locks where it is needed, while it was previously part of the common structure. Add assertions to verify that the l_ost_lvb field is only used for IBITS lock type. Additionally, shrink some of the other fields in memory to bitfields that only use the required bits, and pack them together to save space. These are read-mostly fields for the lifetime of the lock and give some space to add fields in the future before hitting 512 bytes again. ldlm_lock BEFORE PATCHED size: 536, members: 36 size: 496, members: 35 sum used: 536, holes: 0 sum used: 490, holes: 2, sum holes: 6 bit holes: 0 bit holes: 1, sum bit holes: 10 Also reduce struct ldlm_resource by a few bytes, so that 23 can fit into a single 4096-byte slab instead of the previous 22 in one slab. 
ldlm_resource BEFORE PATCHED size: 184, members: 14 size: 176, members: 14 padding: 7, holes: 0 sum used: 171, holes: 1, sum holes: 5 bit holes: 0 bit holes: 1, sum bit holes: 4 Signed-off-by: Andreas Dilger Change-Id: I7f017a852228f8c9090e890f2766707fdbde2abd Reviewed-on: https://review.whamcloud.com/c/fs/lustre-release/+/56685 Tested-by: jenkins Tested-by: Maloo Reviewed-by: Alex Zhuravlev Reviewed-by: Yang Sheng Reviewed-by: James Simmons Reviewed-by: Oleg Drokin --- lustre/include/lustre_dlm.h | 80 ++++++++++++++++++++++----------------------- lustre/ldlm/ldlm_lock.c | 32 +++++++++--------- lustre/ldlm/ldlm_lockd.c | 6 +++- lustre/ldlm/ldlm_request.c | 2 +- lustre/llite/file.c | 2 +- lustre/mdc/mdc_dev.c | 57 +++++++++++++++++--------------- lustre/mdc/mdc_locks.c | 2 ++ lustre/mdt/mdt_lvb.c | 1 + lustre/ofd/ofd_lvb.c | 1 + 9 files changed, 98 insertions(+), 85 deletions(-) diff --git a/lustre/include/lustre_dlm.h b/lustre/include/lustre_dlm.h index cd4d6f0..adaa82f 100644 --- a/lustre/include/lustre_dlm.h +++ b/lustre/include/lustre_dlm.h @@ -754,6 +754,7 @@ enum lvb_type { LVB_T_OST = 1, LVB_T_LQUOTA = 2, LVB_T_LAYOUT = 3, + LVB_T_END }; /** @@ -805,27 +806,29 @@ struct ldlm_lock { * Internal structures per lock type.. */ union { - /* LDLM_EXTENT locks only */ - struct { + struct { /* LDLM_EXTENT locks only */ /* Originally requested extent for the extent lock. */ struct ldlm_extent l_req_extent; struct rb_node l_rb; u64 l_subtree_last; struct list_head l_same_extent; }; - /* LDLM_PLAIN and LDLM_IBITS locks */ - struct { + struct { /* LDLM_PLAIN and LDLM_IBITS locks */ /** * Protected by lr_lock, linkages to "skip lists". - * For more explanations of skip lists see ldlm/ldlm_inodebits.c + * For explanations of skip lists see + * ldlm/ldlm_inodebits.c */ struct list_head l_sl_mode; struct list_head l_sl_policy; struct ldlm_ibits_node *l_ibits_node; + /* separate ost_lvb used mostly by Data-on-MDT for now. 
+ * It is introduced to don't mix with layout lock data. + */ + struct ost_lvb l_ost_lvb; }; - /* LDLM_FLOCK locks */ - struct { + struct { /* LDLM_FLOCK locks */ /** * Per export hash of flock locks. * Protected by per-bucket exp->exp_flock_hash locks. @@ -842,15 +845,32 @@ struct ldlm_lock { * Protected by per-bucket exp->exp_lock_hash locks. */ struct hlist_node l_exp_hash; + + /* Requested mode. Protected by lr_lock. */ + enum ldlm_mode l_req_mode:9; + /* Granted mode, also protected by lr_lock. */ + enum ldlm_mode l_granted_mode:9; + /** - * Requested mode. - * Protected by lr_lock. + * Whether the blocking AST was sent for this lock. + * This is for debugging. Valid values are 0 and 1, if there is an + * attempt to send blocking AST more than once, an assertion would be + * hit. \see ldlm_work_bl_ast_lock */ - enum ldlm_mode l_req_mode; - /** - * Granted mode, also protected by lr_lock. + unsigned int l_bl_ast_run:1; + + /* content type for lock value block */ + enum lvb_type l_lvb_type:3; + /* unsigned int l_unused_bits:10; */ + u16 l_lvb_len; + /* u16 l_unused; */ + + /* + * Temporary storage for a LVB received during an enqueue operation. + * May be vmalloc'd, so needs to be freed with OBD_FREE_LARGE(). */ - enum ldlm_mode l_granted_mode; + void *l_lvb_data; + /** Lock completion handler pointer. Called when lock is granted. */ ldlm_completion_callback l_completion_ast; /** @@ -920,19 +940,6 @@ struct ldlm_lock { */ ktime_t l_last_used; - /* - * Client-side-only members. - */ - - enum lvb_type l_lvb_type; - - /** - * Temporary storage for a LVB received during an enqueue operation. - * May be vmalloc'd, so needs to be freed with OBD_FREE_LARGE(). - */ - __u32 l_lvb_len; - void *l_lvb_data; - /** Private storage for lock user. Opaque to LDLM. */ void *l_ast_data; @@ -946,10 +953,6 @@ struct ldlm_lock { time64_t l_blast_sent; }; - /* separate ost_lvb used mostly by Data-on-MDT for now. - * It is introduced to don't mix with layout lock data. 
- */ - struct ost_lvb l_ost_lvb; /* * Server-side-only members. */ @@ -981,14 +984,8 @@ struct ldlm_lock { /** Local PID of process which created this lock. */ __u32 l_pid; + /* __u32 l_unused; */ - /** - * Number of times blocking AST was sent for this lock. - * This is for debugging. Valid values are 0 and 1, if there is an - * attempt to send blocking AST more than once, an assertion would be - * hit. \see ldlm_work_bl_ast_lock - */ - int l_bl_ast_run; /** List item ldlm_add_ast_work_item() for case of blocking ASTs. */ struct list_head l_bl_ast; /** List item ldlm_add_ast_work_item() for case of completion ASTs. */ @@ -1140,18 +1137,19 @@ struct ldlm_resource { }; /** Type of locks this resource can hold. Only one type per resource. */ - enum ldlm_type lr_type; /* LDLM_{PLAIN,EXTENT,FLOCK,IBITS} */ + enum ldlm_type lr_type:4; /* LDLM_{PLAIN,EXTENT,FLOCK,IBITS} */ + /* unsigned int lr_unused_bits:4; */ + /* char lr_unused[5]; */ /** * Server-side-only lock value block elements. * To serialize lvbo_init. */ - int lr_lvb_len; + bool lr_lvb_initialized; + char lr_lvb_len; struct mutex lr_lvb_mutex; /** protected by lr_lock */ void *lr_lvb_data; - /** is lvb initialized ? */ - bool lr_lvb_initialized; }; static inline int ldlm_is_granted(struct ldlm_lock *lock) diff --git a/lustre/ldlm/ldlm_lock.c b/lustre/ldlm/ldlm_lock.c index 63ebd47..08c6b5a 100644 --- a/lustre/ldlm/ldlm_lock.c +++ b/lustre/ldlm/ldlm_lock.c @@ -1558,18 +1558,18 @@ EXPORT_SYMBOL(ldlm_revalidate_lock_handle); /** The caller must guarantee that the buffer is large enough. 
*/ int ldlm_fill_lvb(struct ldlm_lock *lock, struct req_capsule *pill, - enum req_location loc, void *data, int size) + enum req_location loc, void *data, int lvb_len) { void *lvb; ENTRY; LASSERT(data != NULL); - LASSERT(size >= 0); + LASSERT(lvb_len >= 0); switch (lock->l_lvb_type) { case LVB_T_OST: - if (size == sizeof(struct ost_lvb)) { + if (lvb_len == sizeof(struct ost_lvb)) { if (loc == RCL_CLIENT) lvb = req_capsule_client_swab_get(pill, &RMF_DLM_LVB, @@ -1583,8 +1583,8 @@ int ldlm_fill_lvb(struct ldlm_lock *lock, struct req_capsule *pill, RETURN(-EPROTO); } - memcpy(data, lvb, size); - } else if (size == sizeof(struct ost_lvb_v1)) { + memcpy(data, lvb, lvb_len); + } else if (lvb_len == sizeof(struct ost_lvb_v1)) { struct ost_lvb *olvb = data; if (loc == RCL_CLIENT) @@ -1593,25 +1593,25 @@ int ldlm_fill_lvb(struct ldlm_lock *lock, struct req_capsule *pill, lustre_swab_ost_lvb_v1); else lvb = req_capsule_server_sized_swab_get(pill, - &RMF_DLM_LVB, size, + &RMF_DLM_LVB, lvb_len, lustre_swab_ost_lvb_v1); if (unlikely(lvb == NULL)) { LDLM_ERROR(lock, "no LVB"); RETURN(-EPROTO); } - memcpy(data, lvb, size); + memcpy(data, lvb, lvb_len); olvb->lvb_mtime_ns = 0; olvb->lvb_atime_ns = 0; olvb->lvb_ctime_ns = 0; } else { LDLM_ERROR(lock, "Replied unexpected ost LVB size %d", - size); + lvb_len); RETURN(-EINVAL); } break; case LVB_T_LQUOTA: - if (size == sizeof(struct lquota_lvb)) { + if (lvb_len == sizeof(struct lquota_lvb)) { if (loc == RCL_CLIENT) lvb = req_capsule_client_swab_get(pill, &RMF_DLM_LVB, @@ -1625,16 +1625,16 @@ int ldlm_fill_lvb(struct ldlm_lock *lock, struct req_capsule *pill, RETURN(-EPROTO); } - memcpy(data, lvb, size); + memcpy(data, lvb, lvb_len); } else { LDLM_ERROR(lock, "Replied unexpected lquota LVB size %d", - size); + lvb_len); RETURN(-EINVAL); } break; case LVB_T_LAYOUT: - if (size == 0) + if (lvb_len == 0) break; if (loc == RCL_CLIENT) @@ -1646,10 +1646,11 @@ int ldlm_fill_lvb(struct ldlm_lock *lock, struct req_capsule *pill, 
RETURN(-EPROTO); } - memcpy(data, lvb, size); + memcpy(data, lvb, lvb_len); break; default: - LDLM_ERROR(lock, "Unknown LVB type: %d", lock->l_lvb_type); + LDLM_ERROR(lock, "Unknown LVB type=%d, size=%d", + lock->l_lvb_type, lvb_len); dump_stack(); RETURN(-EINVAL); } @@ -1702,6 +1703,7 @@ struct ldlm_lock *ldlm_lock_create(struct ldlm_namespace *ns, } if (lvb_len) { + LASSERT(lvb_len < 1 << sizeof(lock->l_lvb_len) * 8); lock->l_lvb_len = lvb_len; OBD_ALLOC_LARGE(lock->l_lvb_data, lvb_len); if (lock->l_lvb_data == NULL) @@ -2154,7 +2156,7 @@ ldlm_work_bl_ast_lock(struct ptlrpc_request_set *rqset, void *opaq) LASSERT(ldlm_is_ast_sent(lock)); LASSERT(lock->l_bl_ast_run == 0); - lock->l_bl_ast_run++; + lock->l_bl_ast_run = 1; ldlm_clear_blocking_lock(lock); unlock_res_and_lock(lock); diff --git a/lustre/ldlm/ldlm_lockd.c b/lustre/ldlm/ldlm_lockd.c index 32d9f89..3e15d5c 100644 --- a/lustre/ldlm/ldlm_lockd.c +++ b/lustre/ldlm/ldlm_lockd.c @@ -2010,7 +2010,7 @@ static int ldlm_handle_cp_callback(struct ptlrpc_request *req, if (unlikely(lock->l_lvb_len < lvb_len)) { LDLM_ERROR(lock, - "Replied LVB is larger than expectation, expected = %d, replied = %d", + "Replied LVB is larger than expectation, expected = %u, replied = %u", lock->l_lvb_len, lvb_len); GOTO(out, rc = -EINVAL); } @@ -3497,6 +3497,10 @@ static int ldlm_cleanup(void) int ldlm_init(void) { + BUILD_BUG_ON(LDLM_MAX_TYPE > (1 << 4 /* lr_type bits */)); + BUILD_BUG_ON(LVB_T_END > (1 << 3 /* l_lvb_type bits */)); + BUILD_BUG_ON(LCK_MAXMODE > (1 << 9 /* l_req_mode/l_granted_mode */)); + ldlm_resource_slab = kmem_cache_create("ldlm_resources", sizeof(struct ldlm_resource), 0, SLAB_HWCACHE_ALIGN, NULL); diff --git a/lustre/ldlm/ldlm_request.c b/lustre/ldlm/ldlm_request.c index e7ac552..d2e6c1f 100644 --- a/lustre/ldlm/ldlm_request.c +++ b/lustre/ldlm/ldlm_request.c @@ -622,7 +622,7 @@ int ldlm_cli_enqueue_fini(struct obd_export *exp, struct req_capsule *pill, } LASSERTF(ergo(lvb_len != 0, lvb_len == lock->l_lvb_len), 
- "lvb_len = %d, l_lvb_len = %d\n", lvb_len, lock->l_lvb_len); + "lvb_len = %u, l_lvb_len = %u\n", lvb_len, lock->l_lvb_len); if (rc != ELDLM_OK) { LASSERT(!is_replay); diff --git a/lustre/llite/file.c b/lustre/llite/file.c index d747495..924d128 100644 --- a/lustre/llite/file.c +++ b/lustre/llite/file.c @@ -6707,7 +6707,7 @@ static int ll_layout_fetch(struct inode *inode, struct ldlm_lock *lock) int rc; ENTRY; - CDEBUG(D_INODE, DFID" LVB_READY=%d l_lvb_data=%p l_lvb_len=%d\n", + CDEBUG(D_INODE, DFID" LVB_READY=%d l_lvb_data=%p l_lvb_len=%u\n", PFID(ll_inode2fid(inode)), ldlm_is_lvb_ready(lock), lock->l_lvb_data, lock->l_lvb_len); diff --git a/lustre/mdc/mdc_dev.c b/lustre/mdc/mdc_dev.c index fe96b96..92f58d1 100644 --- a/lustre/mdc/mdc_dev.c +++ b/lustre/mdc/mdc_dev.c @@ -414,6 +414,8 @@ void mdc_lock_lvb_update(const struct lu_env *env, struct osc_object *osc, if (lvb == NULL) { LASSERT(dlmlock != NULL); + /* l_ost_lvb is only in the LDLM_IBITS union **/ + LASSERT(dlmlock->l_resource->lr_type == LDLM_IBITS); lvb = &dlmlock->l_ost_lvb; } cl_lvb2attr(attr, lvb); @@ -605,9 +607,10 @@ static int mdc_enqueue_fini(struct obd_export *exp, struct ptlrpc_request *req, /* At this point ols_lvb must be filled with correct LVB either * by mdc_fill_lvb() above or by ldlm_cli_enqueue_fini(). - * DoM uses l_ost_lvb to store LVB data, so copy it here from - * just updated ols_lvb. + * DoM uses l_ost_lvb to store LVB data (only available with + * LDLM_IBITS locks), so copy it here from just updated ols_lvb. 
*/ + LASSERT(lock->l_resource->lr_type == LDLM_IBITS); lock_res_and_lock(lock); memcpy(&lock->l_ost_lvb, &ols->ols_lvb, sizeof(lock->l_ost_lvb)); @@ -1455,30 +1458,32 @@ static int mdc_object_ast_clear(struct ldlm_lock *lock, void *data) struct lov_oinfo *oinfo; ENTRY; - if (lock->l_ast_data == data) { - lock->l_ast_data = NULL; - - LASSERT(osc != NULL); - LASSERT(osc->oo_oinfo != NULL); - LASSERT(lvb != NULL); - - /* Updates lvb in lock by the cached oinfo */ - oinfo = osc->oo_oinfo; - - LDLM_DEBUG(lock, "update lock size %llu blocks %llu [cma]time: " - "%llu %llu %llu by oinfo size %llu blocks %llu " - "[cma]time %llu %llu %llu", lvb->lvb_size, - lvb->lvb_blocks, lvb->lvb_ctime, lvb->lvb_mtime, - lvb->lvb_atime, oinfo->loi_lvb.lvb_size, - oinfo->loi_lvb.lvb_blocks, oinfo->loi_lvb.lvb_ctime, - oinfo->loi_lvb.lvb_mtime, oinfo->loi_lvb.lvb_atime); - LASSERT(oinfo->loi_lvb.lvb_size >= oinfo->loi_kms); - - cl_object_attr_lock(&osc->oo_cl); - memcpy(lvb, &oinfo->loi_lvb, sizeof(oinfo->loi_lvb)); - cl_object_attr_unlock(&osc->oo_cl); - ldlm_clear_lvb_cached(lock); - } + if (lock->l_ast_data != data) + RETURN(LDLM_ITER_CONTINUE); + + lock->l_ast_data = NULL; + + LASSERT(osc != NULL); + LASSERT(osc->oo_oinfo != NULL); + + /* Updates lvb in lock by the cached oinfo */ + oinfo = osc->oo_oinfo; + + LDLM_DEBUG(lock, + "update lock size %llu blocks %llu [cma]time: %llu %llu %llu by oinfo size %llu blocks %llu [cma]time %llu %llu %llu", + lvb->lvb_size, lvb->lvb_blocks, lvb->lvb_ctime, + lvb->lvb_mtime, lvb->lvb_atime, oinfo->loi_lvb.lvb_size, + oinfo->loi_lvb.lvb_blocks, oinfo->loi_lvb.lvb_ctime, + oinfo->loi_lvb.lvb_mtime, oinfo->loi_lvb.lvb_atime); + LASSERT(oinfo->loi_lvb.lvb_size >= oinfo->loi_kms); + + cl_object_attr_lock(&osc->oo_cl); + /* l_ost_lvb is only in the LDLM_IBITS union **/ + LASSERT(lock->l_resource->lr_type == LDLM_IBITS); + memcpy(lvb, &oinfo->loi_lvb, sizeof(oinfo->loi_lvb)); + cl_object_attr_unlock(&osc->oo_cl); + ldlm_clear_lvb_cached(lock); + 
RETURN(LDLM_ITER_CONTINUE); } diff --git a/lustre/mdc/mdc_locks.c b/lustre/mdc/mdc_locks.c index a772865..fbfc1f4 100644 --- a/lustre/mdc/mdc_locks.c +++ b/lustre/mdc/mdc_locks.c @@ -965,6 +965,8 @@ int mdc_finish_enqueue(struct obd_export *exp, LDLM_DEBUG(lock, "DoM lock is returned by: %s, size: %llu", ldlm_it2str(it->it_op), body->mbo_dom_size); + /* l_ost_lvb is only in the LDLM_IBITS union **/ + LASSERT(lock->l_resource->lr_type == LDLM_IBITS); lock_res_and_lock(lock); mdc_body2lvb(body, &lock->l_ost_lvb); ldlm_lock_allow_match_locked(lock); diff --git a/lustre/mdt/mdt_lvb.c b/lustre/mdt/mdt_lvb.c index 7748bdc..c4236b7 100644 --- a/lustre/mdt/mdt_lvb.c +++ b/lustre/mdt/mdt_lvb.c @@ -62,6 +62,7 @@ int mdt_dom_lvb_alloc(struct ldlm_resource *res) } res->lr_lvb_data = lvb; + BUILD_BUG_ON(sizeof(*lvb) >= 1<<(sizeof(res->lr_lvb_len)*8-1)); res->lr_lvb_len = sizeof(*lvb); /* Store error in LVB to inidicate it has no data yet. diff --git a/lustre/ofd/ofd_lvb.c b/lustre/ofd/ofd_lvb.c index d9237b9..446af35 100644 --- a/lustre/ofd/ofd_lvb.c +++ b/lustre/ofd/ofd_lvb.c @@ -125,6 +125,7 @@ static int ofd_lvbo_init(struct ldlm_resource *res) info = ofd_info(env); res->lr_lvb_data = lvb; + BUILD_BUG_ON(sizeof(*lvb) >= 1 << (sizeof(res->lr_lvb_len) * 8 - 1)); res->lr_lvb_len = sizeof(*lvb); ost_fid_from_resid(&info->fti_fid, &res->lr_name, -- 1.8.3.1