Whamcloud - gitweb
LU-14520 ldlm: reduce ldlm_lock memory usage 85/56685/4
author Andreas Dilger <adilger@whamcloud.com>
Mon, 14 Oct 2024 23:23:26 +0000 (17:23 -0600)
committer Oleg Drokin <green@whamcloud.com>
Mon, 9 Dec 2024 06:12:58 +0000 (06:12 +0000)
Reduce the size of struct ldlm_lock so that it can fit into a 512-byte
slab allocation.  The primary reduction in memory usage is from moving
struct l_ost_lvb into the union for IBITS locks where it is needed,
while it was previously part of the common structure.  Add assertions
to verify that the l_ost_lvb field is only used for IBITS lock type.

Additionally, shrink some of the other fields in memory to bitfields
that only use the required bits, and pack them together to save space.
These are read-mostly fields for the lifetime of the lock and give
some space to add fields in the future before hitting 512 bytes again.

  ldlm_lock     BEFORE           PATCHED
      size: 536, members: 36     size: 496, members: 35
      sum used: 536, holes: 0    sum used: 490, holes: 2, sum holes: 6
      bit holes: 0               bit holes: 1, sum bit holes: 10

Also reduce struct ldlm_resource by a few bytes, so that 23 can fit
into a single 4096-byte slab instead of the previous 22 in one slab.

  ldlm_resource BEFORE           PATCHED
      size: 184, members: 14     size: 176, members: 14
      padding: 7, holes: 0       sum used: 171, holes: 1, sum holes: 5
      bit holes: 0               bit holes: 1, sum bit holes: 4

Signed-off-by: Andreas Dilger <adilger@whamcloud.com>
Change-Id: I7f017a852228f8c9090e890f2766707fdbde2abd
Reviewed-on: https://review.whamcloud.com/c/fs/lustre-release/+/56685
Tested-by: jenkins <devops@whamcloud.com>
Tested-by: Maloo <maloo@whamcloud.com>
Reviewed-by: Alex Zhuravlev <bzzz@whamcloud.com>
Reviewed-by: Yang Sheng <ys@whamcloud.com>
Reviewed-by: James Simmons <jsimmons@infradead.org>
Reviewed-by: Oleg Drokin <green@whamcloud.com>
lustre/include/lustre_dlm.h
lustre/ldlm/ldlm_lock.c
lustre/ldlm/ldlm_lockd.c
lustre/ldlm/ldlm_request.c
lustre/llite/file.c
lustre/mdc/mdc_dev.c
lustre/mdc/mdc_locks.c
lustre/mdt/mdt_lvb.c
lustre/ofd/ofd_lvb.c

index cd4d6f0..adaa82f 100644 (file)
@@ -754,6 +754,7 @@ enum lvb_type {
        LVB_T_OST       = 1,
        LVB_T_LQUOTA    = 2,
        LVB_T_LAYOUT    = 3,
+       LVB_T_END
 };
 
 /**
@@ -805,27 +806,29 @@ struct ldlm_lock {
         * Internal structures per lock type..
         */
        union {
-               /* LDLM_EXTENT locks only */
-               struct {
+               struct { /* LDLM_EXTENT locks only */
                        /* Originally requested extent for the extent lock. */
                        struct ldlm_extent      l_req_extent;
                        struct rb_node          l_rb;
                        u64                     l_subtree_last;
                        struct list_head        l_same_extent;
                };
-               /* LDLM_PLAIN and LDLM_IBITS locks */
-               struct {
+               struct { /* LDLM_PLAIN and LDLM_IBITS locks */
                        /**
                         * Protected by lr_lock, linkages to "skip lists".
-                        * For more explanations of skip lists see ldlm/ldlm_inodebits.c
+                        * For explanations of skip lists see
+                        * ldlm/ldlm_inodebits.c
                         */
                        struct list_head        l_sl_mode;
                        struct list_head        l_sl_policy;
 
                        struct ldlm_ibits_node  *l_ibits_node;
+                       /* separate ost_lvb used mostly by Data-on-MDT for now.
+                        * It is introduced to don't mix with layout lock data.
+                        */
+                       struct ost_lvb           l_ost_lvb;
                };
-               /* LDLM_FLOCK locks */
-               struct {
+               struct { /* LDLM_FLOCK locks */
                        /**
                         * Per export hash of flock locks.
                         * Protected by per-bucket exp->exp_flock_hash locks.
@@ -842,15 +845,32 @@ struct ldlm_lock {
         * Protected by per-bucket exp->exp_lock_hash locks.
         */
        struct hlist_node       l_exp_hash;
+
+       /* Requested mode. Protected by lr_lock. */
+       enum ldlm_mode          l_req_mode:9;
+       /* Granted mode, also protected by lr_lock.  */
+       enum ldlm_mode          l_granted_mode:9;
+
        /**
-        * Requested mode.
-        * Protected by lr_lock.
+        * Whether the blocking AST was sent for this lock.
+        * This is for debugging. Valid values are 0 and 1, if there is an
+        * attempt to send blocking AST more than once, an assertion would be
+        * hit. \see ldlm_work_bl_ast_lock
         */
-       enum ldlm_mode          l_req_mode;
-       /**
-        * Granted mode, also protected by lr_lock.
+       unsigned int            l_bl_ast_run:1;
+
+       /* content type for lock value block */
+       enum lvb_type           l_lvb_type:3;
+       /* unsigned int         l_unused_bits:10; */
+       u16                     l_lvb_len;
+       /* u16                  l_unused; */
+
+       /*
+        * Temporary storage for a LVB received during an enqueue operation.
+        * May be vmalloc'd, so needs to be freed with OBD_FREE_LARGE().
         */
-       enum ldlm_mode          l_granted_mode;
+       void                    *l_lvb_data;
+
        /** Lock completion handler pointer. Called when lock is granted. */
        ldlm_completion_callback l_completion_ast;
        /**
@@ -920,19 +940,6 @@ struct ldlm_lock {
         */
        ktime_t                 l_last_used;
 
-       /*
-        * Client-side-only members.
-        */
-
-       enum lvb_type         l_lvb_type;
-
-       /**
-        * Temporary storage for a LVB received during an enqueue operation.
-        * May be vmalloc'd, so needs to be freed with OBD_FREE_LARGE().
-        */
-       __u32                   l_lvb_len;
-       void                    *l_lvb_data;
-
        /** Private storage for lock user. Opaque to LDLM. */
        void                    *l_ast_data;
 
@@ -946,10 +953,6 @@ struct ldlm_lock {
                time64_t        l_blast_sent;
        };
 
-       /* separate ost_lvb used mostly by Data-on-MDT for now.
-        * It is introduced to don't mix with layout lock data.
-        */
-       struct ost_lvb           l_ost_lvb;
        /*
         * Server-side-only members.
         */
@@ -981,14 +984,8 @@ struct ldlm_lock {
 
        /** Local PID of process which created this lock. */
        __u32                   l_pid;
+       /* __u32                l_unused; */
 
-       /**
-        * Number of times blocking AST was sent for this lock.
-        * This is for debugging. Valid values are 0 and 1, if there is an
-        * attempt to send blocking AST more than once, an assertion would be
-        * hit. \see ldlm_work_bl_ast_lock
-        */
-       int                     l_bl_ast_run;
        /** List item ldlm_add_ast_work_item() for case of blocking ASTs. */
        struct list_head        l_bl_ast;
        /** List item ldlm_add_ast_work_item() for case of completion ASTs. */
@@ -1140,18 +1137,19 @@ struct ldlm_resource {
        };
 
        /** Type of locks this resource can hold. Only one type per resource. */
-       enum ldlm_type          lr_type; /* LDLM_{PLAIN,EXTENT,FLOCK,IBITS} */
+       enum ldlm_type          lr_type:4; /* LDLM_{PLAIN,EXTENT,FLOCK,IBITS} */
+       /* unsigned int         lr_unused_bits:4; */
+       /* char                 lr_unused[5]; */
 
        /**
         * Server-side-only lock value block elements.
         * To serialize lvbo_init.
         */
-       int                     lr_lvb_len;
+       bool                    lr_lvb_initialized;
+       char                    lr_lvb_len;
        struct mutex            lr_lvb_mutex;
        /** protected by lr_lock */
        void                    *lr_lvb_data;
-       /** is lvb initialized ? */
-       bool                    lr_lvb_initialized;
 };
 
 static inline int ldlm_is_granted(struct ldlm_lock *lock)
index 63ebd47..08c6b5a 100644 (file)
@@ -1558,18 +1558,18 @@ EXPORT_SYMBOL(ldlm_revalidate_lock_handle);
 
 /** The caller must guarantee that the buffer is large enough. */
 int ldlm_fill_lvb(struct ldlm_lock *lock, struct req_capsule *pill,
-                 enum req_location loc, void *data, int size)
+                 enum req_location loc, void *data, int lvb_len)
 {
        void *lvb;
 
        ENTRY;
 
        LASSERT(data != NULL);
-       LASSERT(size >= 0);
+       LASSERT(lvb_len >= 0);
 
        switch (lock->l_lvb_type) {
        case LVB_T_OST:
-               if (size == sizeof(struct ost_lvb)) {
+               if (lvb_len == sizeof(struct ost_lvb)) {
                        if (loc == RCL_CLIENT)
                                lvb = req_capsule_client_swab_get(pill,
                                                &RMF_DLM_LVB,
@@ -1583,8 +1583,8 @@ int ldlm_fill_lvb(struct ldlm_lock *lock, struct req_capsule *pill,
                                RETURN(-EPROTO);
                        }
 
-                       memcpy(data, lvb, size);
-               } else if (size == sizeof(struct ost_lvb_v1)) {
+                       memcpy(data, lvb, lvb_len);
+               } else if (lvb_len == sizeof(struct ost_lvb_v1)) {
                        struct ost_lvb *olvb = data;
 
                        if (loc == RCL_CLIENT)
@@ -1593,25 +1593,25 @@ int ldlm_fill_lvb(struct ldlm_lock *lock, struct req_capsule *pill,
                                                lustre_swab_ost_lvb_v1);
                        else
                                lvb = req_capsule_server_sized_swab_get(pill,
-                                               &RMF_DLM_LVB, size,
+                                               &RMF_DLM_LVB, lvb_len,
                                                lustre_swab_ost_lvb_v1);
                        if (unlikely(lvb == NULL)) {
                                LDLM_ERROR(lock, "no LVB");
                                RETURN(-EPROTO);
                        }
 
-                       memcpy(data, lvb, size);
+                       memcpy(data, lvb, lvb_len);
                        olvb->lvb_mtime_ns = 0;
                        olvb->lvb_atime_ns = 0;
                        olvb->lvb_ctime_ns = 0;
                } else {
                        LDLM_ERROR(lock, "Replied unexpected ost LVB size %d",
-                                  size);
+                                  lvb_len);
                        RETURN(-EINVAL);
                }
                break;
        case LVB_T_LQUOTA:
-               if (size == sizeof(struct lquota_lvb)) {
+               if (lvb_len == sizeof(struct lquota_lvb)) {
                        if (loc == RCL_CLIENT)
                                lvb = req_capsule_client_swab_get(pill,
                                                &RMF_DLM_LVB,
@@ -1625,16 +1625,16 @@ int ldlm_fill_lvb(struct ldlm_lock *lock, struct req_capsule *pill,
                                RETURN(-EPROTO);
                        }
 
-                       memcpy(data, lvb, size);
+                       memcpy(data, lvb, lvb_len);
                } else {
                        LDLM_ERROR(lock,
                                   "Replied unexpected lquota LVB size %d",
-                                  size);
+                                  lvb_len);
                        RETURN(-EINVAL);
                }
                break;
        case LVB_T_LAYOUT:
-               if (size == 0)
+               if (lvb_len == 0)
                        break;
 
                if (loc == RCL_CLIENT)
@@ -1646,10 +1646,11 @@ int ldlm_fill_lvb(struct ldlm_lock *lock, struct req_capsule *pill,
                        RETURN(-EPROTO);
                }
 
-               memcpy(data, lvb, size);
+               memcpy(data, lvb, lvb_len);
                break;
        default:
-               LDLM_ERROR(lock, "Unknown LVB type: %d", lock->l_lvb_type);
+               LDLM_ERROR(lock, "Unknown LVB type=%d, size=%d",
+                          lock->l_lvb_type, lvb_len);
                dump_stack();
                RETURN(-EINVAL);
        }
@@ -1702,6 +1703,7 @@ struct ldlm_lock *ldlm_lock_create(struct ldlm_namespace *ns,
        }
 
        if (lvb_len) {
+               LASSERT(lvb_len < 1 << sizeof(lock->l_lvb_len) * 8);
                lock->l_lvb_len = lvb_len;
                OBD_ALLOC_LARGE(lock->l_lvb_data, lvb_len);
                if (lock->l_lvb_data == NULL)
@@ -2154,7 +2156,7 @@ ldlm_work_bl_ast_lock(struct ptlrpc_request_set *rqset, void *opaq)
 
        LASSERT(ldlm_is_ast_sent(lock));
        LASSERT(lock->l_bl_ast_run == 0);
-       lock->l_bl_ast_run++;
+       lock->l_bl_ast_run = 1;
        ldlm_clear_blocking_lock(lock);
        unlock_res_and_lock(lock);
 
index 32d9f89..3e15d5c 100644 (file)
@@ -2010,7 +2010,7 @@ static int ldlm_handle_cp_callback(struct ptlrpc_request *req,
 
                        if (unlikely(lock->l_lvb_len < lvb_len)) {
                                LDLM_ERROR(lock,
-                                          "Replied LVB is larger than expectation, expected = %d, replied = %d",
+                                          "Replied LVB is larger than expectation, expected = %u, replied = %u",
                                           lock->l_lvb_len, lvb_len);
                                GOTO(out, rc = -EINVAL);
                        }
@@ -3497,6 +3497,10 @@ static int ldlm_cleanup(void)
 
 int ldlm_init(void)
 {
+       BUILD_BUG_ON(LDLM_MAX_TYPE > (1 << 4 /* lr_type bits */));
+       BUILD_BUG_ON(LVB_T_END      > (1 << 3 /* l_lvb_type bits */));
+       BUILD_BUG_ON(LCK_MAXMODE   > (1 << 9 /* l_req_mode/l_granted_mode */));
+
        ldlm_resource_slab = kmem_cache_create("ldlm_resources",
                                               sizeof(struct ldlm_resource), 0,
                                               SLAB_HWCACHE_ALIGN, NULL);
index e7ac552..d2e6c1f 100644 (file)
@@ -622,7 +622,7 @@ int ldlm_cli_enqueue_fini(struct obd_export *exp, struct req_capsule *pill,
        }
 
        LASSERTF(ergo(lvb_len != 0, lvb_len == lock->l_lvb_len),
-                "lvb_len = %d, l_lvb_len = %d\n", lvb_len, lock->l_lvb_len);
+                "lvb_len = %u, l_lvb_len = %u\n", lvb_len, lock->l_lvb_len);
 
        if (rc != ELDLM_OK) {
                LASSERT(!is_replay);
index d747495..924d128 100644 (file)
@@ -6707,7 +6707,7 @@ static int ll_layout_fetch(struct inode *inode, struct ldlm_lock *lock)
        int rc;
 
        ENTRY;
-       CDEBUG(D_INODE, DFID" LVB_READY=%d l_lvb_data=%p l_lvb_len=%d\n",
+       CDEBUG(D_INODE, DFID" LVB_READY=%d l_lvb_data=%p l_lvb_len=%u\n",
               PFID(ll_inode2fid(inode)), ldlm_is_lvb_ready(lock),
               lock->l_lvb_data, lock->l_lvb_len);
 
index fe96b96..92f58d1 100644 (file)
@@ -414,6 +414,8 @@ void mdc_lock_lvb_update(const struct lu_env *env, struct osc_object *osc,
 
        if (lvb == NULL) {
                LASSERT(dlmlock != NULL);
+               /* l_ost_lvb is only in the LDLM_IBITS union **/
+               LASSERT(dlmlock->l_resource->lr_type == LDLM_IBITS);
                lvb = &dlmlock->l_ost_lvb;
        }
        cl_lvb2attr(attr, lvb);
@@ -605,9 +607,10 @@ static int mdc_enqueue_fini(struct obd_export *exp, struct ptlrpc_request *req,
 
                /* At this point ols_lvb must be filled with correct LVB either
                 * by mdc_fill_lvb() above or by ldlm_cli_enqueue_fini().
-                * DoM uses l_ost_lvb to store LVB data, so copy it here from
-                * just updated ols_lvb.
+                * DoM uses l_ost_lvb to store LVB data (only available with
+                * LDLM_IBITS locks), so copy it here from just updated ols_lvb.
                 */
+               LASSERT(lock->l_resource->lr_type == LDLM_IBITS);
                lock_res_and_lock(lock);
                memcpy(&lock->l_ost_lvb, &ols->ols_lvb,
                       sizeof(lock->l_ost_lvb));
@@ -1455,30 +1458,32 @@ static int mdc_object_ast_clear(struct ldlm_lock *lock, void *data)
        struct lov_oinfo *oinfo;
        ENTRY;
 
-       if (lock->l_ast_data == data) {
-               lock->l_ast_data = NULL;
-
-               LASSERT(osc != NULL);
-               LASSERT(osc->oo_oinfo != NULL);
-               LASSERT(lvb != NULL);
-
-               /* Updates lvb in lock by the cached oinfo */
-               oinfo = osc->oo_oinfo;
-
-               LDLM_DEBUG(lock, "update lock size %llu blocks %llu [cma]time: "
-                          "%llu %llu %llu by oinfo size %llu blocks %llu "
-                          "[cma]time %llu %llu %llu", lvb->lvb_size,
-                          lvb->lvb_blocks, lvb->lvb_ctime, lvb->lvb_mtime,
-                          lvb->lvb_atime, oinfo->loi_lvb.lvb_size,
-                          oinfo->loi_lvb.lvb_blocks, oinfo->loi_lvb.lvb_ctime,
-                          oinfo->loi_lvb.lvb_mtime, oinfo->loi_lvb.lvb_atime);
-               LASSERT(oinfo->loi_lvb.lvb_size >= oinfo->loi_kms);
-
-               cl_object_attr_lock(&osc->oo_cl);
-               memcpy(lvb, &oinfo->loi_lvb, sizeof(oinfo->loi_lvb));
-               cl_object_attr_unlock(&osc->oo_cl);
-               ldlm_clear_lvb_cached(lock);
-       }
+       if (lock->l_ast_data != data)
+               RETURN(LDLM_ITER_CONTINUE);
+
+       lock->l_ast_data = NULL;
+
+       LASSERT(osc != NULL);
+       LASSERT(osc->oo_oinfo != NULL);
+
+       /* Updates lvb in lock by the cached oinfo */
+       oinfo = osc->oo_oinfo;
+
+       LDLM_DEBUG(lock,
+                  "update lock size %llu blocks %llu [cma]time: %llu %llu %llu by oinfo size %llu blocks %llu [cma]time %llu %llu %llu",
+                  lvb->lvb_size, lvb->lvb_blocks, lvb->lvb_ctime,
+                  lvb->lvb_mtime, lvb->lvb_atime, oinfo->loi_lvb.lvb_size,
+                  oinfo->loi_lvb.lvb_blocks, oinfo->loi_lvb.lvb_ctime,
+                  oinfo->loi_lvb.lvb_mtime, oinfo->loi_lvb.lvb_atime);
+       LASSERT(oinfo->loi_lvb.lvb_size >= oinfo->loi_kms);
+
+       cl_object_attr_lock(&osc->oo_cl);
+       /* l_ost_lvb is only in the LDLM_IBITS union **/
+       LASSERT(lock->l_resource->lr_type == LDLM_IBITS);
+       memcpy(lvb, &oinfo->loi_lvb, sizeof(oinfo->loi_lvb));
+       cl_object_attr_unlock(&osc->oo_cl);
+       ldlm_clear_lvb_cached(lock);
+
        RETURN(LDLM_ITER_CONTINUE);
 }
 
index a772865..fbfc1f4 100644 (file)
@@ -965,6 +965,8 @@ int mdc_finish_enqueue(struct obd_export *exp,
                LDLM_DEBUG(lock, "DoM lock is returned by: %s, size: %llu",
                           ldlm_it2str(it->it_op), body->mbo_dom_size);
 
+               /* l_ost_lvb is only in the LDLM_IBITS union **/
+               LASSERT(lock->l_resource->lr_type == LDLM_IBITS);
                lock_res_and_lock(lock);
                mdc_body2lvb(body, &lock->l_ost_lvb);
                ldlm_lock_allow_match_locked(lock);
index 7748bdc..c4236b7 100644 (file)
@@ -62,6 +62,7 @@ int mdt_dom_lvb_alloc(struct ldlm_resource *res)
                }
 
                res->lr_lvb_data = lvb;
+               BUILD_BUG_ON(sizeof(*lvb) >= 1<<(sizeof(res->lr_lvb_len)*8-1));
                res->lr_lvb_len = sizeof(*lvb);
 
                /* Store error in LVB to inidicate it has no data yet.
index d9237b9..446af35 100644 (file)
@@ -125,6 +125,7 @@ static int ofd_lvbo_init(struct ldlm_resource *res)
 
        info = ofd_info(env);
        res->lr_lvb_data = lvb;
+       BUILD_BUG_ON(sizeof(*lvb) >= 1 << (sizeof(res->lr_lvb_len) * 8 - 1));
        res->lr_lvb_len = sizeof(*lvb);
 
        ost_fid_from_resid(&info->fti_fid, &res->lr_name,