Whamcloud - gitweb
LU-18357 ptlrpc: Use dynamic memory allocation for filesets 23/57523/6
authorMarc Vef <mvef@whamcloud.com>
Thu, 19 Dec 2024 10:09:24 +0000 (11:09 +0100)
committerOleg Drokin <green@whamcloud.com>
Fri, 28 Feb 2025 08:11:33 +0000 (08:11 +0000)
Currently, a nodemap's fileset uses a static-sized array which may be
much larger than needed. This patch adds dynamic memory allocation for
a nodemap's fileset to reduce the future memory footprint when
multiple filesets per nodemap are specified.

Note, when reading fileset fragments from the nodemap IAM, the needed
size is not known yet. Therefore, each fileset is initially
preallocated with "PATH_MAX + 1" and then shrunk when the actual size
is know. This is done on the last step, when a nodemap is set active,
e.g., after reading all IAM idx pages.

The nodemap's fileset is further renamed as future "lctl
nodemap_set_fileset" commands can only modify the primary fileset.

Test-Parameters: trivial testlist=sanity-sec
Signed-off-by: Marc Vef <mvef@whamcloud.com>
Change-Id: I39bd3355c96fcf3938aaa52c8e5ff1eb70617378
Reviewed-on: https://review.whamcloud.com/c/fs/lustre-release/+/57523
Tested-by: jenkins <devops@whamcloud.com>
Tested-by: Maloo <maloo@whamcloud.com>
Reviewed-by: Sebastien Buisson <sbuisson@ddn.com>
Reviewed-by: Andreas Dilger <adilger@whamcloud.com>
Reviewed-by: Oleg Drokin <green@whamcloud.com>
lustre/include/lustre_nodemap.h
lustre/ptlrpc/nodemap_handler.c
lustre/ptlrpc/nodemap_lproc.c
lustre/ptlrpc/nodemap_storage.c

index 4eaf632..c7344b5 100644 (file)
@@ -97,7 +97,8 @@ struct lu_nodemap {
        struct hlist_node        nm_hash;
        struct nodemap_pde      *nm_pde_data;
        /* fileset the nodes of this nodemap are restricted to */
-       char                     nm_fileset[PATH_MAX + 1];
+       char                     *nm_prim_fileset;
+       unsigned int             nm_prim_fileset_size;
        /* information about the expected SELinux policy on the nodes */
        char                     nm_sepol[LUSTRE_NODEMAP_SEPOL_LENGTH + 1];
 
index c0908ac..6238024 100644 (file)
@@ -49,6 +49,8 @@ static void nodemap_destroy(struct lu_nodemap *nodemap)
        if (nodemap->nm_pde_data != NULL)
                lprocfs_nodemap_remove(nodemap->nm_pde_data);
 
+       OBD_FREE(nodemap->nm_prim_fileset, nodemap->nm_prim_fileset_size);
+
        mutex_lock(&active_config_lock);
        down_read(&active_config->nmc_range_tree_lock);
        nm_member_reclassify_nodemap(nodemap);
@@ -971,7 +973,6 @@ static void nodemap_inherit_properties(struct lu_nodemap *dst,
                dst->nm_squash_uid = NODEMAP_NOBODY_UID;
                dst->nm_squash_gid = NODEMAP_NOBODY_GID;
                dst->nm_squash_projid = NODEMAP_NOBODY_PROJID;
-               dst->nm_fileset[0] = '\0';
                dst->nm_sepol[0] = '\0';
                dst->nm_offset_start_uid = 0;
                dst->nm_offset_limit_uid = 0;
@@ -979,6 +980,8 @@ static void nodemap_inherit_properties(struct lu_nodemap *dst,
                dst->nm_offset_limit_gid = 0;
                dst->nm_offset_start_projid = 0;
                dst->nm_offset_limit_projid = 0;
+               dst->nm_prim_fileset = NULL;
+               dst->nm_prim_fileset_size = 0;
        } else {
                dst->nmf_trust_client_ids = src->nmf_trust_client_ids;
                dst->nmf_allow_root_access = src->nmf_allow_root_access;
@@ -995,7 +998,6 @@ static void nodemap_inherit_properties(struct lu_nodemap *dst,
                dst->nm_squash_gid = src->nm_squash_gid;
                dst->nm_squash_projid = src->nm_squash_projid;
                if (is_new) {
-                       dst->nm_fileset[0] = '\0';
                        dst->nm_sepol[0] = '\0';
                }
                dst->nm_offset_start_uid = src->nm_offset_start_uid;
@@ -1004,7 +1006,9 @@ static void nodemap_inherit_properties(struct lu_nodemap *dst,
                dst->nm_offset_limit_gid = src->nm_offset_limit_gid;
                dst->nm_offset_start_projid = src->nm_offset_start_projid;
                dst->nm_offset_limit_projid = src->nm_offset_limit_projid;
-
+               /* filesets cannot yet be inherited */
+               dst->nm_prim_fileset = NULL;
+               dst->nm_prim_fileset_size = 0;
        }
 }
 
@@ -1153,6 +1157,13 @@ out:
 }
 EXPORT_SYMBOL(nodemap_del_range);
 
+static void nodemap_fileset_reset(struct lu_nodemap *nodemap)
+{
+       OBD_FREE(nodemap->nm_prim_fileset, nodemap->nm_prim_fileset_size);
+       nodemap->nm_prim_fileset = NULL;
+       nodemap->nm_prim_fileset_size = 0;
+}
+
 /**
  * Set a fileset on a nodemap in memory and the nodemap IAM records.
  * If the nodemap is dynamic, the nodemap IAM update is transparently skipped in
@@ -1162,47 +1173,61 @@ EXPORT_SYMBOL(nodemap_del_range);
  * \param      fileset         string containing fileset
  * \retval     0 on success
  * \retval     -EINVAL         invalid fileset: Does not start with '/'
- * \retval     -ENAMETOOLONG   fileset is too long
  * \retval     -EIO            undo operation failed during IAM update
  */
 static int nodemap_set_fileset_iam(struct lu_nodemap *nodemap,
                                   const char *fileset)
 {
+       size_t fileset_size_new;
+       char *fileset_new;
        int rc = 0;
 
-       if (strlen(fileset) > PATH_MAX)
-               RETURN(-ENAMETOOLONG);
-
        if (fileset[0] == '\0' || strcmp(fileset, "clear") == 0) {
                rc = nodemap_idx_fileset_clear(nodemap);
-               if (rc == 0)
-                       nodemap->nm_fileset[0] = '\0';
-       } else if (fileset[0] != '/') {
-               rc = -EINVAL;
-       } else {
-               /*
-                * Only update the index if the fileset is already set.
-                * If it was set by the params llog, it is not set in the IAM.
-                */
-               if (nodemap->nm_fileset[0] != '\0' &&
-                   nodemap->nmf_fileset_use_iam) {
+               if (!rc)
+                       nodemap_fileset_reset(nodemap);
+               GOTO(out, rc);
+       }
+
+       if (fileset[0] != '/')
+               RETURN(-EINVAL);
+
+       fileset_size_new = strlen(fileset) + 1;
+
+       OBD_ALLOC(fileset_new, fileset_size_new);
+       if (!fileset_new)
+               GOTO(out, rc = -ENOMEM);
+
+       memcpy(fileset_new, fileset, fileset_size_new);
+
+       /*
+        * Only update the index if the fileset is already set.
+        * If it was set by the params llog, it is not set in the IAM.
+        */
+       if (nodemap->nm_prim_fileset) {
+               if (nodemap->nmf_fileset_use_iam) {
                        rc = nodemap_idx_fileset_update(
-                               nodemap, nodemap->nm_fileset, fileset);
-               } else {
-                       rc = nodemap_idx_fileset_add(nodemap, fileset);
+                               nodemap, nodemap->nm_prim_fileset, fileset_new);
                }
-               if (rc < 0)
-                       GOTO(out, rc);
+               if (!rc)
+                       nodemap_fileset_reset(nodemap);
+       } else {
+               rc = nodemap_idx_fileset_add(nodemap, fileset_new);
+       }
 
-               memcpy(nodemap->nm_fileset, fileset, strlen(fileset) + 1);
+       if (!rc) {
+               nodemap->nm_prim_fileset = fileset_new;
+               nodemap->nm_prim_fileset_size = fileset_size_new;
+       } else {
+               OBD_FREE(fileset_new, fileset_size_new);
        }
 
-       if (!nodemap->nm_dyn && !nodemap->nmf_fileset_use_iam) {
+out:
+       if (!nodemap->nm_dyn && !nodemap->nmf_fileset_use_iam && !rc) {
                nodemap->nmf_fileset_use_iam = 1;
                rc = nodemap_idx_nodemap_update(nodemap);
        }
 
-out:
        return rc;
 }
 
@@ -1217,12 +1242,12 @@ out:
  * \param      fileset         string containing fileset
  * \retval     0 on success
  * \retval     -EINVAL         invalid fileset: Does not start with '/'
- * \retval     -ENAMETOOLONG   fileset is too long
  */
 static int nodemap_set_fileset_local(struct lu_nodemap *nodemap,
                                     const char *fileset)
 {
-       int rc = 0;
+       size_t fileset_size_new;
+       char *fileset_new;
 
        /* Allow 'fileset=clear' in addition to 'fileset=""' to clear fileset
         * because either command 'lctl set_param -P *.*.fileset=""' or
@@ -1234,19 +1259,29 @@ static int nodemap_set_fileset_local(struct lu_nodemap *nodemap,
         * 'fileset=""' is still kept for compatibility reason.
         */
        if (fileset[0] == '\0' || strcmp(fileset, "clear") == 0) {
-               nodemap->nm_fileset[0] = '\0';
-       } else if (fileset[0] != '/') {
-               rc = -EINVAL;
-
-       } else if (strscpy(nodemap->nm_fileset, fileset,
-                          sizeof(nodemap->nm_fileset)) < 0) {
-               rc = -ENAMETOOLONG;
-               /* function may be called from llog thread, so we CERROR here */
-               CERROR("%s: fileset '%s' is too long: rc = %d\n",
-                      nodemap->nm_name, fileset, rc);
+               nodemap_fileset_reset(nodemap);
+               RETURN(0);
        }
 
-       return rc;
+       if (fileset[0] != '/')
+               RETURN(-EINVAL);
+
+       fileset_size_new = strlen(fileset) + 1;
+
+       OBD_ALLOC(fileset_new, fileset_size_new);
+       if (!fileset_new)
+               RETURN(-ENOMEM);
+
+       memcpy(fileset_new, fileset, fileset_size_new);
+
+       /* free existing fileset first on update */
+       if (nodemap->nm_prim_fileset)
+               nodemap_fileset_reset(nodemap);
+
+       nodemap->nm_prim_fileset = fileset_new;
+       nodemap->nm_prim_fileset_size = fileset_size_new;
+
+       return 0;
 }
 
 /**
@@ -1257,6 +1292,8 @@ static int nodemap_set_fileset_local(struct lu_nodemap *nodemap,
  * \param      checkperm       true if permission check is required
  * \param      ioctl_op        true if called from ioctl nodemap functions
  * \retval     0 on success
+ * \retval     -ENAMETOOLONG   fileset is too long
+ * \retval     -EINVAL         name or fileset is empty or NULL
  */
 int nodemap_set_fileset(const char *name, const char *fileset, bool checkperm,
                        bool ioctl_op)
@@ -1269,6 +1306,9 @@ int nodemap_set_fileset(const char *name, const char *fileset, bool checkperm,
        if (name == NULL || name[0] == '\0' || fileset == NULL)
                RETURN(-EINVAL);
 
+       if (strlen(fileset) > PATH_MAX)
+               RETURN(-ENAMETOOLONG);
+
        mutex_lock(&active_config_lock);
        nodemap = nodemap_lookup(name);
        if (IS_ERR(nodemap)) {
@@ -1316,7 +1356,7 @@ char *nodemap_get_fileset(const struct lu_nodemap *nodemap)
        if (!nodemap_active)
                return NULL;
 
-       return (char *)nodemap->nm_fileset;
+       return (char *)nodemap->nm_prim_fileset;
 }
 EXPORT_SYMBOL(nodemap_get_fileset);
 
@@ -2037,12 +2077,12 @@ int nodemap_del(const char *nodemap_name)
        }
        up_write(&active_config->nmc_range_tree_lock);
 
-       if (nodemap->nm_fileset[0] != '\0') {
+       if (nodemap->nm_prim_fileset) {
                rc2 = nodemap_idx_fileset_clear(nodemap);
                if (rc2 < 0)
                        rc = rc2;
 
-               nodemap->nm_fileset[0] = '\0';
+               nodemap_fileset_reset(nodemap);
        }
 
        if (!nodemap->nm_dyn) {
index 92ddb0d..445acf8 100644 (file)
@@ -210,8 +210,11 @@ static int nodemap_fileset_seq_show(struct seq_file *m, void *data)
                        (char *)m->private, rc);
                return rc;
        }
+       if (nodemap->nm_prim_fileset && nodemap->nm_prim_fileset[0] != '\0')
+               seq_printf(m, "%s\n", nodemap->nm_prim_fileset);
+       else
+               seq_puts(m, "\n");
 
-       seq_printf(m, "%s\n", nodemap->nm_fileset);
        nodemap_putref(nodemap);
        return rc;
 }
index 08e3e0c..579e822 100644 (file)
@@ -1331,6 +1331,7 @@ static int nodemap_cluster_rec_helper(struct nodemap_config *config,
        struct lu_nodemap *nodemap, *old_nm;
        enum nm_flag_bits flags;
        enum nm_flag2_bits flags2;
+       int rc = 0;
 
        nodemap = cfs_hash_lookup(config->nmc_nodemap_hash, rec->ncr.ncr_name);
        if (nodemap == NULL) {
@@ -1346,10 +1347,8 @@ static int nodemap_cluster_rec_helper(struct nodemap_config *config,
                if (nodemap_id > config->nmc_nodemap_highest_id)
                        config->nmc_nodemap_highest_id = nodemap_id;
 
-       } else if (nodemap->nm_id != nodemap_id) {
-               nodemap_putref(nodemap);
-               return -EINVAL;
-       }
+       } else if (nodemap->nm_id != nodemap_id)
+               GOTO(out_nodemap, rc = -EINVAL);
 
        nodemap->nm_squash_uid = le32_to_cpu(rec->ncr.ncr_squash_uid);
        nodemap->nm_squash_gid = le32_to_cpu(rec->ncr.ncr_squash_gid);
@@ -1384,9 +1383,21 @@ static int nodemap_cluster_rec_helper(struct nodemap_config *config,
        if (!nodemap->nmf_fileset_use_iam) {
                mutex_lock(&active_config_lock);
                old_nm = nodemap_lookup(rec->ncr.ncr_name);
-               if (!IS_ERR(old_nm) && old_nm->nm_fileset[0] != '\0')
-                       strscpy(nodemap->nm_fileset, old_nm->nm_fileset,
-                               sizeof(nodemap->nm_fileset));
+               if (!IS_ERR(old_nm) && old_nm->nm_prim_fileset &&
+                   old_nm->nm_prim_fileset[0] != '\0') {
+                       OBD_ALLOC(nodemap->nm_prim_fileset,
+                                 old_nm->nm_prim_fileset_size);
+                       if (!nodemap->nm_prim_fileset) {
+                               mutex_unlock(&active_config_lock);
+                               nodemap_putref(old_nm);
+                               GOTO(out_nodemap, rc = -ENOMEM);
+                       }
+                       nodemap->nm_prim_fileset_size =
+                               old_nm->nm_prim_fileset_size;
+                       memcpy(nodemap->nm_prim_fileset,
+                              old_nm->nm_prim_fileset,
+                              old_nm->nm_prim_fileset_size);
+               }
                mutex_unlock(&active_config_lock);
                if (!IS_ERR(old_nm))
                        nodemap_putref(old_nm);
@@ -1398,9 +1409,11 @@ static int nodemap_cluster_rec_helper(struct nodemap_config *config,
        } else {
                list_add(&nodemap->nm_list, &(*recent_nodemap)->nm_list);
        }
+
+out_nodemap:
        nodemap_putref(nodemap);
 
-       return 0;
+       return rc;
 }
 
 static int nodemap_cluster_roles_helper(struct lu_nodemap *nodemap,
@@ -1424,20 +1437,30 @@ static int nodemap_cluster_roles_helper(struct lu_nodemap *nodemap,
 static int nodemap_cluster_fileset_helper(struct lu_nodemap *nodemap,
                                          const union nodemap_rec *rec)
 {
-       unsigned int fragment_id, fragment_len, fset_offset, fset_len_remain;
+       unsigned int fragment_id, fragment_len, fset_offset, fset_len_remain,
+               fset_prealloc_size;
 
        fragment_id = le16_to_cpu(rec->nfr.nfr_fragment_id);
        fragment_len = LUSTRE_NODEMAP_FILESET_FRAGMENT_SIZE;
+       fset_prealloc_size = PATH_MAX + 1;
+
+       /* preallocate fileset for first occurring fragment with PATH_MAX + 1 */
+       if (!nodemap->nm_prim_fileset) {
+               OBD_ALLOC(nodemap->nm_prim_fileset, fset_prealloc_size);
+               if (!nodemap->nm_prim_fileset)
+                       return -ENOMEM;
+               nodemap->nm_prim_fileset_size = fset_prealloc_size;
+       }
 
        /* compute nodemap fileset position */
        fset_offset = fragment_id * LUSTRE_NODEMAP_FILESET_FRAGMENT_SIZE;
-       fset_len_remain = sizeof(nodemap->nm_fileset) - fset_offset;
+       fset_len_remain = nodemap->nm_prim_fileset_size - fset_offset;
 
        if (fragment_len > fset_len_remain)
                fragment_len = fset_len_remain;
 
-       memcpy(nodemap->nm_fileset + fset_offset, rec->nfr.nfr_path_fragment,
-              fragment_len);
+       memcpy(nodemap->nm_prim_fileset + fset_offset,
+              rec->nfr.nfr_path_fragment, fragment_len);
 
        return 0;
 }
@@ -1953,6 +1976,51 @@ void nodemap_config_set_loading_mgc(bool loading)
 EXPORT_SYMBOL(nodemap_config_set_loading_mgc);
 
 /**
+ * After all index pages are read, filesets may use more memory than necessary.
+ * So each nodemap's fileset is resized to its actual size.
+ *
+ * \param config       current active nodemap config
+ */
+static void nodemap_fileset_resize(struct nodemap_config *config)
+{
+       struct lu_nodemap *nodemap;
+       unsigned int fset_size_actual, fset_size_prealloc;
+       char *fset_tmp;
+       LIST_HEAD(nodemap_list_head);
+
+       mutex_lock(&active_config_lock);
+
+       cfs_hash_for_each_safe(config->nmc_nodemap_hash, nm_hash_list_cb,
+                              &nodemap_list_head);
+       list_for_each_entry(nodemap, &nodemap_list_head, nm_list) {
+               if (!nodemap->nm_prim_fileset)
+                       continue;
+
+               fset_size_prealloc = nodemap->nm_prim_fileset_size;
+               fset_size_actual = strlen(nodemap->nm_prim_fileset) + 1;
+               if (fset_size_actual == fset_size_prealloc)
+                       continue;
+
+               /* Shrink fileset size to actual */
+               OBD_ALLOC(fset_tmp, fset_size_actual);
+               if (!fset_tmp) {
+                       CERROR("%s: Nodemaps's fileset cannot be resized: rc = %d\n",
+                              nodemap->nm_name, -ENOMEM);
+                       continue;
+               }
+
+               memcpy(fset_tmp, nodemap->nm_prim_fileset, fset_size_actual);
+
+               OBD_FREE(nodemap->nm_prim_fileset, fset_size_prealloc);
+
+               nodemap->nm_prim_fileset_size = fset_size_actual;
+               nodemap->nm_prim_fileset = fset_tmp;
+       }
+
+       mutex_unlock(&active_config_lock);
+}
+
+/**
  * Ensures that configs loaded over the wire are prioritized over those loaded
  * from disk.
  *
@@ -1962,6 +2030,7 @@ void nodemap_config_set_active_mgc(struct nodemap_config *config)
 {
        mutex_lock(&nodemap_config_loaded_lock);
        nodemap_config_set_active(config);
+       nodemap_fileset_resize(config);
        nodemap_config_loaded = 1;
        nodemap_save_all_caches();
        mutex_unlock(&nodemap_config_loaded_lock);